#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2011 by Samuele Giovanni Tonon
# samu at linuxasylum dot net
# This program comes with ABSOLUTELY NO WARRANTY; express or
# implied.
# This is free software, and you are welcome to redistribute it
# under certain conditions; as expressed here
# http://www.gnu.org/licenses/gpl-2.0.html
"""Build a de-duplicated word list from Twitter search results.

Either a single search query (``-q``, optionally restricted by ``-l``) or
every trending query of a location (``-w woeid``) is fetched; the author
name and the cleaned-up words of each returned message are collected.
The script prints the total word count, the unique word count and then
one unique word per line.

The code runs unchanged on Python 2.6+ and Python 3; every ``print(...)``
call takes exactly one argument so it means the same under both.

NOTE(review): both endpoints below are Twitter API v1 URLs, which are
believed to have been retired - confirm before relying on this script.
"""

import contextlib
import getopt
import json
import sys

try:  # Python 2
    import urllib2
    from urllib import urlencode
except ImportError:  # Python 3
    import urllib.request as urllib2
    from urllib.parse import urlencode

# Search endpoint and the request parameters sent to it.  ``params`` is
# updated in place by the command-line options and by do_trend_search().
url = "http://search.twitter.com/search.json"
params = {
    'rpp': '100',
    'lang': 'en',
    'woeid': '',
    'q': 'jtr',
}

short_options = "vhl:q:w:"
long_options = ["verbose", "help", "lang=", "query=", "woeid="]

# Characters stripped from every word.  The non-ASCII ones are written as
# escapes so the value does not depend on the source-file encoding (the
# file used to declare iso-8859-15, which cannot represent them).
# NOTE(review): the original literal listed U+201D twice; one of them was
# possibly meant to be the opening quote U+201C - confirm.
removed = u'@#,.;:"[]{}()\u2026\u201d!?'

# Set by -v/--verbose; nothing in this file reads it yet.
verbose = False

_TEXT_TYPE = type(u'')


def do_trend_search():
    """Collect the words of every trending query for ``params['woeid']``.

    The trends document is fetched with a plain GET; for each trend its
    ``query`` value is stored in ``params['q']`` (the last one is left
    there on return) and do_query_search() is run.

    Returns the concatenated word lists of all trend searches.
    """
    collected = []
    api_url = "https://api.twitter.com/1/trends/%s.json" % params['woeid']
    trends_doc = json.loads(get_url(api_url).decode('utf-8'))
    for trend in trends_doc[0]['trends']:
        # NOTE(review): the trends API appears to return ``query`` already
        # percent-encoded, in which case urlencode() in get_url() encodes
        # it a second time - confirm against a real response.
        params['q'] = trend['query']
        collected.extend(do_query_search())
    return collected


def do_query_search():
    """Run one search with the current ``params`` and return its words.

    For every entry of the reply's ``results`` list the ``from_user``
    value is added verbatim, followed by the words parse_tweet() extracts
    from the entry's ``text``.
    """
    words = []
    reply = json.loads(get_url(url, params).decode('utf-8'))
    for msg in reply['results']:
        words.append(msg['from_user'])
        words.extend(parse_tweet(msg['text']))
    return words


def get_url(url, params=None):
    """Fetch *url* and return the raw (undecoded) response body.

    When *params* is given it is form-encoded and passed as the request
    body, which makes urllib issue a POST; when it is omitted a plain GET
    is sent.  The response is always closed, even if reading fails.

    NOTE(review): search endpoints are normally queried with GET; the
    POST behaviour is kept from the original code - confirm it is wanted.
    """
    body = None
    if params is not None:
        # Python 3 requires the request body to be bytes; urlencode()
        # output is pure ASCII so this is a no-op conversion on Python 2.
        body = urlencode(params).encode('ascii')
    req = urllib2.Request(url, body)
    with contextlib.closing(urllib2.urlopen(req)) as res:
        return res.read()


def usage():
    """Print a one-line synopsis of the two ways to run the script."""
    prog = sys.argv[0]
    print("%s [-v] [-l lang] -q query | %s [-v] -w woeid" % (prog, prog))


def parse_tweet(tweet):
    """Split *tweet* into cleaned words.

    The text is split on any whitespace (so words separated by a newline
    are no longer glued together).  Tokens containing ``http`` are
    dropped, the characters in ``removed`` and apostrophes are stripped,
    and only results longer than two characters are kept.

    Returns the list of kept words in their original order.
    """
    strip_chars = set(removed + u"'")
    words = []
    for token in tweet.split():
        if 'http' in token:
            continue  # links are useless as dictionary words
        word = u''.join(ch for ch in token if ch not in strip_chars)
        if len(word) > 2:
            words.append(word)
    return words


def _emit(value):
    """Print *value* on its own line without tripping over non-ASCII text."""
    text = _TEXT_TYPE(value)
    if sys.version_info[0] < 3:
        # Python 2 would raise UnicodeEncodeError when stdout is a pipe
        # or file (encoding is None), so encode explicitly.
        text = text.encode(sys.stdout.encoding or 'utf-8', 'replace')
    print(text)


def main(argv=None):
    """Parse the command line, run the search and print the word list.

    *argv* defaults to ``sys.argv[1:]``.  Exits with status 2 on an
    invalid command line and with status 0 after ``-h``/``--help``.
    """
    global verbose
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, _args = getopt.getopt(argv, short_options, long_options)
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    for opt, value in opts:
        if opt in ("-v", "--verbose"):
            verbose = True
        elif opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-l", "--lang"):
            params['lang'] = value
        elif opt in ("-q", "--query"):
            params['q'] = value
        elif opt in ("-w", "--woeid"):
            params['woeid'] = value
        else:
            # Only reachable if an option is declared above but not
            # handled here; report it instead of relying on assert.
            print("unhandled option: %s" % opt)
            usage()
            sys.exit(2)

    if params['woeid'] != "":
        words = do_trend_search()
    else:
        words = do_query_search()

    print(len(words))
    unique_words = list(set(words))
    print(len(unique_words))
    for val in unique_words:
        _emit(val)


if __name__ == "__main__":
    main()