tweets.py - gophercgis - Collection of gopher CGI/DCGI for geomyidae
(HTM) hg clone https://bitbucket.org/iamleot/gophercgis
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
tweets.py
---
1 #!/usr/pkg/bin/python2.7
2
3 from __future__ import print_function, unicode_literals
4 import textwrap
5 import re
6 import urllib
7 from bs4 import BeautifulSoup
8
9
# Root of the mobile Twitter site; the user name or path to fetch is
# appended to it after a '/'.
TWEETS_BASE = 'https://mobile.twitter.com'
11
12
def get_tweets(user):
    """Download the mobile Twitter page of `user` and return it parsed.

    The document at TWEETS_BASE + '/' + user is fetched and handed to
    BeautifulSoup using the 'html.parser' backend; the resulting
    BeautifulSoup object is returned.  The HTTP response is closed before
    returning, whether or not parsing succeeded.
    """
    r = urllib.urlopen(TWEETS_BASE + '/' + user)
    try:
        # The response is only needed while the tree is being built, so it
        # is released right after instead of being left open.
        return BeautifulSoup(r, 'html.parser')
    finally:
        r.close()
16
17
def parse_tweets(bs):
    """Turn every tweet table of the parsed page `bs` into a dict.

    All <table> elements of class 'main-tweet' are visited first, followed
    by all those of class 'tweet'.  The returned list holds one dict per
    table with the keys:

    fullname, username -- stripped text of the elements of those classes
    timestamp          -- stripped text of the 'timestamp' element, or ''
    url                -- username without '@' + '/status/' + the 'data-id'
                          attribute of the 'tweet-text' element ('' if unset)
    context            -- whitespace-normalised text of the
                          'tweet-reply-context' element, else of the
                          'tweet-social-context' element, else None
    context_url        -- for a reply context only: href of the
                          'View conversation' link without leading '/' and
                          without '?p=v'; None in every other case
    text               -- result of parse_tweet_text() on 'tweet-text'
    """
    def squeeze(node):
        # Newlines and runs of blanks in the node's text become one blank.
        return ' '.join(node.text.split())

    tables = bs.find_all('table', class_='main-tweet')
    tables = tables + bs.find_all('table', class_='tweet')

    parsed = []
    for table in tables:
        fullname = table.find(class_='fullname').text.strip()
        username = table.find(class_='username').text.strip()

        stamp = table.find(class_='timestamp')
        timestamp = stamp.text.strip() if stamp else ''

        context = None
        context_url = None
        reply = table.find(class_='tweet-reply-context')
        if reply:
            context = squeeze(reply)
            conversation = table.find('a', text='View conversation')
            if conversation:
                href = conversation['href']
                context_url = href.lstrip('/').replace('?p=v', '')
        else:
            social = table.find(class_='tweet-social-context')
            if social:
                context = squeeze(social)

        body = table.find(class_='tweet-text')
        # parse_tweet_text() rewrites links inside `body` in place; the
        # element's own attributes are read afterwards, as before.
        text = parse_tweet_text(body)
        status_id = body.get('data-id', '')

        parsed.append({
            'fullname': fullname,
            'username': username,
            'timestamp': timestamp,
            'url': username.replace('@', '') + '/status/' + status_id,
            'context': context,
            'context_url': context_url,
            'text': text,
        })

    return parsed
58
59
def parse_tweet_text(tweet_text):
    """Return the stripped text of `tweet_text` with external URLs expanded.

    Every <a> of class 'twitter_external_link' below `tweet_text` is
    replaced, in place, by the value of its 'data-url' attribute, so the
    passed element is modified by this call.
    """
    external_links = tweet_text.find_all('a', class_='twitter_external_link')
    for link in external_links:
        expanded = link['data-url']
        link.replace_with(expanded)

    return tweet_text.text.strip()
66
67
def header(bs):
    """Print the page header: a link to the profile between two 't' lines.

    The full name and user name are taken from the first elements of class
    'fullname' and 'username' in `bs` (newlines are removed from the user
    name), the target from the href of the <link rel="canonical"> element.
    They are printed as one '[h|...|URL:...|server|port]' entry.
    """
    def escape(field):
        # '|' separates the fields of a [...] entry, so a literal one is
        # written as backslash + '|'.  The backslash is doubled in the
        # source because '\|' is not a valid string escape sequence.
        return field.replace('|', '\\|')

    fullname = bs.find(class_='fullname').text.strip()
    username = bs.find(class_='username').text.replace('\n', '').strip()
    url = bs.find('link', rel='canonical')['href']
    # NOTE(review): a bare 't' line is presumably rendered by geomyidae as
    # an empty text line -- confirm against the gph format description.
    print('t')
    print('[h|{fullname} {username}|URL:{url}|server|port]'.format(
        fullname=escape(fullname),
        username=escape(username),
        url=escape(url),
    ))
    print('t')
79
80
def more(bs, cgi):
    """Print a '<< Older tweets' entry when the page offers a next page.

    If `bs` contains an element of class 'w-button-more', the href of its
    first <a> (with every '/' removed) is appended to `cgi` after a '?' and
    printed as a '[1|...]' entry followed by a 't' line.  Nothing is
    printed otherwise.
    """
    button = bs.find(class_='w-button-more')
    if not button:
        return

    target = button.a['href'].replace('/', '')
    print('[1|<< Older tweets|{cgi}?{more}|server|port]'.format(
        more=target,
        cgi=cgi,
    ))
    print('t')
89
90
def tweet(t, cgi):
    """Print one parsed tweet: title entry, optional context, then text.

    `t` is a dict with the keys 'fullname', 'username', 'timestamp', 'url',
    'context', 'context_url' and 'text' (the shape built by parse_tweets);
    `cgi` is the selector placed before '?' in every '[1|...]' entry.

    Output, in order:
    - a '[1|...]' entry for the tweet itself;
    - if there is a context: a '[1|...]' entry when it has a URL, otherwise
      a plain line prefixed with 't';
    - the text wrapped at 80 columns, each text line prefixed with 't' and
      each http(s) URL turned into a '[h|...]' entry;
    - a closing 't' line.

    A '|' inside any value interpolated into a '[...]' entry is written as
    backslash + '|' so it cannot be taken for a field separator.
    """
    def escape(value):
        # The backslash is doubled in the source because '\|' is not a
        # valid string escape sequence; the runtime value is unchanged.
        return value.replace('|', '\\|')

    def link(match):
        # The leading "\n " puts the entry on a line of its own and marks
        # that line (with the blank) so the "t" prefixing below skips it.
        return '\n [h|{url}|URL:{url}|server|port]'.format(
            url=escape(match.group(1)))

    print('[1|{fullname} {username} {timestamp}|{cgi}?{url}|server|port]'.format(
        cgi=cgi,
        fullname=escape(t['fullname']),
        username=escape(t['username']),
        timestamp=escape(t['timestamp']),
        url=escape(t['url']),
    ))
    if t['context']:
        if t['context_url']:
            # The context is part of an entry here, so it is escaped like
            # every other field.
            print('[1|{context}|{cgi}?{url}|server|port]'.format(
                cgi=cgi,
                context=escape(t['context']),
                url=escape(t['context_url']),
            ))
        else:
            # Plain text line: no field separators to protect.
            print('t{context}'.format(context=t['context']))
    text = textwrap.fill(t['text'], width=80, break_long_words=False,
                         break_on_hyphens=False)

    # XXX: RE and .replace() dance in order to have all URLs and [h|...]
    # XXX: entries in a single line and without a leading "t".
    text = re.sub(r'((?:http|https)://[^\s]+)', link, text)
    # A URL that already started a wrapped line would leave an empty line.
    text = text.replace('\n\n', '\n')
    # Prefix every continuation line except the marked entry lines.
    text = re.sub('\n([^ ])', '\nt\\g<1>', text)
    # Drop the marker blank in front of the entries.
    text = text.replace('\n ', '\n')

    print('t{text}'.format(text=text))
    print('t')
121
122
if __name__ == '__main__':
    import os

    # The account to show and the selector of this script are both read
    # from the environment (TWEET_USER and CGI).
    cgi = os.getenv('CGI')
    page = get_tweets(os.getenv('TWEET_USER'))

    # Header first, then one block per tweet, then the paging link.
    header(page)
    for entry in parse_tweets(page):
        tweet(entry, cgi)
    more(page, cgi)