tweets.py - gophercgis - Collection of gopher CGI/DCGI for geomyidae
 (HTM) hg clone https://bitbucket.org/iamleot/gophercgis
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       tweets.py
       ---
            1 #!/usr/pkg/bin/python2.7
            2 
            3 from __future__ import print_function, unicode_literals
            4 import textwrap
            5 import re
            6 import urllib
            7 from bs4 import BeautifulSoup
            8 
            9 
           10 TWEETS_BASE = 'https://mobile.twitter.com'
           11 
           12 
           13 def get_tweets(user):
           14     r = urllib.urlopen(TWEETS_BASE + '/' + user)
           15     return BeautifulSoup(r, 'html.parser')
           16 
           17 
           18 def parse_tweets(bs):
           19     ts = []
           20     for t in bs.find_all('table', class_='main-tweet') + bs.find_all('table', class_='tweet'):
           21         fullname = t.find(class_='fullname').text.strip()
           22         username = t.find(class_='username').text.strip()
           23         if t.find(class_='timestamp'):
           24             timestamp = t.find(class_='timestamp').text.strip()
           25         else:
           26             timestamp = ''
           27         if t.find(class_='tweet-reply-context'):
           28             context = t.find(class_='tweet-reply-context').text.strip()
           29             context = context.replace('\n', ' ')
           30             context = ' '.join(context.split()) # Get rid of extra blanks
           31             if t.find('a', text='View conversation'):
           32                 context_url = t.find('a', text='View conversation')['href']
           33                 context_url = context_url.lstrip('/').replace('?p=v', '')
           34             else:
           35                 context_url = None
           36         elif t.find(class_='tweet-social-context'):
           37             context = t.find(class_='tweet-social-context').text.strip()
           38             context = context.replace('\n', ' ')
           39             context = ' '.join(context.split()) # Get rid of extra blanks
           40             context_url = None
           41         else:
           42             context = None
           43             context_url = None
           44         text = parse_tweet_text(t.find(class_='tweet-text'))
           45         url = username.replace('@', '') + '/status/' + \
           46             t.find(class_='tweet-text').get('data-id', '')
           47         ts.append({
           48             'fullname': fullname,
           49             'username': username,
           50             'timestamp': timestamp,
           51             'url': url,
           52             'context': context,
           53             'context_url': context_url,
           54             'text': text,
           55         })
           56 
           57     return ts
           58 
           59 
           60 def parse_tweet_text(tweet_text):
           61     # Expand URLs
           62     for a in tweet_text.find_all('a', class_='twitter_external_link'):
           63         a.replace_with(a['data-url'])
           64 
           65     return tweet_text.text.strip()
           66 
           67 
           68 def header(bs):
           69     fullname = bs.find(class_='fullname').text.strip()
           70     username = bs.find(class_='username').text.replace('\n', '').strip()
           71     url = bs.find('link', rel='canonical')['href']
           72     print('t')
           73     print('[h|{fullname}   {username}|URL:{url}|server|port]'.format(
           74         fullname=fullname.replace('|', '\|'),
           75         username=username.replace('|', '\|'),
           76         url=url.replace('|', '\|'),
           77     ))
           78     print('t')
           79 
           80 
           81 def more(bs, cgi):
           82     if bs.find(class_='w-button-more'):
           83         more = bs.find(class_='w-button-more').a['href'].replace('/', '')
           84         print('[1|<< Older tweets|{cgi}?{more}|server|port]'.format(
           85             cgi=cgi,
           86             more=more,
           87         ))
           88         print('t')
           89 
           90 
           91 def tweet(t, cgi):
           92     print('[1|{fullname}   {username}   {timestamp}|{cgi}?{url}|server|port]'.format(
           93         cgi=cgi,
           94         fullname=t['fullname'].replace('|', '\|'),
           95         username=t['username'].replace('|', '\|'),
           96         timestamp=t['timestamp'].replace('|', '\|'),
           97         url=t['url'].replace('|', '\|'),
           98     ))
           99     if t['context']:
          100         if t['context_url']:
          101             print('[1|{context}|{cgi}?{url}|server|port]'.format(
          102                 cgi=cgi,
          103                 context=t['context'],
          104                 url=t['context_url'],
          105             ))
          106         else:
          107             print('t{context}'.format(context=t['context']))
          108     text = textwrap.fill(t['text'], width=80, break_long_words=False,
          109                          break_on_hyphens=False)
          110 
          111     # XXX: RE and .replace() dance in order to have all URLs and [h|...]
          112     # XXX: entries in a single line and without a leading "t".
          113     text = re.sub(r'((?:http|https)://[^\s]+)',
          114                    '\n [h|\g<1>|URL:\g<1>|server|port]', text)
          115     text = text.replace('\n\n', '\n')
          116     text = re.sub('\n([^ ])', '\nt\g<1>', text)
          117     text = text.replace('\n ', '\n')
          118 
          119     print('t{text}'.format(text=text))
          120     print('t')
          121 
          122 
          123 if __name__ == '__main__':
          124     import os
          125 
          126     user = os.getenv('TWEET_USER')
          127     cgi = os.getenv('CGI')
          128     b = get_tweets(user)
          129     header(b)
          130     tweets = parse_tweets(b)
          131     for t in tweets:
          132         tweet(t, cgi)
          133     more(b, cgi)