Rename twitter CGI to pytwitter - gophercgis - Collection of gopher CGI/DCGI for geomyidae
(HTM) hg clone https://bitbucket.org/iamleot/gophercgis
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) changeset 3014f57fd6c6d1c29e9f67f07ded87edd7f44e46
(DIR) parent f160deab50628cdc0c854143e78ae5c89e452d3d
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Sun, 16 Dec 2018 02:25:04
Rename twitter CGI to pytwitter
(A reimplementation of it using tscrape will be hopefully imported soon)
Diffstat:
pytwitter/README | 35 +++++++++++++
pytwitter/tweets.dcgi | 5 +
pytwitter/tweets.py | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++
twitter/README | 35 -------------
twitter/tweets.dcgi | 5 -
twitter/tweets.py | 133 --------------------------------------------------
6 files changed, 173 insertions(+), 173 deletions(-)
---
diff -r f160deab5062 -r 3014f57fd6c6 pytwitter/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pytwitter/README Sun Dec 16 02:25:04 2018 +0100
@@ -0,0 +1,35 @@
+pytwitter DCGI is a Python 2.7 script that uses the BeautifulSoup module to scrape
+mobile.twitter.com.
+
+Given a user it displays tweets, adding HTTP links if the tweet contains
+anchors.
+
+To use it just add `[1|...]' entries in a .gph file, e.g. for @netbsd:
+
+ t@netbsd
+ [1|netbsd|/cgi/pytwitter/tweets.dcgi?netbsd|server|port]
+
+Here a corresponding "screenshot" of sacc displaying it:
+
+ | |
+ | HTML+ NetBSD Foundation @netbsd
+ | |
+ | Dir + Jared McNeill @jmcwhatever Aug 2
+ | | NetBSD Foundation retweeted
+ | | Good morning, ROCKPro64 #NetBSD
+ | HTML+ https://twitter.com/jmcwhatever/status/1024948550344630272/photo/1
+ | |
+ | Dir + ryo @rsh Jul 31
+ | | NetBSD Foundation retweeted
+ | | tweet from firefox on NetBSD/pinebook!
+ | |
+ | Dir + Youri Mouton @YouriMouton Jul 31
+ | | NetBSD Foundation retweeted
+ | | Current status: trying to build Cinnamon on #arm64 #NetBSD on the Pinebook
+ | |
+ | Dir + ryo @rsh Jul 31
+ | | NetBSD Foundation retweeted
+ | | netbsd/evbarm64 ATF result: 5526 passed, 256 skipped, 94 expected failed, 172
+ | | failed. じわじわ減っている。
+ | |
+ | [...]
diff -r f160deab5062 -r 3014f57fd6c6 pytwitter/tweets.dcgi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pytwitter/tweets.dcgi Sun Dec 16 02:25:04 2018 +0100
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+. ../common/config.sh
+
+CGI="${CGI_BASEDIR}/pytwitter/tweets.dcgi" TWEET_USER="$2" PYTHONIOENCODING="utf-8" python2.7 tweets.py
diff -r f160deab5062 -r 3014f57fd6c6 pytwitter/tweets.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pytwitter/tweets.py Sun Dec 16 02:25:04 2018 +0100
@@ -0,0 +1,133 @@
+#!/usr/pkg/bin/python2.7
+
+from __future__ import print_function, unicode_literals
+import textwrap
+import re
+import urllib
+from bs4 import BeautifulSoup
+
+
+TWEETS_BASE = 'https://mobile.twitter.com'
+
+
+def get_tweets(user):
+ r = urllib.urlopen(TWEETS_BASE + '/' + user)
+ return BeautifulSoup(r, 'html.parser')
+
+
+def parse_tweets(bs):
+ ts = []
+ for t in bs.find_all('table', class_='main-tweet') + bs.find_all('table', class_='tweet'):
+ fullname = t.find(class_='fullname').text.strip()
+ username = t.find(class_='username').text.strip()
+ if t.find(class_='timestamp'):
+ timestamp = t.find(class_='timestamp').text.strip()
+ else:
+ timestamp = ''
+ if t.find(class_='tweet-reply-context'):
+ context = t.find(class_='tweet-reply-context').text.strip()
+ context = context.replace('\n', ' ')
+ context = ' '.join(context.split()) # Get rid of extra blanks
+ if t.find('a', text='View conversation'):
+ context_url = t.find('a', text='View conversation')['href']
+ context_url = context_url.lstrip('/').replace('?p=v', '')
+ else:
+ context_url = None
+ elif t.find(class_='tweet-social-context'):
+ context = t.find(class_='tweet-social-context').text.strip()
+ context = context.replace('\n', ' ')
+ context = ' '.join(context.split()) # Get rid of extra blanks
+ context_url = None
+ else:
+ context = None
+ context_url = None
+ text = parse_tweet_text(t.find(class_='tweet-text'))
+ url = username.replace('@', '') + '/status/' + \
+ t.find(class_='tweet-text').get('data-id', '')
+ ts.append({
+ 'fullname': fullname,
+ 'username': username,
+ 'timestamp': timestamp,
+ 'url': url,
+ 'context': context,
+ 'context_url': context_url,
+ 'text': text,
+ })
+
+ return ts
+
+
+def parse_tweet_text(tweet_text):
+ # Expand URLs
+ for a in tweet_text.find_all('a', class_='twitter_external_link'):
+ a.replace_with(a['data-url'])
+
+ return tweet_text.text.strip()
+
+
+def header(bs):
+ fullname = bs.find(class_='fullname').text.strip()
+ username = bs.find(class_='username').text.replace('\n', '').strip()
+ url = bs.find('link', rel='canonical')['href']
+ print('t')
+ print('[h|{fullname} {username}|URL:{url}|server|port]'.format(
+ fullname=fullname.replace('|', '\|'),
+ username=username.replace('|', '\|'),
+ url=url.replace('|', '\|'),
+ ))
+ print('t')
+
+
+def more(bs, cgi):
+ if bs.find(class_='w-button-more'):
+ more = bs.find(class_='w-button-more').a['href'].replace('/', '')
+ print('[1|<< Older tweets|{cgi}?{more}|server|port]'.format(
+ cgi=cgi,
+ more=more,
+ ))
+ print('t')
+
+
+def tweet(t, cgi):
+ print('[1|{fullname} {username} {timestamp}|{cgi}?{url}|server|port]'.format(
+ cgi=cgi,
+ fullname=t['fullname'].replace('|', '\|'),
+ username=t['username'].replace('|', '\|'),
+ timestamp=t['timestamp'].replace('|', '\|'),
+ url=t['url'].replace('|', '\|'),
+ ))
+ if t['context']:
+ if t['context_url']:
+ print('[1|{context}|{cgi}?{url}|server|port]'.format(
+ cgi=cgi,
+ context=t['context'],
+ url=t['context_url'],
+ ))
+ else:
+ print('t{context}'.format(context=t['context']))
+ text = textwrap.fill(t['text'], width=80, break_long_words=False,
+ break_on_hyphens=False)
+
+ # XXX: RE and .replace() dance in order to have all URLs and [h|...]
+ # XXX: entries in a single line and without a leading "t".
+ text = re.sub(r'((?:http|https)://[^\s]+)',
+ '\n [h|\g<1>|URL:\g<1>|server|port]', text)
+ text = text.replace('\n\n', '\n')
+ text = re.sub('\n([^ ])', '\nt\g<1>', text)
+ text = text.replace('\n ', '\n')
+
+ print('t{text}'.format(text=text))
+ print('t')
+
+
+if __name__ == '__main__':
+ import os
+
+ user = os.getenv('TWEET_USER')
+ cgi = os.getenv('CGI')
+ b = get_tweets(user)
+ header(b)
+ tweets = parse_tweets(b)
+ for t in tweets:
+ tweet(t, cgi)
+ more(b, cgi)
diff -r f160deab5062 -r 3014f57fd6c6 twitter/README
--- a/twitter/README Sun Dec 09 15:00:53 2018 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-twitter DCGI is a Python 2.7 script that uses BeautifulSoup module to scrape
-mobile.twitter.com.
-
-Given a user it display tweets, adding HTTP links if the tweet contains
-anchors.
-
-To use it just add `[1|...]' entries in a .gph file, e.g. for @netbsd:
-
- t@netbsd
- [1|netbsd|/cgi/twitter/tweets.dcgi?netbsd|server|port]
-
-Here a corresponding "screenshot" of sacc displaying it:
-
- | |
- | HTML+ NetBSD Foundation @netbsd
- | |
- | Dir + Jared McNeill @jmcwhatever Aug 2
- | | NetBSD Foundation retweeted
- | | Good morning, ROCKPro64 #NetBSD
- | HTML+ https://twitter.com/jmcwhatever/status/1024948550344630272/photo/1
- | |
- | Dir + ryo @rsh Jul 31
- | | NetBSD Foundation retweeted
- | | tweet from firefox on NetBSD/pinebook!
- | |
- | Dir + Youri Mouton @YouriMouton Jul 31
- | | NetBSD Foundation retweeted
- | | Current status: trying to build Cinnamon on #arm64 #NetBSD on the Pinebook
- | |
- | Dir + ryo @rsh Jul 31
- | | NetBSD Foundation retweeted
- | | netbsd/evbarm64 ATF result: 5526 passed, 256 skipped, 94 expected failed, 172
- | | failed. じわじわ減っている。
- | |
- | [...]
diff -r f160deab5062 -r 3014f57fd6c6 twitter/tweets.dcgi
--- a/twitter/tweets.dcgi Sun Dec 09 15:00:53 2018 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-#!/bin/sh
-
-. ../common/config.sh
-
-CGI="${CGI_BASEDIR}/twitter/tweets.dcgi" TWEET_USER="$2" PYTHONIOENCODING="utf-8" python2.7 tweets.py
diff -r f160deab5062 -r 3014f57fd6c6 twitter/tweets.py
--- a/twitter/tweets.py Sun Dec 09 15:00:53 2018 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,133 +0,0 @@
-#!/usr/pkg/bin/python2.7
-
-from __future__ import print_function, unicode_literals
-import textwrap
-import re
-import urllib
-from bs4 import BeautifulSoup
-
-
-TWEETS_BASE = 'https://mobile.twitter.com'
-
-
-def get_tweets(user):
- r = urllib.urlopen(TWEETS_BASE + '/' + user)
- return BeautifulSoup(r, 'html.parser')
-
-
-def parse_tweets(bs):
- ts = []
- for t in bs.find_all('table', class_='main-tweet') + bs.find_all('table', class_='tweet'):
- fullname = t.find(class_='fullname').text.strip()
- username = t.find(class_='username').text.strip()
- if t.find(class_='timestamp'):
- timestamp = t.find(class_='timestamp').text.strip()
- else:
- timestamp = ''
- if t.find(class_='tweet-reply-context'):
- context = t.find(class_='tweet-reply-context').text.strip()
- context = context.replace('\n', ' ')
- context = ' '.join(context.split()) # Get rid of extra blanks
- if t.find('a', text='View conversation'):
- context_url = t.find('a', text='View conversation')['href']
- context_url = context_url.lstrip('/').replace('?p=v', '')
- else:
- context_url = None
- elif t.find(class_='tweet-social-context'):
- context = t.find(class_='tweet-social-context').text.strip()
- context = context.replace('\n', ' ')
- context = ' '.join(context.split()) # Get rid of extra blanks
- context_url = None
- else:
- context = None
- context_url = None
- text = parse_tweet_text(t.find(class_='tweet-text'))
- url = username.replace('@', '') + '/status/' + \
- t.find(class_='tweet-text').get('data-id', '')
- ts.append({
- 'fullname': fullname,
- 'username': username,
- 'timestamp': timestamp,
- 'url': url,
- 'context': context,
- 'context_url': context_url,
- 'text': text,
- })
-
- return ts
-
-
-def parse_tweet_text(tweet_text):
- # Expand URLs
- for a in tweet_text.find_all('a', class_='twitter_external_link'):
- a.replace_with(a['data-url'])
-
- return tweet_text.text.strip()
-
-
-def header(bs):
- fullname = bs.find(class_='fullname').text.strip()
- username = bs.find(class_='username').text.replace('\n', '').strip()
- url = bs.find('link', rel='canonical')['href']
- print('t')
- print('[h|{fullname} {username}|URL:{url}|server|port]'.format(
- fullname=fullname.replace('|', '\|'),
- username=username.replace('|', '\|'),
- url=url.replace('|', '\|'),
- ))
- print('t')
-
-
-def more(bs, cgi):
- if bs.find(class_='w-button-more'):
- more = bs.find(class_='w-button-more').a['href'].replace('/', '')
- print('[1|<< Older tweets|{cgi}?{more}|server|port]'.format(
- cgi=cgi,
- more=more,
- ))
- print('t')
-
-
-def tweet(t, cgi):
- print('[1|{fullname} {username} {timestamp}|{cgi}?{url}|server|port]'.format(
- cgi=cgi,
- fullname=t['fullname'].replace('|', '\|'),
- username=t['username'].replace('|', '\|'),
- timestamp=t['timestamp'].replace('|', '\|'),
- url=t['url'].replace('|', '\|'),
- ))
- if t['context']:
- if t['context_url']:
- print('[1|{context}|{cgi}?{url}|server|port]'.format(
- cgi=cgi,
- context=t['context'],
- url=t['context_url'],
- ))
- else:
- print('t{context}'.format(context=t['context']))
- text = textwrap.fill(t['text'], width=80, break_long_words=False,
- break_on_hyphens=False)
-
- # XXX: RE and .replace() dance in order to have all URLs and [h|...]
- # XXX: entries in a single line and without a leading "t".
- text = re.sub(r'((?:http|https)://[^\s]+)',
- '\n [h|\g<1>|URL:\g<1>|server|port]', text)
- text = text.replace('\n\n', '\n')
- text = re.sub('\n([^ ])', '\nt\g<1>', text)
- text = text.replace('\n ', '\n')
-
- print('t{text}'.format(text=text))
- print('t')
-
-
-if __name__ == '__main__':
- import os
-
- user = os.getenv('TWEET_USER')
- cgi = os.getenv('CGI')
- b = get_tweets(user)
- header(b)
- tweets = parse_tweets(b)
- for t in tweets:
- tweet(t, cgi)
- more(b, cgi)