Add a WIP CGI for technologyreview.com articles. - gophercgis - Collection of gopher CGI/DCGI for geomyidae
(HTM) hg clone https://bitbucket.org/iamleot/gophercgis
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) changeset 6bd29a0626a7cd68148f84a4894b8ca5661a7fca
(DIR) parent fc7dda8b810979d11f9811d193704f6bf0c675b1
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Tue, 21 Aug 2018 13:35:33
Add a WIP CGI for technologyreview.com articles.
At the moment only the `*/s/*' ones are supported.
Diffstat:
technologyreview/article.cgi | 66 ++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 66 insertions(+), 0 deletions(-)
---
diff -r fc7dda8b8109 -r 6bd29a0626a7 technologyreview/article.cgi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/technologyreview/article.cgi Tue Aug 21 13:35:33 2018 +0200
@@ -0,0 +1,66 @@
+#!/bin/sh
+
+
+#
+# Filter HTML content: copy stdin to stdout, dropping the social-list,
+# trending and event-promo <div> blocks, every line that starts with
+# </div>, and everything from the article-enlarge modal <div> onwards.
+#
+filter_html()
+{
+  # "<" and ">" are written unescaped: POSIX leaves \< and \> undefined
+  # in awk regexps and GNU awk reads them as word anchors, so the escaped
+  # forms fail to match a literal "<div" there.
+  awk '
+  # Opening tag of a block to drop.
+  # NOTE(review): the blank after ^ in the third pattern has to match the
+  # indentation produced by xmllint exactly -- confirm.
+  /^<div class="article-social-list / || \
+  /^<div class="l-automated-trending--ordered" / || \
+  /^ <div class="callout--tagged-event-promo"/ {
+    ignore = 1
+    next
+  }
+
+  # A closing tag at the start of a line ends the dropped block; the tag
+  # itself is never printed.
+  /^<\/div>/ {
+    ignore = 0
+    next
+  }
+
+  # Stop at the article-enlarge modal, even while a block is being dropped.
+  /^<div class="article-enlarge__modal">/ {
+    exit 0
+  }
+  !ignore {
+    print
+  }
+  '
+}
+
+
+#
+# Render HTML as text
+#
+html_to_text()
+{
+ # Nonexistent proxy to block HTTP requests
+ export http_proxy="http://localhost:31283128"
+
+ browser="/usr/pkg/bin/w3m"
+
+ browser_options="-dump -T text/html -no-cookie -cols 72"
+ browser_options="${browser_options} -o display_link=false"
+ browser_options="${browser_options} -o display_link_number=false"
+ browser_options="${browser_options} -o display_image=false"
+
+ ${browser} ${browser_options}
+}
+
+
+url=$2
+/usr/pkg/bin/curl -sL "${url}" |
+ { /usr/pkg/bin/xmllint --html --format --xpath '//main' - 2>/dev/null ; } |
+ filter_html |
+ html_to_text