Add an initial CGI for theguardian.com - gophercgis - Collection of gopher CGI/DCGI for geomyidae
(HTM) hg clone https://bitbucket.org/iamleot/gophercgis
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) changeset e81f0327b4feecc17e1bc034fc7c462f6b9643d7
(DIR) parent af699209ba08b4fd020f913149c4bb5e7a41d56b
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Tue, 28 Aug 2018 00:32:48
Add an initial CGI for theguardian.com
Diffstat:
theguardian/article.cgi | 63 +++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 63 insertions(+), 0 deletions(-)
---
diff -r af699209ba08 -r e81f0327b4fe theguardian/article.cgi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/theguardian/article.cgi Tue Aug 28 00:32:48 2018 +0200
@@ -0,0 +1,63 @@
+#!/bin/sh
+
+
+#
+# Extract <article> content
+#
+extract_article()
+{
+    # Copy stdin to stdout from a line containing "<article" through "</article>".
+    awk '
+        /<article/     { inside = 1 }
+        inside         { print }
+        /<\/article>/  { inside = 0 }
+    '
+}
+
+
+#
+# Filter HTML content
+#
+filter_html()
+{
+    # Drop every meta__extras <div> (up to the first following line that
+    # contains </div>), stop at the after-article <div>, print the rest.
+    awk '
+        /<div class="meta__extras/ {
+            skipping = 1
+        }
+
+        skipping {
+            if (/<\/div>/)
+                skipping = 0
+            next
+        }
+        /<div class="after-article js-after-article">/ { exit }
+        1
+    '
+}
+
+#
+# Render HTML as text
+#
+html_to_text()
+{
+    # Render the HTML read from stdin as 72-column plain text on stdout.
+    # http_proxy names a proxy meant not to exist, to block HTTP requests;
+    # it is set for the w3m process only, not for the calling shell.
+    # NOTE(review): 31283128 is above 65535, the highest valid TCP port.
+    # Options are kept in "$@" so each reaches w3m as exactly one word.
+    set -- -dump -T text/html -no-cookie -cols 72 \
+        -o display_link=false \
+        -o display_link_number=false \
+        -o display_image=false
+    env LANG=en_US.UTF-8 http_proxy="http://localhost:31283128" \
+        /usr/pkg/bin/w3m "$@"
+}
+
+
+url=$2
+/usr/pkg/bin/curl -sL "${url}" |
+ extract_article |
+ filter_html |
+ html_to_text