Add an initial CGI for theguardian.com - gophercgis - Collection of gopher CGI/DCGI for geomyidae
 (HTM) hg clone https://bitbucket.org/iamleot/gophercgis
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) changeset e81f0327b4feecc17e1bc034fc7c462f6b9643d7
 (DIR) parent af699209ba08b4fd020f913149c4bb5e7a41d56b
 (HTM) Author: Leonardo Taccari <iamleot@gmail.com>
       Date:   Tue, 28 Aug 2018 00:32:48 
       
       Add an initial CGI for theguardian.com
       
       Diffstat:
        theguardian/article.cgi |  63 +++++++++++++++++++++++++++++++++++++++++++++++++
        1 files changed, 63 insertions(+), 0 deletions(-)
       ---
       diff -r af699209ba08 -r e81f0327b4fe theguardian/article.cgi
       --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
       +++ b/theguardian/article.cgi   Tue Aug 28 00:32:48 2018 +0200
       @@ -0,0 +1,63 @@
       +#!/bin/sh
       +
       +
       +#
       +# Extract <article> content
       +#
       +extract_article()
       +{
       +
       +       awk '
       +       /<article/,/<\/article>/ {
       +               print
       +       }
       +       '
       +}
       +
       +
       +#
       +# Filter HTML content
       +#
       +filter_html()
       +{
       +
       +       awk '
       +       /<div class="meta__extras/,/<\/div>/ {
       +               next
       +       }
       +
       +       /<div class="after-article js-after-article">/ {
       +               exit
       +       }
       +
       +       {
       +               print
       +       }
       +       '
       +
       +}
       +
       +#
       +# Render HTML as text
       +#
       +html_to_text()
       +{
       +       # Nonexistent proxy to block HTTP requests
       +       export http_proxy="http://localhost:31283128"
       +
       +       browser="/usr/pkg/bin/w3m"
       +
       +       browser_options="-dump -T text/html -no-cookie -cols 72"
       +       browser_options="${browser_options} -o display_link=false"
       +       browser_options="${browser_options} -o display_link_number=false"
       +       browser_options="${browser_options} -o display_image=false"
       +
       +       env LANG=en_US.UTF-8 ${browser} ${browser_options}
       +}
       +
       +
       +url=$2
       +/usr/pkg/bin/curl -sL "${url}" |
       +    extract_article |
       +    filter_html |
       +    html_to_text