tccr.it

       Add a CGI and DCGI for ansa.it articles. - gophercgis - Collection of gopher CGI/DCGI for geomyidae
 (HTM) hg clone https://bitbucket.org/iamleot/gophercgis
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) changeset 54479ca33691d982674637d35a9176615eb21d5d
 (DIR) parent 9c5c58307dd6991ff1fd086ec89e6cdf067236fa
 (HTM) Author: Leonardo Taccari <iamleot@gmail.com>
       Date:   Wed, 29 Aug 2018 11:31:22 +0200
       
       Add a CGI and DCGI for ansa.it articles.
       
       Diffstat:
        ansa/article.cgi |  60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        ansa/feed.dcgi   |  45 ++++++++++++++++++++++++++++++++++++++++++
        2 files changed, 105 insertions(+), 0 deletions(-)
       ---
       diff -r 9c5c58307dd6 -r 54479ca33691 ansa/article.cgi
       --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
       +++ b/ansa/article.cgi  Wed Aug 29 11:31:22 2018 +0200
       @@ -0,0 +1,60 @@
       +#!/bin/sh
       +
       +
       +#
       +# Extract <article> content
       +#
       +extract_article()
       +{
       +
       +       awk '
       +       /<article/,/<\/article>/ {
       +               print
       +       }
       +       '
       +}
       +
       +
       +#
       +# Filter HTML content
       +#
       +filter_html()
       +{
       +
       +       awk '
       +       /<header class="header-news">/,/<\/header>/ {
       +               print
       +       }
       +
       +       /<div itemprop="articleBody" class="news-txt">/,/<\/div>/ {
       +               print
       +       }
       +       '
       +
       +}
       +
       +
       +#
       +# Render HTML as text
       +#
       +html_to_text()
       +{
       +       # Nonexistent proxy to block HTTP requests
       +       export http_proxy="http://localhost:31283128"
       +
       +       browser="/usr/pkg/bin/w3m"
       +
       +       browser_options="-dump -T text/html -no-cookie -cols 72"
       +       browser_options="${browser_options} -o display_link=false"
       +       browser_options="${browser_options} -o display_link_number=false"
       +       browser_options="${browser_options} -o display_image=false"
       +
       +       env LANG=en_US.UTF-8 ${browser} ${browser_options}
       +}
       +
       +
       +url=$2
       +/usr/pkg/bin/curl -sL "${url}" |
       +    extract_article |
       +    filter_html |
       +    html_to_text
       diff -r 9c5c58307dd6 -r 54479ca33691 ansa/feed.dcgi
       --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
       +++ b/ansa/feed.dcgi    Wed Aug 29 11:31:22 2018 +0200
       @@ -0,0 +1,45 @@
       +#!/bin/sh
       +
       +
       +ARTICLE_CGI="/cgi/ansa/article.cgi"
       +
       +
       +echo ""
       +echo "ANSA"
       +echo ""
       +
       +url="http://www.ansa.it/sito/ansait_rss.xml"
       +/usr/pkg/bin/curl -gs "${url}" |
       +{ /usr/pkg/bin/xmllint --format - 2>/dev/null ; } |
       +awk '
       +/<channel>/,/<copyright>/ {
       +       next    # ignore all non-item-s
       +}
       +
       +/<title/ {
       +       gsub("</?title>", "")
       +       gsub(/^[[:space:]]+/, "")
       +       gsub(/[[:space:]]+$/, "")
       +       title = $0
       +}
       +
       +/<link/ {
       +       gsub("</?link>", "")
       +       gsub(/^[[:space:]]+/, "")
       +       gsub(/[[:space:]]+$/, "")
       +       link = $0
       +}
       +
       +title && link {
       +       gsub("\\|", "\\|", title)
       +       gsub("\\|", "\\|", link)
       +       gsub("^<!\\[CDATA\\[", "", title)
       +       gsub("\\]\\]>$", "", title)
       +
       +       printf("[0|%s|'"${ARTICLE_CGI}?"'%s|server|port]\n", title, link)
       +
       +       title = link = ""
       +}
       +'
       +
       +echo ""