Add a CGI and DCGI for ansa.it articles. - gophercgis - Collection of gopher CGI/DCGI for geomyidae
(HTM) hg clone https://bitbucket.org/iamleot/gophercgis
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) changeset 54479ca33691d982674637d35a9176615eb21d5d
(DIR) parent 9c5c58307dd6991ff1fd086ec89e6cdf067236fa
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Wed, 29 Aug 2018 11:31:22
Add a CGI and DCGI for ansa.it articles.
Diffstat:
ansa/article.cgi | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ansa/feed.dcgi | 45 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 105 insertions(+), 0 deletions(-)
---
diff -r 9c5c58307dd6 -r 54479ca33691 ansa/article.cgi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ansa/article.cgi Wed Aug 29 11:31:22 2018 +0200
@@ -0,0 +1,60 @@
+#!/bin/sh
+
+
+#
+# Extract <article> content
+#
+extract_article()
+{
+
+ awk '
+ /<article/,/<\/article>/ {
+ print
+ }
+ '
+}
+
+
+#
+# filter_html - keep only the headline block and the body of an article.
+# Reads HTML on stdin and writes to stdout the lines lying in either of
+# two ranges: <header class="header-news"> up to the next </header>, and
+# <div itemprop="articleBody" class="news-txt"> up to the next </div>.
+#
+filter_html()
+{
+	# Each rule uses the default print action and is checked on its
+	# own for every line, so a line inside both ranges is written
+	# twice. The body range ends at the first line matching </div>,
+	# even when that tag closes a nested <div>.
+	awk '
+	/<header class="header-news">/,/<\/header>/
+	/<div itemprop="articleBody" class="news-txt">/,/<\/div>/
+	'
+}
+
+
+#
+# html_to_text - render HTML read from stdin as plain text on stdout.
+# Runs w3m in dump mode on text/html input, 72 columns wide, with cookies,
+# link display, link numbering and image display all switched off.
+#
+html_to_text()
+{
+	# Route HTTP through a proxy that does not exist; this is meant to
+	# keep w3m from issuing any HTTP request while it dumps the page.
+	export http_proxy="http://localhost:31283128"
+
+	env LANG=en_US.UTF-8 /usr/pkg/bin/w3m \
+	    -dump -T text/html -no-cookie -cols 72 \
+	    -o display_link=false \
+	    -o display_link_number=false \
+	    -o display_image=false
+}
+
+
+url=$2
+/usr/pkg/bin/curl -sL "${url}" |
+ extract_article |
+ filter_html |
+ html_to_text
diff -r 9c5c58307dd6 -r 54479ca33691 ansa/feed.dcgi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ansa/feed.dcgi Wed Aug 29 11:31:22 2018 +0200
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+
+ARTICLE_CGI="/cgi/ansa/article.cgi"
+
+
+echo ""
+echo "ANSA"
+echo ""
+
+url="http://www.ansa.it/sito/ansait_rss.xml"
+/usr/pkg/bin/curl -gs "${url}" |
+{ /usr/pkg/bin/xmllint --format - 2>/dev/null ; } |
+awk '
+/<channel>/,/<copyright>/ {
+ next # ignore all non-item-s
+}
+
+/<title/ {
+ gsub("</?title>", "")
+ gsub(/^[[:space:]]+/, "")
+ gsub(/[[:space:]]+$/, "")
+ title = $0
+}
+
+/<link/ {
+ gsub("</?link>", "")
+ gsub(/^[[:space:]]+/, "")
+ gsub(/[[:space:]]+$/, "")
+ link = $0
+}
+
+title && link {
+ gsub("\\|", "\\|", title)
+ gsub("\\|", "\\|", link)
+ gsub("^<!\\[CDATA\\[", "", title)
+ gsub("\\]\\]>$", "", title)
+
+ printf("[0|%s|'"${ARTICLE_CGI}?"'%s|server|port]\n", title, link)
+
+ title = link = ""
+}
+'
+
+echo ""