Add a CGI for internazionale.it articles - gophercgis - Collection of gopher CGI/DCGI for geomyidae
(HTM) hg clone https://bitbucket.org/iamleot/gophercgis
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) changeset 2ab2d279d287c9f00d72369888942fc318f43513
(DIR) parent c94afb13b1ca9c59235dffb84b10b180ae1595a9
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Tue, 21 Aug 2018 21:49:12
Add a CGI for internazionale.it articles
Diffstat:
internazionale/article.cgi | 71 ++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 71 insertions(+), 0 deletions(-)
---
diff -r c94afb13b1ca -r 2ab2d279d287 internazionale/article.cgi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/internazionale/article.cgi Tue Aug 21 21:49:12 2018 +0200
@@ -0,0 +1,71 @@
+#!/bin/sh
+
+
+#
+# Filter HTML content: copy stdin to stdout without the unwanted <div> blocks
+#
+filter_html()
+{
+ # Plain < and >: escaped as \< \> they are word anchors in GNU awk
+ awk '
+ /^<div class="hentry hentry--item hentry--banner">/ {
+ ignore = 1
+ next
+ }
+ /^<\/div>/ {
+ ignore = 0
+ next
+ }
+ # Tag and share blocks: skipped up to a closing </div> line
+ /^ <div class="item_tag_single">/ ||
+ /^ <div class="item_share_inline">/ {
+ ignore = 1
+ next
+ }
+ /^ <\/div>/ {
+ ignore = 0
+ next
+ }
+ # Keyword and subscribe blocks
+ /^ <div class="item_tags" itemprop="keywords">/ ||
+ /^ <div class="item_subscribe">/ {
+ ignore = 1
+ next
+ }
+ /^ <\/div>/ {
+ ignore = 0
+ next
+ }
+ # Lines outside an ignored block pass through unchanged
+ !ignore {
+ print
+ }
+ '
+
+}
+
+
+#
+# Render the HTML arriving on stdin as plain text by running w3m -dump
+#
+html_to_text()
+{
+ # Nonexistent proxy to block HTTP requests
+ http_proxy="http://localhost:31283128"
+ export http_proxy
+ browser="/usr/pkg/bin/w3m"
+
+ # display_link, display_link_number and display_image are all set to false
+ browser_options="-dump -T text/html -no-cookie -cols 72"
+ for setting in display_link display_link_number display_image; do
+  browser_options="${browser_options} -o ${setting}=false"
+ done
+ env LANG=en_US.UTF-8 ${browser} ${browser_options}
+}
+
+
+# Fetch the URL in $2 ("--" ends curl options) and render its <article> as text
+/usr/pkg/bin/curl -sL -- "$2" |
+ { /usr/pkg/bin/xmllint --html --format --xpath '//article' - 2>/dev/null ; } |
+ filter_html |
+ html_to_text