Add a CGI for rep.repubblica.it - gophercgis - Collection of gopher CGI/DCGI for geomyidae
(HTM) hg clone https://bitbucket.org/iamleot/gophercgis
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) changeset 41205430638ac133712ec23d42bb905c1cd6c346
(DIR) parent 97ac5640c437b5f6ecce9ac339711b3a33396b47
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Sun, 9 Sep 2018 01:32:31
Add a CGI for rep.repubblica.it
ATM only the "cover" is supported.
Diffstat:
rep/article.cgi | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
rep/cover.dcgi | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 109 insertions(+), 0 deletions(-)
---
diff -r 97ac5640c437 -r 41205430638a rep/article.cgi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rep/article.cgi Sun Sep 09 01:32:31 2018 +0200
@@ -0,0 +1,52 @@
+#!/bin/sh
+
+. ../common/html.sh
+
+#
+# extract_article: reduce the HTML on stdin to its article element.
+#
+# Prints every line from one containing the opening article tag through
+# the next one containing the closing article tag, or through the end
+# of input if that tag never comes, and drops every other line.
+#
+extract_article()
+{
+
+ awk '/<article/,/<\/article>/'
+}
+
+
+#
+# filter_html: keep only the readable parts of the article HTML read
+# from stdin.  Three awk rules run in this order on every line:
+#  - title: lines from the opening h1 tag of class detail-article_title
+#    through the closing h1 tag, after the text up to and including
+#    that tag name and class attribute is replaced by a bare h1 opener
+#  - summary: lines from the div of class detail-article_summary
+#    through the next closing div tag
+#  - body: lines from the div with amp-access=showContent up to, but
+#    not including, the line with the div of class detail-tag_container
+# A line selected by more than one rule is printed once per rule, and
+# the later rules see the title line as already rewritten.
+#
+filter_html()
+{
+
+ awk '
+ /<h1 class="detail-article_title"/,/<\/h1>/ {
+     sub(/^.*<h1 class="detail-article_title"/, "<h1")
+     print
+ }
+ /<div class="detail-article_summary">/,/<\/div>/
+ /<div amp-access="showContent"/,/<div class="detail-tag_container">/ {
+     if ($0 !~ /<div class="detail-tag_container">/) print
+ }
+ '
+}
+
+# Fetch the article whose URL arrives as $2 and run it through the
+# filters above and html_to_text.  The URL comes from the request, so
+# curl takes it via --url and is limited to http and https: it can
+# never be read as a curl option or as a file:// path.
+url=$2
+/usr/pkg/bin/curl -gsL --proto =http,https --url "${url}" |
+ extract_article | filter_html | html_to_text
diff -r 97ac5640c437 -r 41205430638a rep/cover.dcgi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rep/cover.dcgi Sun Sep 09 01:32:31 2018 +0200
@@ -0,0 +1,57 @@
+#!/bin/sh
+#
+# Gopher menu for the rep.repubblica.it cover: prints one entry per
+# cover article, each one linking to ARTICLE_CGI with the article URL.
+#
+ARTICLE_CGI="/cgi/rep/article.cgi"
+
+echo ""
+echo "Rep - Repubblica"
+echo ""
+
+# jq separates the five fields of an entry with the octal byte 034 and
+# ends each entry with 036, matching FS and RS in the awk program below.
+url="https://rep.repubblica.it/ws/cover.json"
+/usr/bin/ftp -V -o - "${url}" |
+/usr/pkg/bin/jq -r '.feed.zones | .[].blocks | .[].entries | .[] |
+ .title + "\u001c" +
+ .author + "\u001c" +
+ .created[0:10] + "\u001c" +
+ .links[1].href + "\u001c" +
+ .summary + "\u001e"
+' | awk -v article_cgi="${ARTICLE_CGI}" '
+BEGIN {
+ FS = "\034"
+ RS = "\036"
+}
+
+# Decode a fixed set of HTML entities in s and return the result;
+# literal curly quotes are straightened as well.
+function html_decode(s)
+{
+ gsub(/&ldquo;|“/, "\"", s);
+ gsub(/&rdquo;|”/, "\"", s);
+ gsub(/&rsquo;|’/, "\047", s);
+ gsub(/&agrave;/, "à", s);
+ gsub(/&eacute;/, "é", s);
+ gsub(/&egrave;/, "è", s);
+ gsub(/&igrave;/, "ì", s);
+ gsub(/&ograve;/, "ò", s);
+ gsub(/&ugrave;/, "ù", s);
+ return s
+}
+
+# Fields: 1 title, 2 author, 3 creation date, 4 link, 5 summary (unused).
+NF == 5 {
+ gsub(/\n/, "")
+ title = $1; author = $2
+ created = $3; link = $4
+ gsub("\\|", "\\|", title)
+ gsub("\\|", "\\|", link)
+ title = html_decode(title)
+
+ printf("[0|%s|%s|server|port]\n", title, article_cgi "?" link)
+ printf("t%s %s\n", author, created)
+ printf("\n")
+}
+'