Add an extractor for il manifesto - gophercgis - Collection of gopher CGI/DCGI for geomyidae
(HTM) hg clone https://bitbucket.org/iamleot/gophercgis
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) changeset 57112e2a27fe6d67a2a57db055467df4fa4de6e9
(DIR) parent 8c321014eb1efc17e1cdd79c13fa791305c64868
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Sat, 8 Dec 2018 21:42:41
Add an extractor for il manifesto
Diffstat:
ilmanifesto/article.cgi | 36 ++++++++++++++++++++++++++++++++++++
ilmanifesto/feed.dcgi | 16 ++++++++++++++++
2 files changed, 52 insertions(+), 0 deletions(-)
---
diff -r 8c321014eb1e -r 57112e2a27fe ilmanifesto/article.cgi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ilmanifesto/article.cgi Sat Dec 08 21:42:41 2018 +0100
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+. ../common/config.sh
+. ../common/html.sh
+
+#
+# Extract article content
+#
+extract_article()
+{
+
+ awk '
+ / <div class="article-header"/,/ <\/div>/ {
+ # Remove the itemprop="articleSection" span
+ gsub(/<span itemprop="articleSection" .*<\/a><\/span>/, "")
+
+ print
+ }
+
+ /<div class="entry-content" itemprop="articleBody">/,/<div class="entry-tools">/ {
+ # Remove all <img> tags
+ gsub(/<img [^>]+\/?>/, "")
+
+ print
+ }
+ '
+}
+
+
+url="$2"
+curl -A Googlebot-News -Lgs -- "${url}" |
+ extract_article |
+ html_to_text
+
+echo ""
+echo "URL: <${url}>"
diff -r 8c321014eb1e -r 57112e2a27fe ilmanifesto/feed.dcgi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ilmanifesto/feed.dcgi Sat Dec 08 21:42:41 2018 +0100
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. ../common/config.sh
+. ../common/feed.sh
+
+FEED_AUTHOR=yes
+FEED_DATE=yes
+ARTICLE_CGI="${CGI_BASEDIR}/ilmanifesto/article.cgi"
+
+echo ""
+echo "il manifesto"
+echo ""
+
+url="https://ilmanifesto.it/feed/"
+curl -Lgs -- "${url}" |
+feed_to_gph