article.cgi - gophercgis - Collection of gopher CGI/DCGI for geomyidae
(HTM) hg clone https://bitbucket.org/iamleot/gophercgis
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
article.cgi
---
1 #!/bin/sh
2
3 . ../common/config.sh
4 . ../common/html.sh
5
#
# Extract article content
#
# Reads HTML on stdin and writes to stdout every line from one that
# contains "<article" through the next one that contains "</article>",
# both included.  Lines outside such a region are dropped.
#
extract_article()
{

	awk '
	# Entering an article region.
	/<article/ { inside = 1 }

	# Emit every line while inside the region (boundary lines included).
	inside { print }

	# Leaving the region; checked last so the closing line is still printed.
	/<\/article>/ { inside = 0 }
	'
}
18
19
#
# Filter HTML content
#
# Reads article HTML on stdin and writes to stdout only three parts of
# it, each trimmed to its own markup:
#
#  - the headline: <h1 class="detail-article_title" ...> ... </h1>
#  - the summary:  <div class="detail-article_summary"> ... </div>,
#                  with its text wrapped in a <p> element
#  - the body:     from the "paywall" content <div> up to (excluding)
#                  <div class="detail-tag_container">, with the
#                  <noscript><img ...></noscript> fallbacks removed
#
filter_html()
{

	awk '
	# Remove every <noscript><img ... </noscript> block from s.
	# Each block ends at the nearest closing tag, so text sitting between
	# two images on the same line is kept (a greedy ".*" regex would
	# swallow everything from the first image to the last one).
	function strip_noscript_images(s,    open_at, tail, close_at)
	{
		while ((open_at = index(s, "<noscript><img")) > 0) {
			tail = substr(s, open_at)
			close_at = index(tail, "</noscript>")
			if (close_at == 0)
				break	# unterminated block: leave it untouched
			s = substr(s, 1, open_at - 1) \
			    substr(tail, close_at + length("</noscript>"))
		}
		return s
	}

	# Headline: work on a copy so $0 stays intact for the rules below.
	/<h1 class="detail-article_title" itemprop="headline">/,/<\/h1>/ {
		title = $0
		sub(/^.*<h1/, "<h1", title)
		sub(/<\/h1>.*$/, "</h1>", title)
		print title
	}

	# Summary: open a paragraph right after the opening <div> and close it
	# on the line carrying the closing </div> (preceded by a space).
	/<div class="detail-article_summary">/,/ <\/div>/ {
		sub(/^.*<div class="detail-article_summary">/, "<div class=\"detail-article_summary\"><p>")
		sub(/^.* <\/div>.*/, "</p></div>")
		print
	}

	# Body: everything from the paywall <div> up to the tag container,
	# which itself is cut away.
	/<div subscriptions-section="content" class="paywall">/,/<div class="detail-tag_container">/ {
		sub(/^.*<div subscriptions-section="content"/, "<div subscriptions-section=\"content\"")
		sub(/<div class="detail-tag_container">.*/, "")

		# Remove images
		$0 = strip_noscript_images($0)

		print
	}
	'

}
51
52
#
# Main: fetch the article given as second argument and print it as text,
# followed by a blank line and the source URL.
#
url=${2:-}
if [ -z "${url}" ]; then
	printf '%s\n' "${0##*/}: missing article URL (expected as second argument)" >&2
	exit 1
fi

# The URL comes from the request, so only let curl speak HTTP(S), both
# for the initial request and for any redirect followed because of -L;
# this keeps schemes such as file:// or dict:// out of reach.
# -g disables curl URL globbing, -s silences progress output.
curl -Lgs --proto '=http,https' --proto-redir '=http,https' -- "${url}" |
    extract_article |
    filter_html |
    html_to_text

# printf instead of echo: the URL is arbitrary data and some sh echo
# implementations would interpret backslash sequences in it.
printf '\nURL: <%s>\n' "${url}"