article.cgi - gophercgis - Collection of gopher CGI/DCGI for geomyidae
 (HTM) hg clone https://bitbucket.org/iamleot/gophercgis
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       article.cgi
       ---
            1 #!/bin/sh
            2 
            3 . ../common/config.sh
            4 . ../common/html.sh
            5 
            #
            # Extract article content
            #
            # Copies stdin to stdout, keeping only the lines from one matching
            # "<article" through the next one matching "</article>" (both included).
            # Everything outside such a span is discarded.
            #
            extract_article()
            {

                    awk '
                    # Entering the article element: start copying with this line.
                    /<article/ { in_article = 1 }

                    # Bare pattern, default action: print while inside the element.
                    in_article

                    # Closing tag seen: this line was already printed, stop after it.
                    /<\/article>/ { in_article = 0 }
                    '
            }
           18 
           19 
           #
           # Filter HTML content
           #
           # Reads article HTML on stdin and writes to stdout only these pieces:
           #   - the <h1 class="detail-article_title" itemprop="headline"> title,
           #     trimmed to the <h1>...</h1> element itself;
           #   - the <div class="detail-article_summary"> block, with its text
           #     wrapped in <p>...</p>;
           #   - the <div subscriptions-section="content" class="paywall"> body up to
           #     (but not including) <div class="detail-tag_container">, with
           #     <noscript><img ...></noscript> fallbacks removed.
           # All other input lines are dropped.
           #
           filter_html()
           {

                   awk '
                   # Return s with every "<noscript><img ... </noscript>" span removed.
                   # Each span ends at the nearest closing tag, so text sitting between
                   # two images on the same line is kept (a greedy regex would delete it).
                   # A span with no closing tag on the line is left untouched.
                   function strip_images(s,    kept, from, tail, to)
                   {
                           kept = ""
                           while ((from = index(s, "<noscript><img")) > 0) {
                                   tail = substr(s, from)
                                   to = index(tail, "</noscript>")
                                   if (to == 0)
                                           break
                                   kept = kept substr(s, 1, from - 1)
                                   s = substr(tail, to + length("</noscript>"))
                           }
                           return kept s
                   }

                   /<h1 class="detail-article_title" itemprop="headline">/,/<\/h1>/ {
                           # Work on a copy so the later rules still see the full line.
                           title = $0
                           sub(/^.*<h1/, "<h1", title)
                           sub(/<\/h1>.*$/, "</h1>", title)
                           print title
                   }
                   /<div class="detail-article_summary">/,/ <\/div>/ {
                           # Open a paragraph right after the summary <div> ...
                           sub(/^.*<div class="detail-article_summary">/, "<div class=\"detail-article_summary\"><p>")
                           # ... and close it on the line holding the closing </div>.
                           sub(/^.* <\/div>.*/, "</p></div>")
                           print
                   }

                   /<div subscriptions-section="content" class="paywall">/,/<div class="detail-tag_container">/ {
                           # Drop anything before the body <div> and the tag container
                           # (plus whatever follows it) on the boundary lines.
                           sub(/^.*<div subscriptions-section="content"/, "<div subscriptions-section=\"content\"")
                           sub(/<div class="detail-tag_container">.*/, "")

                           # Remove images
                           $0 = strip_images($0)

                           print
                   }
                   '

           }
           51 
           52 
           # The article URL is taken from the second positional argument.
           url=$2

           # Download the page (following redirects, URL globbing off, silent), cut
           # it down to the article markup, then render it as text.
           # html_to_text is not defined in this file; presumably it comes from the
           # sourced ../common/html.sh -- confirm.
           curl -Lgs -- "${url}" |
               extract_article |
               filter_html |
               html_to_text

           # Footer: blank line, then the source URL. printf is used instead of echo
           # because some sh implementations make echo interpret backslash sequences
           # in its operands, which would mangle a URL containing a backslash.
           printf '\n'
           printf 'URL: <%s>\n' "${url}"