tccr.it

       article.cgi - gophercgis - Collection of gopher CGI/DCGI for geomyidae
 (HTM) hg clone https://bitbucket.org/iamleot/gophercgis
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       article.cgi
       ---
            1 #!/bin/sh
            2 
            3 . ../common/config.sh
            4 . ../common/html.sh
            5 
            6 #
            7 # Extract article content
            8 #
            9 extract_article()
           10 {
           11 
           12         awk '
           13         /       <div class="article-header"/,/  <\/div>/ {
           14                 # Remove the article section
           15                 gsub(/<span itemprop="articleSection" .*<\/a><\/span>/, "")
           16 
           17                 print
           18         }
           19 
           20         /<div class="entry-content" itemprop="articleBody">/,/<div class="entry-tools">/ {
           21                 # Remove all img-s
           22                 gsub(/<img [^>]+\/?>/, "")
           23 
           24                 print
           25         }
           26 
           27         /class="readoffline-shortcode"/ {
           28                 match($0, /class="pdf" rel="nofollow" href="[^"]+"/)
           29                 printf("<pre>")
           30                 printf("PDF:  &lt;%s&gt;\n", substr($0, RSTART + 33, RLENGTH - 34))
           31                 match($0, /class="epub" rel="nofollow" href="[^"]+"/)
           32                 printf("EPUB: &lt;%s&gt;\n", substr($0, RSTART + 34, RLENGTH - 35))
           33                 match($0, /class="mobi" rel="nofollow" href="[^"]+"/)
           34                 printf("MOBI: &lt;%s&gt;\n", substr($0, RSTART + 34, RLENGTH - 35))
           35                 printf("</pre>")
           36         }
           37         '
           38 }
           39 
           40 
           41 url="$2"
           42 curl -A Googlebot-News -Lgs -- "${url}" |
           43     extract_article |
           44     html_to_text
           45 
           46 echo ""
           47 echo "URL: <${url}>"