newsboat2sfeed.sh - randomcrap - random crap programs of varying quality
 (HTM) git clone git://git.codemadness.org/randomcrap
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       newsboat2sfeed.sh (2206B)
       ---
            1 #!/bin/sh
            2 # Export newsbeuter/newsboat cached items from sqlite3 to the sfeed TSV format.
            3 # The data is split per file per feed with the name of the newsboat title/url.
            4 # It writes the URLs of the read items line by line to a "urls" file.
            5 #
            6 # Dependencies: sqlite3, awk.
            7 #
            8 # Usage: create some directory to store the feeds then run this script.
            9 
           10 # newsboat cache.db file.
           11 cachefile="$HOME/.newsboat/cache.db"
           12 test -n "$1" && cachefile="$1"
           13 
           14 # dump data.
           15 # .mode ascii: Columns/rows delimited by 0x1F and 0x1E
           16 # get the first fields in the order of the sfeed(5) format.
           17 sqlite3 "$cachefile" <<!EOF |
           18 .headers off
           19 .mode ascii
           20 .output
           21 SELECT
           22         i.pubDate, i.title, i.url, i.content, i.content_mime_type,
           23         i.guid, i.author, i.enclosure_url,
           24         f.rssurl AS rssurl, f.title AS feedtitle, i.unread
           25         -- i.id, i.enclosure_type, i.enqueued, i.flags, i.deleted, i.base
           26 FROM rss_feed f
           27 INNER JOIN rss_item i ON i.feedurl = f.rssurl
           28 ORDER BY
           29         i.feedurl ASC, i.pubDate DESC;
           30 .quit
           31 !EOF
           32 # convert to sfeed(5) TSV format.
           33 LC_ALL=C awk '
           34 BEGIN {
           35         FS = "\x1f";
           36         RS = "\x1e";
           37 }
           38 # normal non-content fields.
           39 function field(s) {
           40         gsub("^[[:space:]]*", "", s);
           41         gsub("[[:space:]]*$", "", s);
           42         gsub("[[:space:]]", " ", s);
           43         gsub("[[:cntrl:]]", "", s);
           44         return s;
           45 }
           46 # content field.
           47 function content(s) {
           48         gsub("^[[:space:]]*", "", s);
           49         gsub("[[:space:]]*$", "", s);
           50         # escape chars in content field.
           51         gsub("\\\\", "\\\\", s);
           52         gsub("\n", "\\n", s);
           53         gsub("\t", "\\t", s);
           54         return s;
           55 }
           56 function feedname(feedurl, feedtitle) {
           57         if (feedtitle == "") {
           58                 gsub("/", "_", feedurl);
           59                 return feedurl;
           60         }
           61         gsub("/", "_", feedtitle);
           62         return feedtitle;
           63 }
           64 {
           65         fname = feedname($9, $10);
           66         if (!feed[fname]++) {
           67                 print "Writing file: \"" fname "\" (title: " $10 ", url: " $9 ")" > "/dev/stderr";
           68         }
           69 
           70         contenttype = field($5);
           71         if (contenttype == "")
           72                 contenttype = "html";
           73         else if (index(contenttype, "/html") || index(contenttype, "/xhtml"))
           74                 contenttype = "html";
           75         else
           76                 contenttype = "plain";
           77 
           78         print $1 "\t" field($2) "\t" field($3) "\t" content($4) "\t" \
           79                 contenttype "\t" field($6) "\t" field($7) "\t" field($8) "\t" \
           80                 > fname;
           81 
           82         # write URLs of the read items to a file line by line.
           83         if ($11 == "0") {
           84                 print $3 > "urls";
           85         }
           86 }'