newsboat2sfeed.sh - randomcrap - random crap programs of varying quality
(HTM) git clone git://git.codemadness.org/randomcrap
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
newsboat2sfeed.sh (2206B)
---
#!/bin/sh
# Export newsbeuter/newsboat cached items from sqlite3 to the sfeed TSV format.
# The data is split per file per feed with the name of the newsboat title/url.
# It writes the URLs of the read items line by line to a "urls" file.
#
# Dependencies: sqlite3, awk.
#
# Usage: newsboat2sfeed.sh [cachefile]
#
# Create some directory to store the feeds, change into it and then run this
# script: the feed files and the "urls" file are written relative to the
# current directory.

# newsboat cache.db file, can be overridden by the first argument.
cachefile="${1:-$HOME/.newsboat/cache.db}"

# fail early with a clear message instead of handing sqlite3 a bad path.
if ! test -r "$cachefile"; then
	printf '%s: cannot read cache file: %s\n' "$0" "$cachefile" >&2
	exit 1
fi

# dump data.
# .mode ascii: Columns/rows delimited by 0x1F and 0x1E
# get the first fields in the order of the sfeed(5) format.
# The here-document delimiter is quoted so the shell does not expand anything
# inside the SQL text.
sqlite3 "$cachefile" <<'EOF' |
.headers off
.mode ascii
.output
SELECT
	i.pubDate, i.title, i.url, i.content, i.content_mime_type,
	i.guid, i.author, i.enclosure_url,
	f.rssurl AS rssurl, f.title AS feedtitle, i.unread
	-- i.id, i.enclosure_type, i.enqueued, i.flags, i.deleted, i.base
FROM rss_feed f
INNER JOIN rss_item i ON i.feedurl = f.rssurl
ORDER BY
	i.feedurl ASC, i.pubDate DESC;
.quit
EOF
# convert to sfeed(5) TSV format.
LC_ALL=C awk '
BEGIN {
	# unit separator (0x1F) and record separator (0x1E) as emitted by
	# ".mode ascii". Octal escapes are used because hexadecimal escapes
	# in awk strings are not specified by POSIX.
	FS = "\037";
	RS = "\036";
}
# normal non-content fields: trim surrounding whitespace, turn any remaining
# whitespace character into a plain space and drop other control characters,
# so the value can never contain a TAB or newline.
function field(s) {
	sub(/^[[:space:]]+/, "", s);
	sub(/[[:space:]]+$/, "", s);
	gsub(/[[:space:]]/, " ", s);
	gsub(/[[:cntrl:]]/, "", s);
	return s;
}
# content field: trim surrounding whitespace and escape backslash, newline
# and TAB so the content fits on one TSV line.
function content(s) {
	sub(/^[[:space:]]+/, "", s);
	sub(/[[:space:]]+$/, "", s);
	# escape chars in content field.
	# "&&" doubles the matched backslash. A replacement made of literal
	# backslashes is interpreted differently between awk implementations
	# and can silently leave the backslash unescaped.
	gsub(/\\/, "&&", s);
	gsub(/\n/, "\\n", s);
	gsub(/\t/, "\\t", s);
	return s;
}
# output file name for a feed: the feed title, or the feed URL when the title
# is empty. Slashes are replaced so the name stays inside the current directory.
function feedname(feedurl, feedtitle) {
	if (feedtitle == "") {
		gsub("/", "_", feedurl);
		return feedurl;
	}
	gsub("/", "_", feedtitle);
	return feedtitle;
}
# skip incomplete records (for example stray trailing bytes after the last
# row): they would otherwise produce an empty file name.
NF < 11 {
	next;
}
{
	fname = feedname($9, $10);
	# announce each output file once, on its first item.
	if (!feed[fname]++) {
		print "Writing file: \"" fname "\" (title: " $10 ", url: " $9 ")" > "/dev/stderr";
	}

	# map the MIME type to the sfeed content-type: "html" or "plain".
	# An empty type is treated as HTML.
	contenttype = field($5);
	if (contenttype == "")
		contenttype = "html";
	else if (index(contenttype, "/html") || index(contenttype, "/xhtml"))
		contenttype = "html";
	else
		contenttype = "plain";

	# timestamp, title, link, content, content-type, id, author, enclosure.
	# The trailing TAB leaves the last field empty.
	line = $1 "\t" field($2) "\t" field($3) "\t" content($4) "\t" \
		contenttype "\t" field($6) "\t" field($7) "\t" field($8) "\t";
	print line > fname;

	# write URLs of the read items to a file line by line.
	if ($11 == "0") {
		print $3 > "urls";
	}
}'