sfeed_download - randomcrap - random crap programs of varying quality
git clone git://git.codemadness.org/randomcrap
---
sfeed_download (2679B)
---
#!/bin/sh
# sfeed_download: downloader for URLs and enclosures in sfeed(5) files.
# Dependencies: awk, curl, flock, xargs (-P), yt-dlp.

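# Example usage (paths are only illustrative); the feed files are sfeed(5)
# TSV output, for example as written by sfeed_update:
#   sfeed_download ~/.sfeed/feeds/*
# or read a single feed from stdin:
#   sfeed_download < ~/.sfeed/feeds/somefeed
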
cachefile="${SFEED_CACHEFILE:-$HOME/.sfeed/downloaded_urls}"
jobs="${SFEED_JOBS:-4}"
lockfile="${HOME}/.sfeed/sfeed_download.lock"

# log(feedname, s, status)
log() {
	if [ "$1" != "-" ]; then
		s="[$1] $2"
	else
		s="$2"
	fi
	printf '[%s]: %s: %s\n' "$(date +'%H:%M:%S')" "${s}" "$3"
}
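# e.g. log "news" "Some title: https://example.org/file.mp3" "OK" prints:
#   [13:37:00]: [news] Some title: https://example.org/file.mp3: OK
# (the timestamp and values above are only illustrative.)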

# fetch(url, feedname)
fetch() {
	case "$1" in
	*youtube.com*)
		yt-dlp "$1";;
	*.flac|*.ogg|*.m3u|*.m3u8|*.m4a|*.mkv|*.mp3|*.mp4|*.wav|*.webm)
		# allow 2 redirects, hide User-Agent, connect timeout is 15 seconds.
		curl -O -L --max-redirs 2 -H "User-Agent:" -f -s --connect-timeout 15 "$1";;
	esac
}
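# Other sites could be handed to yt-dlp with an extra case above, e.g.
# (assuming yt-dlp supports the site):
#	*vimeo.com*)
#		yt-dlp "$1";;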

# downloader(url, title, feedname)
downloader() {
	url="$1"
	title="$2"
	feedname="${3##*/}"

	msg="${title}: ${url}"

	# download directory.
	if [ "${feedname}" != "-" ]; then
		mkdir -p "${feedname}"
		if ! cd "${feedname}"; then
			log "${feedname}" "${msg}: ${feedname}" "DIR FAIL" >&2
			return 1
		fi
	fi

	log "${feedname}" "${msg}" "START"
	if fetch "${url}" "${feedname}"; then
		log "${feedname}" "${msg}" "OK"

		# on a successful download, append the URL to the cachefile;
		# flock serializes writers from the parallel jobs.
		(flock 9 || exit 1
		printf '%s\n' "${url}" >> "${cachefile}"
		) 9>"${lockfile}"
	else
		log "${feedname}" "${msg}" "FAIL" >&2
		return 1
	fi
	return 0
}

if [ "${SFEED_DOWNLOAD_CHILD}" = "1" ]; then
	# Downloader helper for parallel downloading.
	# Receives arguments: $1 = URL, $2 = title, $3 = feed filename or "-".
	# It should write the URI to the cachefile if it is successful.
	downloader "$1" "$2" "$3"
	exit $?
fi
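# Each child is started by the xargs pipeline below, roughly as:
#   SFEED_DOWNLOAD_CHILD=1 /path/to/sfeed_download <url> <title> <feed filename or "-">
# (the placeholders are only illustrative.)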

# ...else parent mode:

tmp="$(mktemp)" || exit 1
trap "rm -f ${tmp}" EXIT

[ -f "${cachefile}" ] || touch "${cachefile}"
cat "${cachefile}" > "${tmp}"
echo >> "${tmp}" # force at least one line so awk does not skip an empty cachefile.

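# sfeed(5) lines are TAB-separated; field 2 is the item title, field 3 the
# link and field 8 the enclosure URL.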
LC_ALL=C awk -F '\t' '
# fast prefilter: decide whether a URL should be downloaded at all.
function filter(url, field, feedname) {
	u = tolower(url);
	return (match(u, "youtube\\.com") ||
		match(u, "\\.(flac|ogg|m3u|m3u8|m4a|mkv|mp3|mp4|wav|webm)$"));
}
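# e.g. "https://example.org/podcast/episode1.mp3" passes the filter above,
# a plain HTML article link does not (the URL is only an example).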
function download(url, field, title, filename) {
	if (!length(url) || urls[url] || !filter(url, field, filename))
		return;
	# NUL-separated for xargs -0.
	printf("%s%c%s%c%s%c", url, 0, title, 0, filename, 0);
	urls[url] = 1; # print once
}
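# FILENR counts which input file is being read: 1 while reading the
# cachefile copy, higher for the feed file(s) that follow.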
{
	FILENR += (FNR == 1);
}
# lookup table from cachefile which contains downloaded URLs.
FILENR == 1 {
	urls[$0] = 1;
}
# feed file(s).
FILENR != 1 {
	download($3, 3, $2, FILENAME); # link
	download($8, 8, $2, FILENAME); # enclosure
}
' "${tmp}" "${@:--}" | \
SFEED_DOWNLOAD_CHILD="1" xargs -r -0 -L 3 -P "${jobs}" "$(readlink -f "$0")"