sfeed_download - randomcrap - random crap programs of varying quality
 git clone git://git.codemadness.org/randomcrap
       ---
       sfeed_download (2679B)
       ---
#!/bin/sh
# sfeed_download: downloader for URLs and enclosures in sfeed(5) files.
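# Usage: sfeed_download [sfeedfile...]
# Reads sfeed(5) TSV data from the given files, or from stdin when no
# file is specified.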
# Dependencies: awk, curl, flock, xargs (-P), yt-dlp.

cachefile="${SFEED_CACHEFILE:-$HOME/.sfeed/downloaded_urls}"
jobs="${SFEED_JOBS:-4}"
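# lockfile used with flock(1) to serialize appends to the cachefile from
# parallel download jobs.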
lockfile="${HOME}/.sfeed/sfeed_download.lock"

# log(feedname, s, status)
log() {
        if [ "$1" != "-" ]; then
                s="[$1] $2"
        else
                s="$2"
        fi
        printf '[%s]: %s: %s\n' "$(date +'%H:%M:%S')" "${s}" "$3"
}

# fetch(url, feedname)
fetch() {
        case "$1" in
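        # youtube links are handed to yt-dlp, known media file extensions
        # to curl; anything else is skipped.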
        *youtube.com*)
                yt-dlp "$1";;
        *.flac|*.ogg|*.m3u|*.m3u8|*.m4a|*.mkv|*.mp3|*.mp4|*.wav|*.webm)
                # allow 2 redirects, hide User-Agent, connect timeout is 15 seconds.
                curl -O -L --max-redirs 2 -H "User-Agent:" -f -s --connect-timeout 15 "$1";;
        esac
}

# downloader(url, title, feedname)
downloader() {
        url="$1"
        title="$2"
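        # use the basename of the feed file as the per-feed download
        # directory; "-" (stdin) means no directory is created.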
        feedname="${3##*/}"

        msg="${title}: ${url}"

        # download directory.
        if [ "${feedname}" != "-" ]; then
                mkdir -p "${feedname}"
                if ! cd "${feedname}"; then
                        log "${feedname}" "${msg}: ${feedname}" "DIR FAIL" >&2
                        return 1
                fi
        fi

        log "${feedname}" "${msg}" "START"
        if fetch "${url}" "${feedname}"; then
                log "${feedname}" "${msg}" "OK"

                # append it safely in parallel to the cachefile on a
                # successful download.
                (flock 9 || exit 1
                printf '%s\n' "${url}" >> "${cachefile}"
                ) 9>"${lockfile}"
        else
                log "${feedname}" "${msg}" "FAIL" >&2
                return 1
        fi
        return 0
}

if [ "${SFEED_DOWNLOAD_CHILD}" = "1" ]; then
        # Downloader helper for parallel downloading.
        # Receives arguments: $1 = URL, $2 = title, $3 = feed filename or "-".
        # It should write the URI to the cachefile if it is successful.
        downloader "$1" "$2" "$3"
        exit $?
fi

# ...else parent mode:
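# Copy the cachefile to a temporary file, let awk pick the URLs from the
# feed file(s) that were not downloaded before, then pass the
# URL/title/feedname triplets NUL-separated to xargs, which re-executes
# this script in child mode with up to $jobs downloads in parallel.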

tmp="$(mktemp)" || exit 1
trap "rm -f ${tmp}" EXIT

[ -f "${cachefile}" ] || touch "${cachefile}"
cat "${cachefile}" > "${tmp}"
echo >> "${tmp}" # force at least one line so awk counts it as the first input file.

LC_ALL=C awk -F '\t' '
# fast prefilter: decide whether a URL should be downloaded at all.
function filter(url, field, feedname) {
        u = tolower(url);
        return (match(u, "youtube\\.com") ||
                match(u, "\\.(flac|ogg|m3u|m3u8|m4a|mkv|mp3|mp4|wav|webm)$"));
}
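# download(url, field, title, filename): print a NUL-separated
# url/title/filename record for xargs if the URL is set, passes the
# filter and was not already downloaded or printed.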
function download(url, field, title, filename) {
        if (!length(url) || urls[url] || !filter(url, field, filename))
                return;
        # NUL-separated for xargs -0.
        printf("%s%c%s%c%s%c", url, 0, title, 0, filename, 0);
        urls[url] = 1; # print once
}
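# FILENR: number of the current input file; 1 is the cachefile,
# 2 and up are the feed file(s).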
{
        FILENR += (FNR == 1);
}
# lookup table from cachefile which contains downloaded URLs.
FILENR == 1 {
        urls[$0] = 1;
}
# feed file(s).
FILENR != 1 {
        download($3, 3, $2, FILENAME); # link
        download($8, 8, $2, FILENAME); # enclosure
}
' "${tmp}" "${@:--}" | \
SFEED_DOWNLOAD_CHILD="1" xargs -r -0 -L 3 -P "${jobs}" "$(readlink -f "$0")"