nos.sh - randomcrap - random crap programs of varying quality
 (HTM) git clone git://git.codemadness.org/randomcrap
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       nos.sh (1864B)
       ---
            1 #!/bin/sh
            2 # Dependencies: awk, curl, jaq, mktemp, sed, randomcrap repo: extractjson directory.
            3 # read attached JSON resources and parse them.
            4 # parse livestream urls.
            5 # show only unique urls.
            6 #
            7 # DEBUG: extractjson < /tmp/p | while read -r l; do printf '%s' "$l" | json2tsv; done | less
            8 
            9 tmp=$(mktemp) || exit 1 # scratch file for the fetched page; stop if it cannot be created.
           10 trap 'rm -f -- "$tmp"' EXIT # single quotes: $tmp is expanded (and quoted) when the trap runs.
           11 
           12 # fetch the url in $1. -L: redirection is needed, some urls redirect to CDNs such as cloudfront.
           13 curl -L -m 10 -H 'User-Agent:' "$1" > "$tmp"
           14 
           15 #extractjson < "$tmp" | \
           16 #while read -r line; do
           17 #        printf '%s' "$line" | \
           18 #                jaq '{ print $1 "\t" $2 "\t" $3; }'
           19 #done
           20 
           21 (
           22 # JSON resources
           23 extractjson < "$tmp" | \
           24 while read -r line; do
           25         printf '%s' "$line" | \
           26                 jaq '
           27 # print url, prefixed with "title [duration]: " or "title: " when a title is set.
           28 # nothing is printed while url is empty.
           29 function printitem() {
           30         if (url != "") {
           31                 item = url;
           32                 if (title != "" && duration != "")
           33                         item = title " [" duration "]: " url;
           34                 else if (title != "")
           35                         item = title ": " url;
           36                 print item;
           37         }
           38 }
           39 # list elements: a new items[] or storytellItems[] node prints the pending item and clears the fields; the rules below refill them from $3.
           40 $1 ~ /\.(items|storytellItems)\[\]$/ { printitem(); url = title = duration = ""; }
           41 $1 ~ /\.(items|storytellItems)\[\]\.title$/ { title = $3; }
           42 $1 ~ /\.(items|storytellItems)\[\]\.duration$/ { duration = $3; }
           43 $1 ~ /\.(items|storytellItems)\[\]\.source\.url$/ { url = $3; }
           44 
           45 # same handling for a "video" node; $2 == "o" presumably selects the JSON object type (TODO confirm against the jaq input format).
           46 $1 ~ /\.video$/ && $2 == "o" { printitem(); url = title = duration = ""; }
           47 $1 ~ /\.video\.title$/ { title = $3; }
           48 $1 ~ /\.video\.duration$/ { duration = $3; }
           49 $1 ~ /\.video\.source\.url$/ { url = $3; }
           50 
           51 # old pages URLs: printed as soon as the url is seen; title and duration keep whatever was set last and nothing is reset.
           52 $1 ~ /\.url\.mp4$/ && $2 == "s" { url = $3; printitem(); }
           53 $1 ~ /\.stream$/ && $2 == "s" && $3 ~ /^(http|https):\/\// { url = $3; printitem(); }
           54 $1 ~ /\.contentUrl$/ && $2 == "s" && $3 ~ /^(http|https):\/\// { url = $3; printitem(); }
           55 
           56 #$1 ~ /\.url$/ && $2 == "s" && $3 ~ /^(http|https):\/\// && $3 ~ /streaming/ { url = $3; }
           57 
           58 END {
           59         printitem(); # print the item that is still pending at end of input.
           60 }
           61 '
           62 done
           63 
           64 # livestreams (old pages): print the value of a data-stream="..." attribute that contains ".m3u8", one per matching line.
           65 sed -En 's@.*data-stream="([^"]*\.m3u8[^"]*)".*@\1@p' < "$tmp"
           66 ) | \
           67 LC_ALL=C awk '!x[$0]++'