#!/bin/sh
#
# Given https://*.bandcamp.com/music URLs, generates one feed entry for
# every album linked from each page.
#
# For each URL the listing page is fetched, album links are extracted,
# each album page is fetched in turn, and its embedded JSON-LD metadata is
# converted to a single TAB-separated line on stdout (see sfeedify).
#
# External tools used: curl, xmllint, json2tsv, awk, sed, head, date.
#
# usage: <script> [-n number_of_items] url...
#

#
# Fetch a URL and write the response body to stdout.
#   $1 - URL to fetch
# curl flags: no URL globbing (-g), follow at most 2 redirects, fail on
# HTTP errors (-f), stay quiet but still report errors (-s -S), and give
# up after 15 seconds (-m 15).
#
fetch() {
	curl -g -L --max-redirs 2 -f -s -S -m 15 "$1"
}

#
# Read a /music listing page (HTML) on stdin and print one album href per
# line, with any query string removed.
#
music_album_urls() {
	# xmllint diagnostics are discarded on purpose (stderr is dropped).
	xmllint --html --format \
		--xpath '//li[contains(@data-item-id, "album")]/a/@href' - 2>/dev/null |
		# Keep only the double-quoted attribute value of each line.
		sed -nE 's;.*"([^"]+)".*;\1;p' |
		# Drop everything from the first "?" onwards.
		sed -e 's;\?.*$;;'
}

#
# Read an album page (HTML) on stdin and print the text content of its
# <script type="application/ld+json"> element(s).
#
extract_album_json() {
	# xmllint diagnostics are discarded on purpose (stderr is dropped).
	# NOTE(review): the sed expression replaces an empty line with nothing,
	# which is effectively a no-op; possibly '/^$/d' was intended. Left
	# unchanged to preserve the existing output.
	xmllint --html --format \
		--xpath '//script[@type="application/ld+json"]/text()' - 2>/dev/null |
		sed -e 's/^$//'
}

#
# Read TAB-separated records on stdin (column 1: key path, column 2: type,
# column 3: value -- presumably the output format of json2tsv) describing
# one album, and print a single TAB-separated line:
#
#   timestamp, title, link, content, content type, id, author, enclosure
#
# The enclosure field is always empty. Nothing is printed when the input
# contains no non-empty ".@id" value.
#
sfeedify() {
	awk -F '\t' '
	$1 == ".name" {
		album_title = $3
	}
	$1 == ".datePublished" {
		# Strip everything except a conservative character set before the
		# value is embedded in a shell command line.
		gsub(/[^A-Za-z0-9:+ ]/, "", $3)
		# XXX: this is not portable (NetBSD date(1))
		cmd = sprintf("date -ud \"%s\" +%%s", $3)
		cmd | getline timestamp
		close(cmd)
	}
	$1 == ".@id" {
		# The same value serves as both the entry link and the entry id.
		link = $3
		id = $3
	}
	$1 == ".description" && $2 == "s" {
		content = $3
		content_type = "plain"
	}
	$1 == ".byArtist.name" {
		author = $3
	}
	END {
		if (id) {
			title = author " - " album_title
			# Last field (enclosure) is intentionally left empty.
			print timestamp "\t" title "\t" link "\t" content "\t" \
				content_type "\t" id "\t" author "\t" ""
		}
	}
	'
}

#
# Copy stdin to stdout, truncated to the first ${nitems} lines when the
# -n option was given. The value is passed to head(1) as one quoted
# argument so it is never word-split or glob-expanded.
#
limit_items() {
	if [ -n "${nitems}" ]; then
		head -n "${nitems}"
	else
		cat
	fi
}

#
# Print usage message to stderr and exit with status 1
#
usage() {
	printf 'usage: %s [-n number_of_items] ...\n' "${0##*/}" >&2
	exit 1
}

nitems=
while getopts n: f; do
	case $f in
	n)
		nitems=$OPTARG
		;;
	\?)
		usage
		;;
	esac
done
shift $((OPTIND - 1))

if [ $# -lt 1 ]; then
	usage
fi

for u in "$@"; do
	# Relative album hrefs are resolved against the parent of the given URL
	# (for ".../music" that is the site root).
	base=$(dirname "$u")

	# Read hrefs line by line instead of word-splitting a command
	# substitution: the hrefs come from remote HTML and must not be split on
	# blanks or glob-expanded against the current directory.
	fetch "$u" | music_album_urls | limit_items |
	while IFS= read -r a; do
		[ -n "$a" ] || continue
		case $a in
		http://* | https://*)
			album_url=$a
			;;
		*)
			album_url=${base}${a}
			;;
		esac
		# stdin of the inner pipeline is detached so that nothing in it can
		# consume the list of hrefs this loop is reading.
		fetch "${album_url}" </dev/null |
			extract_album_json |
			json2tsv |
			sfeedify
	done
done