#!/bin/sh

#
# Given https://*.bandcamp.com/music URLs, generates a tab-separated entry for every album
#

#
# Download the URL given as $1 and write the response body to stdout
#
# curl is told to take the URL literally (no globbing), to follow at most
# two redirects, to fail on HTTP errors, to stay quiet except for error
# messages, and to give up after 15 seconds.
#
fetch()
{
	url=$1

	curl --globoff --location --max-redirs 2 \
	    --fail --silent --show-error --max-time 15 "$url"
}

#
# Read the HTML of a music page on stdin and print the href of every album
# link, one per line, with any query string removed
#
# The hrefs are printed as found in the page, so they may be absolute or
# relative URLs.
#
music_album_urls()
{
	xmllint --html --format \
	    --xpath '//li[contains(@data-item-id, "album")]/a/@href' - 2>/dev/null |
	awk '
{
	# Depending on the libxml2 version, xmllint prints the matching
	# attributes either one per line or all on a single line, so pick
	# out every href="..." occurrence instead of one value per line.
	rest = $0
	while (match(rest, /href="[^"]+"/)) {
		# skip the leading href=" (6 chars) and the closing quote
		href = substr(rest, RSTART + 6, RLENGTH - 7)
		rest = substr(rest, RSTART + RLENGTH)

		# drop the query string
		sub(/[?].*$/, "", href)
		print href
	}
}
'
}

#
# Read an HTML page on stdin and print the text content of its
# <script type="application/ld+json"> elements
#
# A CDATA opener at the start of a line and a CDATA terminator at the end
# of a line are removed from the extracted text.
#
extract_album_json()
{
	xmllint --html --format --xpath \
	    '//script[@type="application/ld+json"]/text()' - 2>/dev/null |
	sed -e 's,^<!\[CDATA\[,,' -e 's,\]\]>$,,'
}

#
# Read tab-separated "key<TAB>type<TAB>value" records on stdin and print a
# single tab-separated line describing the album:
#
#   timestamp, "author - album", link, content, content type, id, author,
#   enclosure (always empty)
#
# Nothing is printed when the input has no ".@id" record.
#
sfeedify()
{
	awk '
BEGIN {
	FS = "\t"
}

# the album URL doubles as link and as unique id
$1 == ".@id" {
	url = $3
}

$1 == ".name" {
	album = $3
}

$1 == ".byArtist.name" {
	artist = $3
}

# only string descriptions are used as content
$1 == ".description" && $2 == "s" {
	body = $3
	body_type = "plain"
}

$1 == ".datePublished" {
	# keep only characters that are harmless inside the double-quoted
	# shell argument built below
	when = $3
	gsub(/[^A-Za-z0-9:+ ]/, "", when)

	# XXX: this is not portable (NetBSD date(1))
	datecmd = "date -ud \"" when "\" +%s"
	datecmd | getline epoch
	close(datecmd)
}

END {
	# enclosure is never assigned, so the last field stays empty
	if (url)
		printf "%s\t%s - %s\t%s\t%s\t%s\t%s\t%s\t%s\n",
		    epoch, artist, album, url, body, body_type,
		    url, artist, enclosure
}
'
}


#
# Print the usage message to stderr and exit with status 1
#
# The message goes to stderr so that it never ends up in the data this
# script writes to stdout.
#
usage()
{
	# ${0##*/} strips the directory part without word-splitting $0
	printf 'usage: %s [-n number_of_items] ...\n' "${0##*/}" >&2
	exit 1
}


# Filter applied to the list of album URLs of each page; replaced by
# head(1) when -n is given so that only the first albums are processed.
limiter=cat


while getopts n: f; do
	case $f in
	n)
		nitems=$OPTARG
		# The value is expanded unquoted into the head(1) command
		# line below, so accept nothing but a non-negative integer.
		case $nitems in
		'' | *[!0-9]*) usage ;;
		esac
		limiter="head -n ${nitems}"
		;;
	\?) usage ;;
	esac
done
shift $((OPTIND - 1))


if [ $# -lt 1 ]; then
	usage
fi


for u in "$@"; do
	# Site root (scheme://host) of the page, used to complete relative
	# album links.  Unlike dirname(1) this also works when the URL has
	# no path component at all.
	case $u in
	*://*)
		rest=${u#*://}
		base=${u%%://*}://${rest%%[/?#]*}
		;;
	*)
		base=${u%%[/?#]*}
		;;
	esac

	# ${limiter} is left unquoted on purpose: it holds a command plus
	# its arguments.  Reading line by line avoids the word splitting
	# and globbing a for loop over $(...) would apply to the links.
	fetch "$u" | music_album_urls | ${limiter} |
	while IFS= read -r a; do
		[ -n "$a" ] || continue

		case $a in
		http://* | https://* )
			album_url=$a
			;;
		/* )
			album_url=${base}${a}
			;;
		* )
			# link without a leading slash: treated as relative to
			# the site root, which is where the music page lives
			album_url=${base}/${a}
			;;
		esac

		# stdin is redirected so that nothing in this pipeline can
		# consume the remaining album links the loop reads from
		fetch "${album_url}" </dev/null |
		    extract_album_json | json2tsv | sfeedify
	done
done