tshdl - scholarref - tools for DOI and BibTeX reference extraction, fetching, and parsing
 (HTM) git clone git://src.adamsgaard.dk/scholarref
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       tshdl (5073B)
       ---
            1 #!/bin/sh
            2 set -e
            3 
            4 version=1.0
            5 shhost="http://sci-hub.st"
            6 
            7 show_help() {
            8     echo "usage: ${0##*/} [OPTIONS] [DOIs ...]"
            9     echo "will fetch each DOI from $shhost"
           10     echo "If no DOI is specified, this program will expect DOIs as stdin."
           11     echo "The program extracts the DOI if a full doi.org URL is passed to it."
           12     echo "If the PDF download fails, e.g. due to captias, a tor-browser or "
           13     echo "alternatively the system default browser will open sci-hub."
           14     echo
           15     echo "OPTIONS are one or more of the following:"
           16     echo "   -h,  --help          show this message"
           17     echo "   -v,  --version       show version and license information"
           18     echo "   -V,  --verbose       show verbose information"
           19     echo "   -t,  --tor-socks     use torsocks for requests to sci-hub"
           20     echo "   -b,  --browser       open sci-hub in browser immediately"
           21     echo "   -r,  --reference     add reference to bibliography using scholarref"
           22     echo "   -n,  --notify        send desktop notification when complete"
           23     echo "   -o,  --open          open downloaded document when complete"
           24     echo "   --                   do not consider any following args as options"
           25 }
           26 
           27 show_version() {
           28     echo "${0##*/} version $version"
           29     echo "Licensed under the ISC License"
           30     echo "written by Anders Damsgaard, anders@adamsgaard.dk"
           31     echo "https://gitlab.com/admesg/dotfiles"
           32 }
           33 
           34 regexmatch() {
           35         printf '%s' "$1" | grep -qE "$2"
           36 }
           37 
           38 browser_open() {
           39     echo "$1"
           40     if command -v tor-browser 2>/dev/null; then
           41         [ "$verbose" = 1 ] && echo "attempting to launch tor-browser"
           42         tor-browser "$1"
           43     elif command -v xdg-open 2>/dev/null; then
           44         [ "$verbose" = 1 ] && echo "launching default browser"
           45         xdg-open "$1"
           46     elif command -v open 2>/dev/null; then
           47         [ "$verbose" = 1 ] && echo "launching default browser"
           48         open "$1"
           49     else
           50         die 'Error: could not open a browser'
           51     fi
           52 }
           53 
           54 shdl_fetch() {
           55     shurl="$shhost/$doi"
           56     [ "$verbose" = 1 ] && echo "processing $doi"
           57     [ "$verbose" = 1 ] && echo "connecting to $shurl $prefix"
           58     pdfurl="$($prefix curl --connect-timeout 30 --silent "$shurl" | \
           59         grep location.href | grep -o '//.*pdf' | sed 's/^/http:/')"
           60     [ "$verbose" = 1 ] && echo "fetching pdf from $pdfurl"
           61     if ! file="$($prefix curl --connect-timeout 30 \
           62         --write-out "filename: %{filename_effective}\n" -O -L -J "$pdfurl" |\
           63         grep 'filename: ' | cut -d' ' -f2)"; then
           64         if [ "$verbose" = 1 ]; then
           65             [ "$notify" = 1 ] && notify-send "${0##*/}" "Error: could not fetch $doi"
           66             (echo "Error: could not fetch $doi PDF from $pdfurl" >&2)
           67         fi
           68         browser_open "$shurl"
           69     else
           70         echo "saved to $file"
           71         if [ ! -f "$file" ] || [ "$(file "$file" | cut -d' ' -f2)" = "HTML" ]; then
           72             (echo "Error: downloaded file $file is not valid. Opening browser..." >&2)
           73             [ -f "$file" ] && rm "$file"
           74             browser_open "$shurl"
           75         elif [ "$open" = 1 ]; then
           76             xdg-open "$file"
           77         fi
           78     fi
           79 }
           80 
           81 handle_doi() {
           82         # direct download of The Cryosphere pdfs
           83         if regexmatch "$1" '10.5194/tc-'; then
           84                 year="$(printf '%s' "$1" | sed 's/.*-\([12][0-9][0-9][0-9]\)$/\1/')"
           85                 volume="$(printf '%s' "$1" | sed 's,.*/tc-\([0-9][0-9]*\)-.*,\1,')"
           86                 page="$(printf '%s' "$1" | sed 's,.*/tc-[0-9][0-9]*-\([0-9][0-9]*\).*,\1,')"
           87                 curl -O "https://tc.copernicus.org/articles/${volume}/${page}/${year}/tc-${volume}-${page}-${year}.pdf"
           88                 return
           89         fi
           90     if [ "$browser" = 1 ]; then
           91         shurl="$shhost/$1"
           92         browser_open "$shurl"
           93     else
           94         shdl_fetch "$1"
           95     fi
           96     if [ "$reference" = 1 ]; then
           97         if command -v getref >/dev/null 2>&1; then
           98             if [ "$verbose" = 1 ]; then
           99                 echo "adding $1 to bibliography using getref"
          100             fi
          101             getref "$1" >> "$BIB"
          102         else
          103             die 'Error: getref not found in PATH'
          104         fi
          105     fi
          106 }
          107 
          108 die() {
          109     printf '%s\n' "$1" >&2
          110     exit 1
          111 }
          112 
          113 returnstatus=0
          114 prefix=""
          115 verbose=0
          116 browser=0
          117 reference=0
          118 notify=0
          119 open=0
          120 
          121 while :; do
          122     case "$1" in
          123         -h|-\?|--help)
          124             show_help
          125             exit 0
          126             ;;
          127         -v|--version)
          128             show_version
          129             exit 0
          130             ;;
          131         -V|--verbose)
          132             verbose=1
          133             ;;
          134         -b|--browser)
          135             browser=1
          136             ;;
          137         -t|--tor-socks)
          138             prefix="torsocks"
          139             ;;
          140         -r|--reference)
          141             reference=1
          142             ;;
          143         -n|--notify)
          144             notify=1
          145             ;;
          146         -o|--open)
          147             open=1
          148             ;;
          149         --) # end all options
          150             shift
          151             break
          152             ;;
          153         -?*)
          154             die 'Error: Unknown option specified'
          155             ;;
          156         *)  # No more options
          157             break
          158     esac
          159     shift
          160 done
          161 
          162 if [ $# -lt 1 ]; then
          163     doi="$(cat)"
          164     handle_doi "$doi"
          165     exit 0
          166 else
          167     for doi in "$@"; do
          168         handle_doi "$doi"
          169         [ "$notify" = 1 ] && notify-send "${0##*/}" "$doi complete"
          170     done
          171 fi
          172 exit "$returnstatus"