tAdd shdl - scholarref - tools for DOI and BiBTeX reference extraction, fetching, and parsing
(HTM) git clone git://src.adamsgaard.dk/scholarref
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 8eab3ed889928ef7abf5f2aef7a87daa37fa2408
(DIR) parent 9ad685fc5b38869845e71a6a63ef4e399afb1077
(HTM) Author: Anders Damsgaard <anders@adamsgaard.dk>
Date: Mon, 16 Sep 2019 10:49:42 +0200
Add shdl
Diffstat:
M README | 1 +
A shdl | 164 +++++++++++++++++++++++++++++++
2 files changed, 165 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/README b/README
t@@ -6,6 +6,7 @@ references.
- getdoi: extracts a DOI from a file or fetches it online from a search query
- getref: get a BiBTeX entry from a DOI and parse it
- scholarref: combine getref and getdoi for a BiBTeX entry from a search query
+- shdl: fetch a pdf from a DOI through sci-hub.tw, with Tor Browser as fallback
Use locally or run `make install` to install under `/usr/local/bin`. Run
`make uninstall` to remove.
(DIR) diff --git a/shdl b/shdl
t@@ -0,0 +1,164 @@
+#!/bin/sh
+set -e
+
+version=1.0
+shhost="http://sci-hub.tw"
+
+show_help() {
+ echo "usage: ${0##*/} [OPTIONS] DOI1 [DOI2...[DOI N]]"
+ echo "will fetch each DOI from $shhost"
+ echo "If no DOI is specified, this program will expect DOIs as stdin."
+ echo "The program extracts the DOI if a full doi.org URL is passed to it."
+ echo "If the PDF download fails, e.g. due to captias, a tor-browser or "
+ echo "alternatively the system default browser will open sci-hub."
+ echo
+ echo "OPTIONS are one or more of the following:"
+ echo " -h, --help show this message"
+ echo " -v, --version show version and license information"
+ echo " -V, --verbose show verbose information"
+ echo " -t, --tor-socks use torsocks for requests to sci-hub"
+ echo " -b, --browser open sci-hub in browser immediately"
+ echo " -r, --reference add reference to bibliography using scholarref"
+ echo " -n, --notify send desktop notification when complete"
+ echo " -o, --open open downloaded document when complete"
+ echo " -- do not consider any following args as options"
+}
+
+show_version() {
+ echo "${0##*/} version $version"
+ echo "Licensed under the GNU Public License, v3+"
+ echo "written by Anders Damsgaard, anders@adamsgaard.dk"
+ echo "https://gitlab.com/admesg/dotfiles"
+}
+
+strip_doi() {
+ sed 's/^(http:\/\/|https:\/\/)?(dx\.)?(doi\.org\/)//'
+}
+
+browser_open() {
+ echo "$1"
+ if command -v tor-browser 2>/dev/null; then
+ [ "$verbose" = 1 ] && echo "attempting to launch tor-browser"
+ tor-browser "$1"
+ elif command -v xdg-open 2>/dev/null; then
+ [ "$verbose" = 1 ] && echo "launching default browser"
+ xdg-open "$1"
+ elif command -v open 2>/dev/null; then
+ [ "$verbose" = 1 ] && echo "launching default browser"
+ open "$1"
+ else
+ die 'Error: could not open a browser'
+ fi
+}
+
+shdl_fetch() {
+ shurl="$shhost/$doi"
+ [ "$verbose" = 1 ] && echo "processing $doi"
+ [ "$verbose" = 1 ] && echo "connecting to $shurl $prefix"
+ pdfurl="$($prefix curl --connect-timeout 30 --silent "$shurl" | \
+ grep location.href | grep -o '//.*pdf' | sed 's/^/http:/')"
+ [ "$verbose" = 1 ] && echo "fetching pdf from $pdfurl"
+ if ! file="$($prefix curl --connect-timeout 30 \
+ --write-out "filename: %{filename_effective}\n" -O -L -J "$pdfurl" |\
+ grep 'filename: ' | cut -d' ' -f2)"; then
+ if [ "$verbose" = 1 ]; then
+ [ "$notify" = 1 ] && notify-send "${0##*/}" "Error: could not fetch $doi"
+ (echo "Error: could not fetch $doi PDF from $pdfurl" >&2)
+ fi
+ browser_open "$shurl"
+ else
+ echo "saved to $file"
+ if [ ! -f "$file" ] || [ "$(file "$file" | cut -d' ' -f2)" = "HTML" ]; then
+ (echo "Error: downloaded file $file is not valid. Opening browser..." >&2)
+ [ -f "$file" ] && rm "$file"
+ browser_open "$shurl"
+ elif [ "$open" = 1 ]; then
+ xdg-open "$file"
+ fi
+ fi
+}
+
+handle_doi() {
+ if [ "$browser" = 1 ]; then
+ shurl="$shhost/$1"
+ browser_open "$shurl"
+ else
+ shdl_fetch "$1"
+ fi
+ if [ "$reference" = 1 ]; then
+ if command -v getref >/dev/null 2>&1; then
+ if [ "$verbose" = 1 ]; then
+ echo "adding $1 to bibliography using getref"
+ fi
+ getref "$1" >> "$BIB"
+ else
+ die 'Error: getref not found in PATH'
+ fi
+ fi
+}
+
+die() {
+ printf '%s\n' "$1" >&2
+ exit 1
+}
+
+returnstatus=0
+prefix=""
+verbose=0
+browser=0
+reference=0
+notify=0
+open=0
+
+while :; do
+ case "$1" in
+ -h|-\?|--help)
+ show_help
+ exit 0
+ ;;
+ -v|--version)
+ show_version
+ exit 0
+ ;;
+ -V|--verbose)
+ verbose=1
+ ;;
+ -b|--browser)
+ browser=1
+ ;;
+ -t|--tor-socks)
+ prefix="torsocks"
+ ;;
+ -r|--reference)
+ reference=1
+ ;;
+ -n|--notify)
+ notify=1
+ ;;
+ -o|--open)
+ open=1
+ ;;
+ --) # end all options
+ shift
+ break
+ ;;
+ -?*)
+ die 'Error: Unknown option specified'
+ ;;
+ *) # No more options
+ break
+ esac
+ shift
+done
+
+if [ $# -lt 1 ]; then
+ doi="$(cat)"
+ handle_doi "$doi"
+ exit 0
+else
+ for doi in "$@"; do
+ handle_doi "$doi"
+ [ "$notify" = 1 ] && notify-send "${0##*/}" "$doi complete"
+ done
+fi
+exit "$returnstatus"