tshdl - scholarref - tools for DOI and BiBTeX reference extraction, fetching, and parsing
(HTM) git clone git://src.adamsgaard.dk/scholarref
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
tshdl (5073B)
---
1 #!/bin/sh
2 set -e
3
4 version=1.0
5 shhost="http://sci-hub.st"
6
7 show_help() {
8 echo "usage: ${0##*/} [OPTIONS] [DOIs ...]"
9 echo "will fetch each DOI from $shhost"
10 echo "If no DOI is specified, this program will expect DOIs as stdin."
11 echo "The program extracts the DOI if a full doi.org URL is passed to it."
12 echo "If the PDF download fails, e.g. due to captias, a tor-browser or "
13 echo "alternatively the system default browser will open sci-hub."
14 echo
15 echo "OPTIONS are one or more of the following:"
16 echo " -h, --help show this message"
17 echo " -v, --version show version and license information"
18 echo " -V, --verbose show verbose information"
19 echo " -t, --tor-socks use torsocks for requests to sci-hub"
20 echo " -b, --browser open sci-hub in browser immediately"
21 echo " -r, --reference add reference to bibliography using scholarref"
22 echo " -n, --notify send desktop notification when complete"
23 echo " -o, --open open downloaded document when complete"
24 echo " -- do not consider any following args as options"
25 }
26
27 show_version() {
28 echo "${0##*/} version $version"
29 echo "Licensed under the ISC License"
30 echo "written by Anders Damsgaard, anders@adamsgaard.dk"
31 echo "https://gitlab.com/admesg/dotfiles"
32 }
33
34 regexmatch() {
35 printf '%s' "$1" | grep -qE "$2"
36 }
37
38 browser_open() {
39 echo "$1"
40 if command -v tor-browser 2>/dev/null; then
41 [ "$verbose" = 1 ] && echo "attempting to launch tor-browser"
42 tor-browser "$1"
43 elif command -v xdg-open 2>/dev/null; then
44 [ "$verbose" = 1 ] && echo "launching default browser"
45 xdg-open "$1"
46 elif command -v open 2>/dev/null; then
47 [ "$verbose" = 1 ] && echo "launching default browser"
48 open "$1"
49 else
50 die 'Error: could not open a browser'
51 fi
52 }
53
54 shdl_fetch() {
55 shurl="$shhost/$doi"
56 [ "$verbose" = 1 ] && echo "processing $doi"
57 [ "$verbose" = 1 ] && echo "connecting to $shurl $prefix"
58 pdfurl="$($prefix curl --connect-timeout 30 --silent "$shurl" | \
59 grep location.href | grep -o '//.*pdf' | sed 's/^/http:/')"
60 [ "$verbose" = 1 ] && echo "fetching pdf from $pdfurl"
61 if ! file="$($prefix curl --connect-timeout 30 \
62 --write-out "filename: %{filename_effective}\n" -O -L -J "$pdfurl" |\
63 grep 'filename: ' | cut -d' ' -f2)"; then
64 if [ "$verbose" = 1 ]; then
65 [ "$notify" = 1 ] && notify-send "${0##*/}" "Error: could not fetch $doi"
66 (echo "Error: could not fetch $doi PDF from $pdfurl" >&2)
67 fi
68 browser_open "$shurl"
69 else
70 echo "saved to $file"
71 if [ ! -f "$file" ] || [ "$(file "$file" | cut -d' ' -f2)" = "HTML" ]; then
72 (echo "Error: downloaded file $file is not valid. Opening browser..." >&2)
73 [ -f "$file" ] && rm "$file"
74 browser_open "$shurl"
75 elif [ "$open" = 1 ]; then
76 xdg-open "$file"
77 fi
78 fi
79 }
80
81 handle_doi() {
82 # direct download of The Cryosphere pdfs
83 if regexmatch "$1" '10.5194/tc-'; then
84 year="$(printf '%s' "$1" | sed 's/.*-\([12][0-9][0-9][0-9]\)$/\1/')"
85 volume="$(printf '%s' "$1" | sed 's,.*/tc-\([0-9][0-9]*\)-.*,\1,')"
86 page="$(printf '%s' "$1" | sed 's,.*/tc-[0-9][0-9]*-\([0-9][0-9]*\).*,\1,')"
87 curl -O "https://tc.copernicus.org/articles/${volume}/${page}/${year}/tc-${volume}-${page}-${year}.pdf"
88 return
89 fi
90 if [ "$browser" = 1 ]; then
91 shurl="$shhost/$1"
92 browser_open "$shurl"
93 else
94 shdl_fetch "$1"
95 fi
96 if [ "$reference" = 1 ]; then
97 if command -v getref >/dev/null 2>&1; then
98 if [ "$verbose" = 1 ]; then
99 echo "adding $1 to bibliography using getref"
100 fi
101 getref "$1" >> "$BIB"
102 else
103 die 'Error: getref not found in PATH'
104 fi
105 fi
106 }
107
108 die() {
109 printf '%s\n' "$1" >&2
110 exit 1
111 }
112
113 returnstatus=0
114 prefix=""
115 verbose=0
116 browser=0
117 reference=0
118 notify=0
119 open=0
120
121 while :; do
122 case "$1" in
123 -h|-\?|--help)
124 show_help
125 exit 0
126 ;;
127 -v|--version)
128 show_version
129 exit 0
130 ;;
131 -V|--verbose)
132 verbose=1
133 ;;
134 -b|--browser)
135 browser=1
136 ;;
137 -t|--tor-socks)
138 prefix="torsocks"
139 ;;
140 -r|--reference)
141 reference=1
142 ;;
143 -n|--notify)
144 notify=1
145 ;;
146 -o|--open)
147 open=1
148 ;;
149 --) # end all options
150 shift
151 break
152 ;;
153 -?*)
154 die 'Error: Unknown option specified'
155 ;;
156 *) # No more options
157 break
158 esac
159 shift
160 done
161
162 if [ $# -lt 1 ]; then
163 doi="$(cat)"
164 handle_doi "$doi"
165 exit 0
166 else
167 for doi in "$@"; do
168 handle_doi "$doi"
169 [ "$notify" = 1 ] && notify-send "${0##*/}" "$doi complete"
170 done
171 fi
172 exit "$returnstatus"