url fetch improvements - annna - Annna the nice friendly bot.
(HTM) git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/annna/
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Tags
(DIR) README
---
(DIR) commit 38e087783a9d8a080204b5c7d4d37cf088531a67
(DIR) parent 0d34d3fa3e153cfe4996b76b30db1f2ce401b189
(HTM) Author: Annna Robert-Houdin <annna@bitreich.org>
Date: Sun, 9 Dec 2018 12:54:37 +0100
url fetch improvements
Move URL fetching into a separate script (fetch-url) so it can be modified in
one place.
Title and content fetching used to retrieve the page in two different ways.
Both now use the new fetch-url script, so the page is fetched in a single request (via Tor).
Remove the control-character trimming (tr) from the title pipeline around
grabtitle; it is now done in the binary.
Diffstat:
M annna-start-services | 10 +++++-----
A curl-grabtitle | 9 +++++++++
A fetch-url | 14 ++++++++++++++
3 files changed, 28 insertions(+), 5 deletions(-)
---
(DIR) diff --git a/annna-start-services b/annna-start-services
@@ -89,10 +89,9 @@ then
*)
if [ -n "$uri" ];
then
- urititle="$(curl-grabtitle "${uri}" \
- | tr '[:cntrl:]' ' ' \
- | sed 's@^ *@@' \
- | cut -c -200)"
+ tmpf=$(mktemp)
+ fetch-url "${uri}" > "${tmpf}"
+ urititle="$(grabtitle < "${tmpf}" | sed 's@^ *@@' | cut -c -200)"
if [ -n "$urititle" ];
then
case "${urititle}" in
@@ -107,12 +106,13 @@ then
then
annna-say -c "#bitreich-en" "HTML title: ${urititle}"
else
- purl="$(curl -sL "${uri}" | 9 htmlfmt | /br/bin/bitreich-paste)"
+ purl="$(9 htmlfmt < "${tmpf}" | /br/bin/bitreich-paste)"
annna-say -c "#bitreich-en" "content: ${purl} ; HTML title: ${urititle}"
fi
;;
esac
fi
+ rm -f "${tmpf}"
continue
fi
;;
(DIR) diff --git a/curl-grabtitle b/curl-grabtitle
@@ -0,0 +1,9 @@
+#!/bin/sh
+export PATH="$HOME/bin:$PATH"
+
+if test x"$1" = x""; then
+ echo "usage: $0 <url>" >&2
+ exit 1
+fi
+
+fetch-url "$1" | grabtitle
(DIR) diff --git a/fetch-url b/fetch-url
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+if test x"$1" = x""; then
+ echo "usage: $0 <url>" >&2
+ exit 1
+fi
+
+curl \
+ --preproxy socks5://127.0.0.1:9100 \
+ -s \
+ -L --max-redirs 3 \
+ -m 5 \
+ -H 'User-Agent:' \
+ "$1" 2>/dev/null