Add archive.org URI retrieval. Add first support for URI shortening. - annna - Annna the nice friendly bot.
(HTM) git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/annna/
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Tags
(DIR) README
---
(DIR) commit b9b579a607ee73661922628578156f44e2fc23da
(DIR) parent a8df83437f63bfe2a68eafedcdb86502927aafe8
(HTM) Author: Annna Robert-Houdin <annna@bitreich.org>
Date: Sun, 12 Jan 2025 20:56:28 +0100
Add archive.org URI retrieval. Add first support for URI shortening.
Diffstat:
M annna-message-common | 25 +++++++++++++++----------
A archiveorg-uri | 32 +++++++++++++++++++++++++++++++
M fetch-uri | 14 --------------
M modules/tor/whitelist-from-tor | 1 +
4 files changed, 48 insertions(+), 24 deletions(-)
---
(DIR) diff --git a/annna-message-common b/annna-message-common
@@ -83,6 +83,8 @@ case "${text}" in
esac
# Set below and annna will concatenate at the end.
+ # HTML title.
+ urititle=""
# Subtitle URI.
sturi=""
# Replacement URI.
@@ -113,7 +115,17 @@ case "${text}" in
tmpf=$(mktemp)
fetch-uri "${uri}" > "${tmpf}"
- urititle="$(grabtitle < "${tmpf}")"
+ if [ ! -s "${tmpf}" ];
+ then
+ archiveorguri="$(archiveorg-uri "${uri}")"
+ if [ -n "${archiveorguri}" ];
+ then
+ sarchiveorguri="$(bitreich-uri-shortener "${archiveorguri}")"
+ nuris="archive: ${sarchiveorguri}"
+ fetch-uri "${archiveorguri}" > "${tmpf}"
+ fi
+ fi
+ [ -s "${tmpf}" ] && urititle="$(grabtitle < "${tmpf}")"
case "${urititle}" in
"")
@@ -181,10 +193,6 @@ case "${text}" in
nuris="$nuris metadata: gophers://codemadness.org/1/idiotbox.cgi?v=$ytid"
fi
;;
- *www.wsj.com/*|*www.ft.com/*|*www.nytimes.com/*)
- archvuri=$(fetch-uri -h "https://archive.is/newest/$uri" | awk 'NR == 1 && !/302/ { exit } /^location: / { print substr($2, 1, length($2)-1) }')
- [ -n "$archvuri" ] && nuris="archive: $archvuri"
- ;;
*www.reddit.com*)
nuri="$(printf '%s\n' "${uri}" | sed "s;www.reddit.com;old.reddit.com;")"
nuris="old.reddit: ${nuri}"
@@ -267,7 +275,7 @@ case "${text}" in
*)
mimetype="$(file -b --mime-type "${tmpf}")"
case "${mimetype}" in
- text/*)
+ text/*|application/javascript)
nocuri=0
;;
esac
@@ -278,10 +286,7 @@ case "${text}" in
then
if [ $nocuri -eq 0 ];
then
- if [ -z "${curi}" ];
- then
- curi="$(html2text < "${tmpf}" | /br/bin/bitreich-paste)"
- fi
+ [ -z "${curi}" ] && curi="$(html2text < "${tmpf}" | /br/bin/bitreich-paste)"
outputstr="${outputstr} content: ${curi} ;"
fi
(DIR) diff --git a/archiveorg-uri b/archiveorg-uri
@@ -0,0 +1,32 @@
+#!/bin/sh
+# Print the archive.org (Wayback Machine) snapshot URI for URI, if any.
+# Prints nothing when no snapshot URI can be extracted from the API reply.
+export PATH="$HOME/bin:$PATH"
+
+usage() {
+ printf "usage: %s [-h] URI\n" "$(basename "$0")" >&2
+ exit 1
+}
+
+if [ $# -lt 1 ] || [ $# -gt 2 ]
+then
+ usage
+fi
+
+if [ $# -eq 2 ]
+then
+ # -h is accepted but has no effect: nothing below reads a header option.
+ [ "$1" = "-h" ] || usage
+ shift
+fi
+
+uri="$1"
+aiapiuri="http://archive.org/wayback/available?url=${uri}"
+apiai="$(fetch-uri "$aiapiuri")"
+# NOTE(review): assumes the snapshot URI is the 2nd "url" value in the reply.
+aiuri="$(printf '%s\n' "$apiai" | grep 'available' | awk -F 'url": "' '{print $3}' | awk -F '"' '{print $1}')"
+if [ -n "${aiuri}" ];
+then
+ printf "%s\n" "${aiuri}"
+fi
+
(DIR) diff --git a/fetch-uri b/fetch-uri
@@ -31,17 +31,3 @@ esac
grep -qx "$host" "/home/annna/bin/modules/tor/whitelist-from-tor" || usetor=1
curl -qgsm 5 --fail -L --max-redirs 3 -A "$ua" $opth ${usetor:+--preproxy socks5h://127.0.0.1:9050 }"$uri"
-# Taken from: https://github.com/uriel1998/muna/blob/master/muna.sh
-if [ $? -eq 22 ];
-then
- aiapiuri="http://archive.org/wayback/available?url=${uri}"
- usetor=0
- apiai="$(curl -qgsm 5 --fail -L --max-redirs 3 -A "$ua" $opth ${usetor:+--preproxy socks5h://127.0.0.1:9050 }"$aiapiuri")"
- available="$(echo "$apiai" | grep 'available')"
- if [ -n "${available}" ];
- then
- aiuri="$(echo "$apiai" | awk -F 'url": "' '{print $3}' 2>/dev/null | awk -F '", "' '{print $1}' | awk -F '"' '{print $1}')"
- curl -qgsm 5 --fail -L --max-redirs 3 -A "$ua" $opth ${usetor:+--preproxy socks5h://127.0.0.1:9050 }"$aiuri"
- fi
-fi
-
(DIR) diff --git a/modules/tor/whitelist-from-tor b/modules/tor/whitelist-from-tor
@@ -1,3 +1,4 @@
www.forgottenweapons.com
forgottenweapons.com
archive.is
+archive.org