Fix title parsing for imdb. - annna - Annna the nice friendly bot.
(HTM) git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/annna/
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Tags
(DIR) README
---
(DIR) commit 0925485876164dc16cd6dd149ec600ba690b117a
(DIR) parent 81fd3449a3013908f44bfad761da51b99430101a
(HTM) Author: Annna Robert-Houdin <annna@bitreich.org>
Date: Sun, 5 Dec 2021 19:56:33 +0100
Fix title parsing for imdb.
Thanks Bob!
Diffstat:
M imdb2gopherbay | 23 +++++++++++++++++------
1 file changed, 17 insertions(+), 6 deletions(-)
---
(DIR) diff --git a/imdb2gopherbay b/imdb2gopherbay
@@ -6,16 +6,27 @@ then
exit 1
fi
+extractjson() {
+awk '
+/<script id="__NEXT_DATA__"/ {
+ match($0, "<script id=\"__NEXT_DATA__\"[^>]*>");
+ s = substr($0, RSTART + RLENGTH);
+ match(s, "</script>");
+ s = substr(s, 1, RSTART - 1);
+ print s;
+}'
+}
+
imdburi="$1"
title="$(curl -s "${imdburi}" \
- | xml2tsv 2>/dev/null \
- | grep __NEXT_ \
- | cut -f 4- \
- | sed 's,\\\\,\\,g' \
- | jshon -e head -e 9 -e 1 -e children -u \
- | sed 's, - IMDb,,')"
+ | extractjson \
+ | json2tsv \
+ | grep associatedTitle.originalTitleText.text \
+ | head -n 1 \
+ | cut -f 3)"
[ -z "${title}" ] && exit 1
printf "%s\n" "${title}"
+