Add a cleaner for subtitles. - annna - Annna the nice friendly bot.
(HTM) git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/annna/
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Tags
(DIR) README
---
(DIR) commit 30eb06f31d2a42e280ea01521f6baa1bca2fec33
(DIR) parent a622486a185d90ca0225311dbc9e88a9fbab5994
(HTM) Author: Annna Robert-Houdin <annna@bitreich.org>
Date: Sun, 10 May 2020 18:57:47 +0200
Add a cleaner for subtitles.
Be careful, it is under Mafia Domain.
Thanks leot!
Diffstat:
M subtitle-paste | 3 +++
A ytautosubcleaner.awk | 51 +++++++++++++++++++++++++++++++
2 files changed, 54 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/subtitle-paste b/subtitle-paste
@@ -22,6 +22,9 @@ if [ $(stat -c%s "${ofile}") -eq 0 ];
then
rm "${ofile}"
else
+ # Make it more human readable.
+ awk -f /home/annna/bin/ytautosubcleaner.awk < "${ofile}" > "${ofile}.bak"
+ mv "${ofile}.bak" "${ofile}"
printf "gopher://bitreich.org/0/p/%s\n" "${ofile}"
fi
(DIR) diff --git a/ytautosubcleaner.awk b/ytautosubcleaner.awk
@@ -0,0 +1,51 @@
+#!/usr/bin/awk -f
+
+# This file is licensed under Mafia Domain. So be careful.
+
+# Make YouTube automatic subtitles more human readable: strip the trailing
+# " align:start position:0%" from timing lines, drop captions with a line
+# matching ><c>, and print a caption only if it spans more than two lines.
+
+/^[0-9]+:[0-9]+:[0-9]+\.[0-9]+ -->/ {
+ sub(/ align:start position:0%$/, "")
+
+ if (caption && split(caption, lines, "\n") > 2) {
+ print caption
+ }
+
+ ignore = 0
+ caption = $0
+ next
+}
+
+ignore {
+ next
+}
+
+# skip lines that consist only of spaces
+/^ +$/ {
+ next
+}
+
+# current caption contains <c> garbage and will be repeated in the next
+# caption: drop it and skip its remaining lines until the next timing line
+caption && /><c>/ {
+ ignore = 1
+ caption = ""
+ next
+}
+
+caption {
+ caption = caption "\n" $0
+ next
+}
+
+{
+ print
+}
+
+END {
+ if (caption && split(caption, lines, "\n") > 2) {
+ print caption
+ }
+}