convert_text.sh - randomcrap - random crap programs of varying quality
 (HTM) git clone git://git.codemadness.org/randomcrap
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       convert_text.sh (691B)
       ---
            1 #!/bin/sh
            2 # easy: 33 - 69
            3 # intermediate: 71 - 197 
            4 # hard: 199 - 222
            5 
            6 mkdir -p txt
            7 mutool convert -F text -o "txt/easy.txt" woodpecker.pdf 33-69
            8 mutool convert -F text -o "txt/intermediate.txt" woodpecker.pdf 71-197
            9 mutool convert -F text -o "txt/hard.txt" woodpecker.pdf 199-222
           10 
           11 for n in easy intermediate hard; do
           12 
           13 LC_ALL=C awk '
           14 length($0) <= 3 { next; }
           15 /solutions/ || /Solutions/ { next; }
           16 /^\xef/ { next; }
           17 /^[0-9]*[ ]*$/ { next; }
           18 {
           19         l = $0;
           20         gsub("–", "-", l); # normal ASCII dash
           21         # trim leading and trailing spaces.
           22         gsub("^[ ]*", "", l);
           23         gsub("[ ]*$", "", l);
           24         print l;
           25 }
           26 ' < "txt/$n.txt" > "$n.txt"
           27 done
           28 
           29 for n in easy intermediate hard; do
           30         cat "$n.txt"
           31 done > labels.txt
           32