checklinks.sh - www.codemadness.org - www.codemadness.org saait content files
 (HTM) git clone git://git.codemadness.org/www.codemadness.org
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       checklinks.sh (440B)
       ---
            1 #!/bin/sh
            2 # list unique links.
            3 
            4 for h in output/*.html; do
            5         # dup fd 3 to stdout, close fd 1: do not print normal output.
            6         webdump -x -b "http://codemadness.org/" < "$h" 3>&1 >&-
            7 done | \
            8 awk -F '\t' '
            9 # ignore these links.
           10 $2 ~ /codemadness\.(org|nl)/ ||
           11 $2 ~ /youtube\.com/ ||
           12 $2 ~ /man\.openbsd.org/ {
           13         next;
           14 }
           15 {
           16         link = $2;
           17         gsub("#.*$", "", link); # remove fragment.
           18         
           19         if (!links[link]) {
           20                 print $1 "\t" $2;
           21                 links[link] = 1;
           22         }
           23 }'