checklinks.sh - www.codemadness.org - www.codemadness.org saait content files
(HTM) git clone git://git.codemadness.org/www.codemadness.org
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
checklinks.sh (440B)
---
#!/bin/sh
# List the unique external links found in the generated HTML pages.
#
# Run from the directory that contains output/. Prints one TAB-separated
# record per unique link on stdout (field 2 is treated as the URL).

# list_links: write the link records of every output/*.html page to stdout.
# NOTE(review): relies on "webdump -x" writing TAB-separated resource
# records to file descriptor 3 and on -b setting the base URL used for
# relative links -- confirm against webdump(1).
list_links() {
  for h in output/*.html; do
    # an unmatched glob stays literal: skip it instead of failing to open it.
    [ -f "$h" ] || continue
    # fd 3 is pointed at our stdout; the normal output on fd 1 is discarded.
    # /dev/null is used instead of closing fd 1 so writes to it cannot fail.
    webdump -x -b "http://codemadness.org/" < "$h" 3>&1 >/dev/null
  done
}

# filter_links: read TAB-separated records on stdin, drop the ignored hosts
# and print the first record seen for each URL (compared without fragment).
filter_links() {
  awk -F '\t' '
  # ignore these links.
  $2 ~ /codemadness\.(org|nl)/ ||
  $2 ~ /youtube\.com/ ||
  $2 ~ /man\.openbsd\.org/ {
    next;
  }
  {
    link = $2;
    sub(/#.*/, "", link); # remove fragment: same page, only one check needed.

    # the record is printed unmodified (fragment included), once per link.
    if (!seen[link]++)
      print $1 "\t" $2;
  }'
}

list_links | filter_links