############################## -*- Mode: sh -*- #############################
## wwwoffle-chunks -- chop a wwwoffle index into smaller chunks
## Copyright       : http://www.fsf.org/copyleft/gpl.html
## Author          : Dan Jacobson -- http://jidanni.org/
## Created On      : Aug 2001
## Last Modified By: Dan Jacobson
## Last Modified On: Thu Jun 13 09:27:47 2002
## Update Count    : 186
## Status          : seems to work
###############################################################################
# The newest copy of this program is on my website

# Location of the WWWOFFLE spool.  A value already present in the
# environment wins; otherwise the default below is used.
: "${wwwoffle_cache=/var/spool/wwwoffle}"

# Refuse to run (exit 24) when the spool directory does not exist.  The
# directory is only used further down to look up the index's .timestamp
# file, but checking it early gives a clear message.  The expansions are
# quoted so that a path containing blanks is tested as a single word.
if ! test -d "$wwwoffle_cache"
then
    echo "$0: $wwwoffle_cache: invalid wwwoffle_cache directory" 1>&2
    exit 24
fi
# Where the chunk files are written.
DIR=$HOME/wwwoffle-chunks
# Number of index lines per chunk.
size=32
# Which WWWOFFLE index to chop up; the first command line argument, when
# present, overrides the default.
defaultindex=lasttime
index=${1-$defaultindex}
# The default used to be "domain"; "alpha" also sorts inside a domain.
defaultsortstyle=alpha
# Keep a sortstyle the user has already set.  A compound style such as
# mtime&monitor puts a raw & into the generated HTML, which has not caused
# any trouble so far.
sortstyle=${sortstyle-$defaultsortstyle}
# Print the help text on stderr and exit 1 when more than one argument is
# given or when the only argument is --help.
if test $# -gt 1 || test "x$1" = x--help
then
    # First part: unquoted delimiter, so $0, $DIR and the defaults expand.
    cat 1>&2 <<EOF
$0: usage:

After going offline run this program to break a too-big WWWOFFLE index
into chunks to save wear and tear and waiting each time one hits the
BACK key in their favorite browser, or if they don't read their Last
Time indexes before they drop off the back of the [1] [2] [3] ... list.
Then one points their browser to $DIR/...  optional argument is what
index to get, default is "$defaultindex".  One needn't be root to run
this program.  Sorting style can be adjusted thru \$sortstyle,
default=$defaultsortstyle.
EOF
    # Second part: quoted delimiter, so the examples are printed literally.
    cat 1>&2 <<\EOQ
Some useful things one can also do:
In .xsession to have it start up on the aa chunk of the latest batch:
	set -- $(ls -t wwwoffle-chunks/*) #for very oldest chunk: no -t,
	#for very youngest chunk: -r
	some_browser file://$HOME/$1& #or maybe just some_browser $1&
(some of these assume that we were able to access the wwwoffle cache tree when
making the filename)
In .bashrc:
 alias lynx-last-chunk='set -- `ls -t ~/wwwoffle-chunks/*`;lynx $1'
 alias lynx-lasttime='lynx http://localhost:8080/index/lasttime?sort=alpha'
EOQ
    exit 1
fi
# Create the output directory on first use and make it the working
# directory; the chunk files below are all written relative to it.
# mkdir -p is a no-op when the directory already exists, and the quoting
# keeps a $HOME containing blanks in one piece.  If the directory cannot be
# created the cd fails and the script stops with status 22.
mkdir -p -- "$DIR"
cd -- "$DIR" || exit 22
# From here on stop at the first command that fails.
set -e #over paranoid
# Work out the time stamp of this batch of chunks.
#   datestr   - hex seconds since the epoch; it starts every chunk file
#               name, so a plain ls lists batches in time order
#   nice_date - human readable form used in page titles and headings
# The mtime of the index's .timestamp file is used when that file is
# readable; otherwise the current time is used instead.
indexdir=$wwwoffle_cache/$index
ts=$indexdir/.timestamp
if test -r "$ts"
then
    # The path is handed to perl as an argument rather than pasted into the
    # program text, so quotes or backslashes in it cannot break the perl
    # code.  Capturing the output in a variable first also lets a perl
    # failure stop the script when set -e is in effect, instead of being
    # hidden behind the always-successful "set --".
    stamp=$(perl -we 'use POSIX qw(strftime); use File::stat;
        my $sb = stat($ARGV[0]) or die "$ARGV[0]: $!\n";
        printf "%x %s", $sb->mtime,
            strftime("%a %b %e %H:%M:%S %Y", localtime $sb->mtime);' "$ts")
    # Deliberately unquoted: the first word is the hex mtime, the remaining
    # words are the date (runs of blanks collapse to single spaces).
    set -- $stamp
    datestr=$1
    shift
    nice_date=$*
else
    # No usable time stamp file: label the pages with the time of saving.
    nice_date="[$(date) saved]"
    datestr=$(printf %x "$(date +%s)")
fi
# Every chunk file name starts with <hex time>-<index>-.  The index name is
# included to keep chunks of different indexes from colliding.
prefix=$datestr-$index-

# Fetch the index page, keep only the entry lines, and cut them into files
# of $size lines each; split appends aa, ab, ... to the prefix.  The URL is
# quoted because it contains a ?, which the shell would otherwise treat as
# a glob character.
wwwoffle -o "http://localhost:8080/index/$index?sort=$sortstyle" |
    sed '/^<li>&nbsp;<a href=/!d' |
    split -l "$size" - "$prefix"

# ?? instead of *: only the two-letter files just written by split match,
# not the .html files left by an earlier run.
tmp=("$prefix"??)
# With an empty index split writes nothing and the pattern stays unexpanded;
# say so instead of failing later with a confusing mv error.
if ! test -e "${tmp[0]}"
then
    echo "$0: no entries found in the \"$index\" index, nothing to do" 1>&2
    exit 25
fi
# Two-letter suffixes (aa, ab, ...) used as link labels.
shortrefs=("${tmp[@]##*-}")
for i in "${tmp[@]}"; do mv -- "$i" "$i.html"; done
# Take the page list from the files just renamed rather than globbing
# again, so stale chunks of an earlier run that happen to share the prefix
# are not picked up and given a second header.
refs=("${tmp[@]/%/.html}")

# Pre-render one navigation link per chunk: links[n] points at refs[n] and
# is labelled with that chunk's short suffix shortrefs[n].  Each entry ends
# in a blank so the links can simply be strung together.
for ((i = 0; i < ${#refs[@]}; i++))
do
    links[i]="<a href=\"${refs[i]}\">${shortrefs[i]}</a> "
done

# Turn every chunk into a small HTML page: ed puts a header (doctype, title,
# heading, start of the list) in front of the entries and a footer with the
# navigation bar behind them.

# List the pages being made, one per line.
printf '%s\n' making "${refs[@]}"

m=0 # 1-based number of the chunk being processed; refs[m] is the next one
for i in "${refs[@]}"
do
    m=$((m + 1))
    # In the navigation bar show the current chunk as plain [xx] instead of
    # a link to itself; the real link is put back after the page is written.
    oldlink=${links[m-1]}
    links[m-1]="[${shortrefs[m-1]}] "
    # Last line of the navigation: a link to the following chunk, or a
    # remark on the last page.  Building it here keeps the last page free
    # of an empty <a href=""></a> element, and a variable named sorry that
    # happens to be in the environment can no longer leak into the pages.
    #well, maybe they read from last to first...
    if test "${refs[m]+set}"
    then
        nextline=" . . . . . . Next: <a href=\"${refs[m]}\">${shortrefs[m]}</a>"
    else
        nextline="(current=last page)"
    fi
    # Short name of the chunk for the title: the part between the last -
    # and the extension.
    frag=${i##*-}; frag=${frag%.*}
    # ed script: "1i" inserts the header before line 1, "$a" appends the
    # footer after the last line, then write and quit.
    ed -s "$i" <<ED_SCRIPT
1i
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<TITLE>$nice_date $frag $sortstyle WWWOFFLE chunk</TITLE></HEAD><BODY>
<H1>Chunk $i of <a href="http://localhost:8080/#indexes">WWWOFFLE
indexes</a>, sorted by $sortstyle $nice_date</H1><ol>
.
\$a
</ol><p>
<a href=".">(up)</a> ${links[@]}
$nextline
</p></BODY></HTML>
.
w
q
ED_SCRIPT
    links[m-1]=$oldlink

    # Design notes:
    # - align=center would be nice, but the strict DTD is wanted too.
    # - With a mouse it feels better when "Next" is near the middle of the
    #   page.  With lynx, it being at the beginning of line is better, or
    #   hit ^E.
    # - Dan Mercer, comp.unix.shell: You can also use:
    #     : "${a[@]}"; echo $_
    # - The standard WWWOFFLE footer links are left out on purpose: this
    #   script is meant to correct a size & slowness problem, so the pages
    #   are kept as clean as possible.
    # - WWWOFFLE 2.6a at least produces illegal html with raw & in URLs.
    #   tidy could repair that:
    #     tidy -q $i | tr -d \\240 > /tmp/WWW$$; mv /tmp/WWW$$ $i
    #   but tidy also causes \240 where nbsp was, which eats the next char,
    #   at least with lynx.

    # Give the page the mtime of the index's time stamp file, so that
    # ls -t orders the chunks by index date.
    if test -r "$ts"; then touch -r "$ts" "$i"; fi
done
# Tell the user where to start reading: the first page of this batch.
# Pages made by earlier runs stay in $DIR until they are removed by hand.
printf '\n'
printf '%s\n' "OK, now point your browser at $DIR/${refs[0]}"
