# NOTE: a bang line for bash must be added before this script is run, e.g.
#   #!/usr/pkg/bin/bash
# It is deliberately left out here: the gopher server won't serve this file
# with the bang line in place.
#
# Extracts individual postings as single files from an input stream of
# bboard archive searches, captured with tee (or otherwise).
#
# Input : the captured search output, on stdin.
# Output: one file per posting, written to
#           <boardname>/<postname>_<subject>
#         under the working directory.  <boardname> and <postname> are taken
#         from the end-of-post marker line (everything before / after its
#         last '.').  Each destination path is echoed to stdout as it is
#         written.  Postings that map to the same name are appended to the
#         same file.
#
# Set debug to any non-empty value (e.g. "debug=1 bash thisfile <capture")
# to get a trace of every line read.

debug=${debug:-}

# Work file for the posting currently being collected.  Created with mktemp
# (unpredictable name) and removed on every exit path.
tempfile=$(mktemp "${TMPDIR:-/tmp}/${LOGNAME:-bboard}_tmp.XXXXXX") || {
  echo "cannot create temporary file" >&2
  exit 1
}
trap 'rm -f -- "$tempfile"' EXIT

theend='^<.*-0[0-9]>'     # ERE: a line starting with "<...-0N>" ends a post
thestart='TACKER'         # a line containing this text begins a post
thestart2='Tacker'        # alternative spelling of the start marker
thesubject='SUBJECT: '    # prefix of the subject line
thesubject2='Subject: '   # alternative spelling of the subject prefix

theline=''                # the input line currently being examined
subject=''

# Boolean functions giving the status of the current line ($theline).

# atstart: true when $theline contains either spelling of the start marker.
atstart() {
  [[ $theline == *"$thestart"* || $theline == *"$thestart2"* ]]
}

# atend: true when $theline begins with an end-of-post marker.
# On success the matched marker text is left in BASH_REMATCH[0].
atend() {
  [[ $theline =~ $theend ]]
}

# dbg: print the arguments on stdout, but only when debug is non-empty.
dbg() {
  [[ -n $debug ]] && printf '%s\n' "$*"
  return 0
}

# nextline: read the next input line into $theline; at EOF report completion
# and leave the script.  Surrounding whitespace is still trimmed by read so
# that the markers are recognised at the start of the line; -r keeps
# backslashes in the text intact.
nextline() {
  if ! read -r theline; then
    echo "Done!"
    exit 0
  fi
}

# Read a new line only if we are not already sitting on the start of a new
# post.  This must NOT be wrapped in ( ... ): a read inside a subshell
# consumes the line from stdin but loses the variable, dropping input lines.
while atstart || read -r theline; do

  # Skip forward until we find the start of a new post.
  until atstart; do
    nextline
    dbg "- $theline"
  done

  # Trim everything before the (last) start marker on the line, for
  # whichever spelling of the marker the line actually contains.
  if [[ $theline == *"$thestart"* ]]; then
    theline=${thestart}${theline##*"$thestart"}
  else
    theline=${thestart2}${theline##*"$thestart2"}
  fi

  # Now we have a start; look for the end.  If another start turns up before
  # an end does, this post is discarded and collection restarts from the new
  # start line.  That happens with duplicated posts, which don't have an end
  # delimiter.
  printf '%s\n' "$theline" >"$tempfile"

  # The line after the start line is taken as the subject line; strip
  # whichever prefix spelling it carries.
  nextline
  case $theline in
    "$thesubject"*)  subject=${theline#"$thesubject"} ;;
    "$thesubject2"*) subject=${theline#"$thesubject2"} ;;
    *)               subject=$theline ;;
  esac
  dbg "S: $subject"

  # Collect the body.  The subject line itself is the first line appended.
  until atend || atstart; do
    printf '%s\n' "$theline" >>"$tempfile"
    nextline
    dbg ". $theline"
  done

  # Now we have either the end of the post, or the start of a new one.
  if atstart; then
    : >"$tempfile"
    echo "!restart $subject"
  elif atend; then
    # Only now do we have the basis of a destination dir/name: the marker
    # text without its enclosing angle brackets.
    destfile=${BASH_REMATCH[0]}
    destfile=${destfile#\<}
    destfile=${destfile%\>}
    printf '%s\n\n' "$destfile" >>"$tempfile"

    # Split at the last '.': directory before it, post name after it.
    destdir=''
    if [[ $destfile == *.* ]]; then
      destdir=${destfile%.*}
      destfile=${destfile##*.}
    fi
    # A marker without a usable board part would otherwise yield a path in
    # the filesystem root; keep such posts in the working directory.
    destdir=${destdir:-.}

    if ! mkdir -p -- "$destdir"; then
      echo "cannot create directory: $destdir" >&2
      continue
    fi

    # A '/' in the subject would point into a non-existent subdirectory, so
    # it is replaced in the file name.
    destfile="${destdir}/${destfile}_${subject//\//_}"
    echo "$destfile"

    # There can be multiple posts to a common filename as derived: append.
    cat -- "$tempfile" >>"$destfile"
  fi
done

exit 0