program indexer; {$c-,e+,f-,i-,j-,m-,p+,r+,s+,t-,u+ } {-------------------------------------------------------------} { } { INDEX CREATION FROM THE KEYBOARD } { } { David E. Cortesi, 2340 Tasso St., Palo Alto CA 94301. } { (compuserve 72155,450) } { } { Accepts index entries for a book from the keyboard, sorts } { the entries and sub-entries, collates page references, } { and creates an ASCII file that can be printed or edited. } { } { Term Recall is an unusual feature of the user interaction. } { If, when entering an index term, the user hits the ESC key, } { the program will find the least term that matches the input } { to that point and fill in its characters on the input line. } { Hitting ESC again retracts those letters and displays the } { letters of the next-higher matching term. This can save } { considerable typing -- a long term can be entered as only } { a couple of letters plus ESC -- and it allows the user to } { review the terms entered to that point in alpha order. } { } { Creates files INDEXER.OUT, the index-document file, and } { INDEXER.TRE, an internal record of the tree which will be } { reloaded on the next run if it then exists. } {-------------------------------------------------------------} const nullch = 0; { the null, end-of-string } strmax = 65; { max size of a string (64,00h)} sbufsize = 2046; { page size of a string buffer } sbufnum = 16; { allow up to 32K of buffers } maxdepth = 20; { stack size for tree-walks } asciibel = 7; { names for ascii characters } asciibs = 8; asciilf = 10; asciicr = 13; asciiesc = 27; asciiblank = 32; asciidel = 127; type strindex = 1..strmax; { indices over strings } strlength= 0..strmax; { lengths of strings } relation = (less,equal,more); { result of comparisons } nchar = 0..255; { numeric characters are bytes } str = record { an independent string is } len : strlength; { ..a length and some bytes, } val : array[strindex] of nchar { ending in 00h } end; strbuff = record { a string buffer is a compact } free : 0..sbufsize; { collection of strings. } data : array[1..sbufsize] of nchar end; stref = record { an indirect string is the } nb : 1..sbufnum; { index of an strbuff's address} bo : 1..sbufsize { and an index into it. } end; page = record { a page on which a term is } next : ^page; { ..referenced, and ^next one } num : integer end; ppage = ^page; node = record { one node of a binary tree } lson, rson, { descendant trees } subt : ^node; { subtree of sub-terms } iref, uref : stref; { original and uppercase terms } phead : ppage; { head of chain of page-refs } skip : boolean; { phony node "M" starts a tree } end; pnode = ^node; treewalk = record { current state of an inorder } current : pnode; { ..walk of a tree: this node, } top : 0..maxdepth; { stack-top pointer, stacked } stack : array[1..maxdepth] of pnode;{ nodes, mark } goneleft : boolean { true when backing out of leaf} end; var sbufptrs : array[1..sbufnum] of ^strbuff; { blocks of bytes} sbufcnt : 0..sbufnum; { how many blocks are active } maintree : pnode; { root of the term-tree } initerm : str; { "M" term for starting trees } indlevel : 0..9; { subterm nesting (indent) lev.} outfile : text; { the output document } {-------------------------------------------------------------} { routines operating on independent strings } { Pascal/Z string type was avoided to maximize portability. } {-------------------------------------------------------------} function upcase(c:nchar) : nchar; { force character to uppercase } begin if (c>=ord('a')) and (c<=ord('z')) then upcase := c-32 else upcase := c end; procedure stucase(var a,b:str); { duplicate a string, forcing uppercase } var j : strlength; c : nchar; begin j := 0; repeat j := j+1; c := a.val[j]; b.val[j] := upcase(c); until c=nullch; b.len := j-1 end; {-------------------------------------------------------------} { routines operating on stored strings } { To keep all stored terms in string form (P/Z or our version)} { would use far too much storage. Here we pack strings into } { large blocks. The blocks are allocated as needed, to a max } { of 32K -- limit enforced by compiler range checking. } {-------------------------------------------------------------} procedure stput(var a:str; var b:stref); { stow string a in latest buffer, return indirect reference} var bp : ^strbuff; j : strindex; k : 1..sbufsize; begin bp := sbufptrs[sbufcnt]; { ^latest string buffer } if bp^.free<(a.len+1) then begin { not enough room! } new(bp); { make, count new buffer page } sbufcnt := sbufcnt+1; { range error here when full } sbufptrs[sbufcnt] := bp; bp^.free := sbufsize end; b.nb := sbufcnt; { save buffer-page number } j := 1; k := 1+sbufsize-bp^.free; b.bo := k; { save buffer-page offset } while j <= a.len do begin bp^.data[k] := a.val[j]; j := j+1; k := k+1 end; bp^.data[k] := nullch; { mark end of stored string } bp^.free := sbufsize-k { adjust bytes left in block } end; procedure stget(var b:stref; var a:str); { retrieve stored string from buffer into string-record } var bp : ^strbuff; j : strindex; k : 1..sbufsize; c : nchar; begin bp := sbufptrs[b.nb]; { point to the buffer page } k := b.bo; { ..and offset into it } j := 1; repeat { copy the stored string out } c := bp^.data[k]; a.val[j] := c; j := j+1; k := k+1; until (c=nullch); a.len := j-2 end; function sbcomp(var a:str; var b:stref) : relation; { EXACT comparison of a string to a stored string value -- if "a" is initially equal but shorter, it is "less." } var bp : ^strbuff; j : strindex; k : 1..sbufsize; x,y : nchar; r : relation; begin bp := sbufptrs[b.nb]; k := b.bo; j := 1; repeat x := a.val[j]; y := bp^.data[k]; j := j+1; k := k+1 until (x<>y) or (x=nullch); if x=y then r := equal else if xy) or (x=nullch); if (x=y) or (x=nullch) then r := equal else if xequal then if r=less then q := p^.lson else q := p^.rson else q := p; o := p; p := q until (r=equal) or (p=nil); if r=equal then insert := p else begin { term doesn't exist in the tree } q := makenode(a,ua); if r=less then o^.lson := q else o^.rson := q; insert := q end; end; {-------------------------------------------------------------} { routines for tree-walking. These routines abstract the } { idea of an in-order tour of the tree into a single record. } { The usual algorithm for a walk is recursive (see J&W 11.5), } { which is not convenient for this program. } {-------------------------------------------------------------} procedure initwalk(t:pnode; var w:treewalk); { initialize for a walk over the given tree } begin w.current := t; { start at the top node, } w.goneleft := false; { ..but descend left first off } w.top := 0 { stack is empty } end; procedure push(pn: pnode; var w: treewalk); { push a given node onto the walk-stack } begin if w.top0 then begin pop := w.stack[w.top]; w.top := w.top-1 end else pop := nil end; function treestep(var w:treewalk) : pnode; { step to the next node in lexical order in a tree. return that node as result, and save it in the walk record as "current." Return nil if end of tree. } var t : pnode; begin t := w.current; repeat if not w.goneleft then begin { descend to the left } if t<> nil then while t^.lson<>nil do begin push(t,w); t := t^.lson end; w.goneleft := true { t^ a left-leaf of tree } end else { been down; have handled current; go up/right} if t<> nil then if t^.rson <> nil then begin t := t^.rson; { jog right, then } w.goneleft := false { drop down again } end else { nowhere to go but up } t := pop(w) until w.goneleft; { repeats when we jog right } w.current := t; treestep := t end; function setscan(tree: pnode; var w: treewalk; var a: str) : pnode; { given a partial term "a," a tree "tree," and a tree- walk record "w," set up w so that a series of calls on function treestep will return all the nodes that are initially equal to a in ascending order. If there are none such, return nil. This function sets up for Term Recall when the escape key is pressed during input. The algorithm is to find the matching term that is highest in the tree, then use treestep to find the lexically-least node under that term (which may not be a match) and then to treestep to the first match.} var ua : str; p,t : pnode; r : relation; quit : boolean; begin stucase(a,ua); initwalk(tree,w); t := tree; if t=nil then setscan := nil { no matches possible } else begin { step 1 is to find any part-equal node at all } quit := false; repeat r := sxcomp(ua,t^.uref); case r of less : if t^.lson<>nil then t := t^.lson else quit := true; more : if t^.rson<>nil then t := t^.rson else quit := true; equal : quit := true end until quit; { If we have a match, it may not be the least one. If this node has a left-son, there can be lesser matches (and nonmatches) down that branch. } if r<>equal then setscan := nil { no match a-tall } else begin w.current := t; if t^.lson=nil then w.goneleft := true else begin { zoom down in tree } w.goneleft := false; repeat t := treestep(w); r := sxcomp(ua,t^.uref) until r=equal end; setscan := t end end end; {-------------------------------------------------------------} { routines for phase 1 -- input } {-------------------------------------------------------------} procedure indent; { indent the cursor for the current nesting level } var i : 0..9; begin for i := 1 to indlevel do write('. . ') end; function readnc : nchar; { get one byte from the keyboard, bypassing the usual pascal procedures and going straight to CP/M } const bdos=5; inchar=1; asciicr=13; asciilf=10; type regs = record a : 0..255; bc,de,hl : integer end; var r : regs; procedure call(var x:regs; addr:integer); external; begin r.bc := inchar; call(r,bdos); readnc := r.a end; procedure getterm(tree: pnode; var a:str; var cont: boolean); { get a term from the user, with control keys used thus: cr : end the term. lf : end the term, begin a subterm of it. esc: try to complete the term with the next (first) matching term from the present tree-context. del: cancel esc-completion, return to original entry. } var c : nchar; j, oj : strindex; k : strlength; x,ua : str; quit : boolean; tw : treewalk; p : pnode; procedure backup; { backup the screen and the "a" string to the original term that was entered. } var qj : strindex; begin for qj := j downto (oj+1) do write(chr(asciibs),chr(asciiblank),chr(asciibs)); j := oj; a.val[j] := nullch end; procedure startscan; { set up for an alphabetical scan over all terms that are an initial match to user entry thus far. Setscan does most of the work. } begin stucase(a,ua); { for stepscan's benefit } p := setscan(tree,tw,a); if p<>nil then { phony node only if a.len=0 } if p^.skip then p := treestep(tw); if p<>nil then begin { this node has to be equal } stget(p^.iref,x); k := x.len+1 end else k := 0 end; procedure stepscan; { find the next match to the original string, leaving its value in x, or k=0 if there is none. } begin k := 0; p := treestep(tw); if p<>nil then if p^.skip then p := treestep(tw); if p<>nil then if equal=sxcomp(ua,p^.uref) then begin stget(p^.iref,x); k := x.len+1 end end; begin { the main Get Term procedure } indent; write('term: '); j := 1; oj := j; { no data in the a-string } k := 0; { no esc-scan working } quit := false; { not finished yet (hardly!) } repeat a.val[j] := nullch; { keep "a" a finished string } a.len := j-1; { ..at all times } c := readnc; case c of asciibs : { destructive backspace } if j>1 then begin write(chr(asciiblank),chr(asciibs)); j := j-1; oj := j; { the current scan is accepted } k := 0; { ..and no scan is underway } end; asciicr : { normal completion } begin write(chr(asciilf)); quit := true end; asciilf : { complete, move on to subterm } begin write(chr(asciicr)); quit := true end; asciiesc : { automatic scan for match } begin backup; { wipe rejected match if any } if k=0 then startscan else stepscan; if k=0 then { no (further) match found } write(chr(asciibel)) else { next (first?) match found } while j it } begin if pg>p1^.num then makepage(p1^.next,pg); goto 103 end; 102: {p1^.num <= pg p1^.num then begin makepage(p3,pg); p3^.next := p2; p1^.next := p3 end end; 103: ; end end; procedure load(var atree:pnode); { input control: load terms into a tree from the keyboard. the code is recursive; if the user wants to do a subterm this routine calls itself to load the sub-tree of the superior term's node. A page number of zero is a disaster when we reload the saved tree, so one is converted to -1.} var aterm : str; anode : pnode; apage : integer; cont : boolean; begin repeat getterm(atree,aterm,cont); if aterm.len>0 then begin anode := insert(atree,aterm); if not cont then begin getpage(apage); if apage=0 then apage := 32767; addpage(anode,apage) end else begin { user hit lf, wants to recurse } if anode^.subt=nil then startree(anode^.subt); indlevel := indlevel+1; load(anode^.subt); indlevel := indlevel-1 end end; until (aterm.len=0) or (indlevel>0) end; {-------------------------------------------------------------} { routines for phase 2 -- output } {-------------------------------------------------------------} procedure filenode(np: pnode; var oc: nchar); { write one node's contents, term + pages, to the output. It is at this level that we insert a blank line on a break in the sequence of main-term initial letters. Once more, a loop over an ordered chain is cleaner with Goto. } label 99; var a : str; p : ppage; i : 0..9; j : strindex; k1, k2 : integer; ic : nchar; begin if not np^.skip then begin { ignore phony nodes } stget(np^.iref,a); ic := upcase(a.val[1]); if (indlevel=0) and { main-term initial change? } (oc<>ic) then writeln(outfile); oc := ic; for i := 1 to indlevel do write(outfile,' '); for j := 1 to a.len do write(outfile,chr(a.val[j])); p := np^.phead; while p<>nil do begin write(outfile,' '); k1 := p^.num; k2 := k1+1; 99:p := p^.next; { elide sequential numbers } if p<>nil then if p^.num=k2 then begin k2 := k2+1; goto 99 end; write(outfile,k1:1); { write "17" or "17-19" } if (k1+1)nil then write(outfile,','); end; writeln(outfile); end end; procedure filetree(intree: pnode); { walk through a (sub-) tree and write each node } var tree : pnode; tw : treewalk; oc : nchar; begin oc := nullch; initwalk(intree,tw); tree := treestep(tw); while tree<>nil do begin filenode(tree,oc); if tree^.subt<>nil then begin indlevel := indlevel+1; filetree(tree^.subt); indlevel := indlevel-1 end; tree := treestep(tw) end end; procedure dump; begin rewrite('INDEXER.OUT',outfile); filetree(maintree) end; {-------------------------------------------------------------} { routines for phase 0 -- initialization } {-------------------------------------------------------------} procedure init; { initialize the various mechanisms } begin indlevel := 0; new (sbufptrs[1]); sbufcnt := 1; sbufptrs[1]^.free := sbufsize; initerm.val[1] := ord('M'); initerm.val[2] := nullch; initerm.len := 1; startree(maintree); end; procedure loadall; { if a saved-tree file INDEXER.TRE exists, load its values into the tree. } var loadtree : file of nchar; x : str; j : strindex; p : pnode; k : integer; k1,k2 : 0..255; procedure reload(t:pnode); { reload one (sub-)tree from the saved-tree file } { the recorded form of one node of a tree is: termlength (1..strmax-1), that many term bytes in reverse order, page numbers as high byte, low byte, page number of (zero,zero). the file is a sequence of terms as above. a tree ends with a byte of zero. a sub-tree is introduced with a byte of strmax. } begin {$r- range checks off during byte i/o } read(loadtree,j); while j<>nullch do begin x.len := j; for j := j downto 1 do read(loadtree,x.val[j]); x.val[x.len+1] := nullch; p := insert(t,x); repeat read(loadtree,k1,k2); k := (k1*256)+k2; if k<>0 then addpage(p,k) until k=0; read(loadtree,j); if j=strmax then begin { a sub-tree } startree(p^.subt); reload(p^.subt); read(loadtree,j) end end end; {$r+ } begin reset('INDEXER.TRE',loadtree); if not eof(loadtree) then reload(maintree) end; {-------------------------------------------------------------} { routines for phase 3 -- termination } {-------------------------------------------------------------} procedure saveall; { save the term-tree in the file INDEXER.TRE so it can be reloaded for additions later, if need be. } var savetree : file of nchar; x : str; procedure unload(t:pnode); { dump the contents of a (sub-) tree to disk in "preorder," a sequence such that the exact layout of the tree will be reconstructed if the tree is reloaded from the file. } label 99; var j : strindex; p : ppage; k : integer; k1, k2 : nchar; begin {$r- range checks off during byte i/o } if t^.skip then goto 99; { dump not the phony node } stget(t^.iref,x); write(savetree,x.len); for j:=x.len downto 1 do write(savetree,x.val[j]); p := t^.phead; while p<>nil do begin k := p^.num; k1 := k div 256; k2 := k mod 256; write(savetree,k1,k2); p := p^.next end; write(savetree,nullch,nullch); { flag end of pages } if t^.subt<>nil then begin write(savetree,strmax);{ flag start of subtree } unload(t^.subt); write(savetree,nullch) { flag end of subtree } end; 99: if t^.lson<>nil then unload(t^.lson); if t^.rson<>nil then unload(t^.rson); end; {$r+ } begin rewrite('INDEXER.TRE',savetree); unload(maintree); write(savetree,nullch) { flag end of main tree } end; {-------------------------------------------------------------} { The main program, at last..... } {-------------------------------------------------------------} begin init; loadall; load(maintree); saveall; dump end. .