{$R-}    {Range checking off}
{$B-}    {Boolean short circuiting off}
{$S+}    {Stack checking on}
{$I+}    {I/O checking on}
{$N-}    {No numeric coprocessor}

program refs;

	(*REFS-- find and list references in manuscripts

  COPYRIGHT 1985 by Ross A. Alford
  All commercial rights reserved.  This software is released for
  nonprofit distribution only.	Any commercial distribution should be
  undertaken only with the express consent of the copyright holder:

		    Ross A. Alford
		    Department of Zoology
		    Duke University
		    Durham, NC 27706
		    ...[decvax, ihnp4, akgua]!mcnc!ecsvax!alford

  REFS finds references in scientific manuscripts.  It will list references
  found and the number of times they are occur to a file, a printer, or
  the system console.  It  should work with references of the forms:

   Smith, 1980                   |Smith (1980)
	 Smith, 1980a                  |Smith (1980a)
	 Smith, 1980a, b               |Smith (1980a, b)
	 Smith, 1980a, 1980b           |Smith (1980a, 1980b)
	 Smith and Smith, 1980         |Smith and Smith (1980)
	 Smith et al., 1980            |Smith et al. (1980)
	 Smith's 1980                  |Smith's (1980)
	 Smith, Smith, and Smith, 1980 |Smith, Smith, and Smith (1980)
	 Smith-Smythe and Smith 1980

	 Smith {\it et al.} (1980)			( Added by JM-M, see below )
	 \fnote{Smith (1980)}           (           ditto          )

  and with most any similar style.  It also allows the last digit of the year
  to be replaced by a letter, as Smith, 198x, for cases where the exact date
  is uncertain.  It may not work entirely properly on references in tabular
  formats, specifically if a reference of the form Smith 1980a,b is split
  between lines so that the 'b' is widely separated from the 'a'.

  Month, year dates, as July, 1980, also are treated as references.  You never
  know when some person might have the same name as a month.

  Operating the program is simple, and is documented in the msgexit function.
  Just run the program with no parameters for a description.  I apologize for
  the paucity of comments, but after all this is self-documenting Pascal :-)

  Please let me know of any bugs found, bug fixes made, or improvements
  made.

	Ross Alford*)

(* REVISED

  Version:       1.5j
  Revisions by:  Jeff_MacKie-Mason@um.cc.umich.edu
                 Dept. of Economics
                 Univ. of Michigan
                 Ann Arbor, MI 48109

  Revision date: 18 November 1987
  							  6 November 1989

  Revision abstract:
     1) Upgraded for compilation with Turbo Pascal v. 4.0.
          Actually, I used the Turbo3 standard unit, so version 3.0
          definitions are mostly in effect, but compiling with v. 4.0
          leads to a smaller executable.

          Had to change DOS device names, and add a result parameter
          to blockread to handle incomplete reads.

     2) Rewrote GetArg function to use ParamStr (especially since
					it is illegal to use CSeg in an absolute declaration)

		 3) Changed MemAvail to longint, converted returned bytes to paragraphs,
				deleted reference to Turbo3 unit, recompiled with Turbo5. (11/6/89)

		 4) Modified logic to recognize refs that begin \fnote's,
				and refs of the form Baker {\it et al.} (1980) in TeX. (11/6/89)

		 5) Modified logic to recognize \footnote and refs of form
				Baker {\em et al.} (1980) for LaTex.  (11/7/89)
*)

Uses
	Crt;

const charsect = 128;
      namelen = 60;
			version = '1.3';
			jversion = '5.1j';
 
type fnamestr = string[14];
     msgstr = string[80];
     tabletyp = array[0..127] of boolean;
     buftype = array[1..CHARSECT] of byte;
     nametyp = string[NAMELEN];
     datetyp = string[5];
     refptr = ^reference;
     reference = record
		   next : refptr;
		   name : string[NAMELEN];
		   number : integer
		 end;
     sectptr = ^sectrec;
     sectrec = record
		next : sectptr;
		previous : sectptr;
		data : buftype
	      end;

var inf : file;
    i,j,ptrsave,sinceref : integer;
    c : byte;
    oldyear,year : datetyp;
    xtra,name,tempname,oldname : nametyp;
    closeparen,notfound : boolean;
    outfname,infile : fnamestr;
    reflist : refptr;
    result : word;                             { new in v.1.3j}

{intentional global variables- to speed things up}
 
    outf : text;
    lowcase,isupcase,otherbad,letter,number : tabletyp;
    cursectnum,numinfile : integer;
    infopen,outfopen,hitnumber,comma,done : boolean;
    sector,savesect : sectptr;		{current sector in use}
    ptr,saveptr : integer;		{location within sector}


{---------exit gracefully with info---------------------------------------}

procedure msgexit(msg : msgstr);
 
begin
  if infopen then close(inf);
  if outfopen then close(outf);
  writeln;
  if msg <> '' then
    begin
      writeln(chr(7),msg);
      writeln
    end;
  writeln('REFS finds references in the name, date form in manuscripts.');
  writeln;
  writeln('To run REFS enter a command line like:');
  writeln;
  writeln('A>refs infile {outfile}');
  writeln;
  writeln('Where infile is a DOS filename of the form drive:filename.ext');
  writeln;
  writeln('and outfile can be either a disk file, given in the same format ');
  writeln('as infile, or can be given as CON to send output to the CRT screen');
  writeln('or LPT1 to send output to the DOS list device.');
  writeln;
  writeln('If outfile is not specified, a file of the same base name as infile');
  writeln('but with the extension .REF, will be created on the same drive that');
  writeln('infile is read from.');
  writeln;
  halt
end;
 
 
{---------------------read a sector into a sector buffer-------------------}
 
procedure readsector(var sector : sectptr);

begin
  if cursectnum < numinfile then
    begin
      blockread(inf,sector^.data,1);
      cursectnum := succ(cursectnum)
    end
    else done := TRUE
end;
 

{------------------------get a new sector buffer node-------------------}

procedure getsectnode(var sector : sectptr);
 
var n : longint;

begin
	n := memavail div 16;
  if ((n and $7fff) < 512) then msgexit('Out of memory during sector read');
  new(sector);
  sector^.next := NIL;
  sector^.previous := NIL
end;


{----------------return the character currently pointed to-------------}

function curbyte : byte;	       {uses globals sector and ptr}
 
begin
  curbyte := sector^.data[ptr] and $7f
end;


{---------get next character, read a new sector if needed--------------}

function nextbyte : byte;	       {uses globals sector and ptr}

var tempsec : sectptr;
    t : byte;
 
begin
  ptr := succ(ptr);
  if ptr > 128 then
    if sector^.next = NIL then
      begin
	tempsec := sector^.previous;
	if tempsec = NIL then getsectnode(tempsec);
	readsector(tempsec);
	if not done then
	  begin
	    tempsec^.previous := sector;
	    sector^.previous := NIL;
	    sector^.next := tempsec;
	    tempsec^.next := NIL;
	    sector := tempsec;
	    ptr := 1
	  end
      end
      else begin
	tempsec := sector^.next;
	tempsec^.next := NIL;
	tempsec^.previous := sector;
	sector^.previous := NIL;
	sector := tempsec;
	ptr := 1;
      end;
  t := sector^.data[ptr];
  if t = 26 then done := TRUE;
  nextbyte := t and $7f
end;


{--------------------return previous character--------------------}

function prevbyte : byte;	       {uses globals sector and ptr}

var tempsec : sectptr;
    ch : byte;

begin
  ptr := pred(ptr);
  if ptr < 1 then
    begin
      if sector^.previous <> NIL then
	begin
	  tempsec := sector^.previous;
	  tempsec^.next := sector;
	  tempsec^.previous := NIL;
	  sector^.next := NIL;
	  sector := tempsec;
	  ptr := 128
	end
    end;
  if (ptr < 1) then
    prevbyte := 0
    else prevbyte := sector^.data[ptr] and $7f
end;


{return previous alphabetic word.  Set the global 'comma'=TRUE if a comma
  follows it.  Set the global 'hitnumber' TRUE if a digit is encountered.
  Return no word if any of the characters for which corresponding entries
  in the tables 'otherbad' or 'number' have been set true is encountered.}

function prevword : nametyp;

var c,d : byte;
    i : integer;
    gotalet : boolean;
    name : nametyp;

begin
  i := 0;
  comma := FALSE;
  gotalet := FALSE;
  hitnumber := FALSE;
  name := '';
  repeat
    c := prevbyte;
    i := succ(i);
    if (c = ord(',')) then comma := TRUE;
    if letter[c] then
      begin
	if (not number[prevbyte]) then
	  gotalet := TRUE;
	d := nextbyte				 {readjust pointer}
      end;
    if number[c] then hitnumber := TRUE;
    if otherbad[c] then i := 126
  until gotalet or (i = 126);
  if gotalet then
    while letter[c] do
      begin
	name := chr(c) + name;
	c := prevbyte
      end;
  c := nextbyte;
  prevword := name
end;


{--------------save position in file before backwards scan-----------}

procedure saveposition;

begin
  savesect := sector;
  saveptr := ptr
end;


{------------restore position in file after backwards scan------------}

procedure restoreposition;

begin
  sector := savesect;
  ptr := saveptr
end;


{Set up truth tables for membership in sets of characters.  Indexing into
 these tables is much faster than using the standard set notation.}

procedure inittables;

var i : integer;

begin
  for i := 0 to 127 do
    begin
      letter[i] := (((i > $40) and (i < $5b)) or ((i > $60) and (i < $7b)));
      number[i] := ((i >= ord('0')) and (i <= ord('9')));
      isupcase[i] := ((i > $40) and (i < $5b));
      lowcase[i] := ((i > $60) and (i < $7b));
      otherbad[i] := FALSE;
      if chr(i) in ['=','<','>',':'] then otherbad[i] := TRUE
    end;
	letter[39] := TRUE;				  {apostrophe is a letter}
	letter[ord('{')] := TRUE;		{open curly bracket is a letter, for TeX: JMM}
	letter[ord('\')] := TRUE;		{so is backslash: JMM}
	letter[ord('-')] := TRUE;		{so is hyphen}
end;


{Write the accumulated linked list of references onto the defined output file}

procedure writelist(var outf : text; list : refptr);

var current : refptr;
    totcites,totrefs : integer;

begin
  totcites := 0;
  totrefs := 0;
  writeln(outf,'References from file ',infile);
  writeln(outf);
  writeln(outf,'Author(s) and date; number of times cited');
  writeln(outf);
  current := list^.next;
  while current <> NIL do
    begin
      writeln(outf,current^.name,';  ',current^.number);
      totrefs := succ(totrefs);
      totcites := totcites + current^.number;
      current := current^.next
    end;
  writeln(outf);
  writeln(outf,'Total count of citations in text = ',totcites);
  writeln(outf,'Total number of references cited = ',totrefs);
  close(outf)
end;


{-------------return an initialized storage node for a reference-----------}

procedure getrefnode(var x : refptr);

var i : longint;

begin
	i := memavail div 16;
  if ((i > 0) and (i < 512)) then
      msgexit('Out of memory--too many references--try splitting input file');
  new(x);
  x^.next := NIL;
  x^.name := 'A';
  x^.number := 1
end;

{-------------------------------------------------------------------
function getarg reads a series of characters from the DOS command line buffer.
It returns everything up to the next space it encounters and saves what's left
of the buffer.	If there's nothing left, it returns the empty string.  This
function is VERY Turbo-specific: it relies on static variable allocation
to preserve the command string between calls}

(*function getarg : fnamestr;        { commented out to upgrade to TP4 }

const called : boolean = FALSE; 		 {used while debugging only}
      i : integer = 1;
      j : integer = 1;

{var cmdbuf : string[127] absolute $80;}    	    {to run after compiling to
	                                               memory, comment out the
                                                  'absolute $80'}

var cmdbuf : string[127] absolute CSeg:$80;     {for MS/PC DOS command-line}

begin

  if not called then				 {used for debugging}
    begin					 {with memory compilation}
      write('Enter command line: ');
      readln(cmdbuf);				 {remove comments to use}
      called := TRUE				 {after compiling to memory}
    end;

  while cmdbuf[i] = ' ' do			 {skip leading blanks}
    i := succ(i);
  j := i;					 {point to start}
  while (not(cmdbuf[i] = ' ') and (i <= length(cmdbuf))) do
    begin
      cmdbuf[i] := upcase(cmdbuf[i]);		 {all commands upper case}
      i := succ(i)				 {find end}
    end;
  getarg := copy(cmdbuf,j,i - j);		 {assign return value}
  j := i					 {new starting location}
end;
*)

function getarg(param : integer) : fnamestr;
var i   : integer;
    arg : string;
begin
  arg := ParamStr(param);
  if (length(arg) >= 1) then
    for i := 1 to length(arg) do
      arg[i] := UpCase(arg[i]);
  getarg := arg;
end { of function getarg } ;


{Add a new reference to the list of references, maintaining sorted order}

procedure addtolist(list : refptr; name : nametyp);

var current, newnode, last : refptr;

begin
  current := list;
  last := list;
  while ((current^.next <> NIL) and (name > current^.name)) do
    begin
      last := current;
      current := current^.next
    end;
  if name <> current^.name then
    begin
      getrefnode(newnode);
      newnode^.name := name;
      if name > current^.name then
	current^.next := newnode
	else begin
	  newnode^.next := current;
	  last^.next := newnode;
	  newnode^.name := name
	end
    end
    else current^.number := succ(current^.number)
end;

begin
  HighVideo;
  writeln;
  writeln('REFS version ',VERSION);
  writeln('Copyright 1985 by Ross A. Alford');
	writeln('All commercial rights reserved');
	writeln;
	writeln('Revised version ', JVersion);
	writeln('Revisions 1987, 1989 by Jeff MacKie-Mason');
  writeln; NormVideo;
  inittables;
  ptr := 0;
  year := '';
  name := '';
  comma := FALSE;
  closeparen := FALSE;
  infopen := FALSE;
  outfopen := FALSE;
  getsectnode(sector);
  getrefnode(reflist);
  infile := GetArg(1);
  if infile = '' then msgexit('Input filename not specified');
  assign(inf,infile);
{$I-}
  reset(inf);
{$I+}
  if ioresult <> 0 then msgexit('Input file not found');
  infopen := TRUE;
  numinfile := filesize(inf);
  outfname := GetArg(2);
  if outfname = '' then
    begin
      i := pos('.',infile) - 1;
      if i = 0 then i := length(infile);
      outfname := copy(infile,1,i);
      outfname := outfname + '.REF';
    end;
  assign(outf,outfname);
  if ((outfname <> 'CON') and (outfname <> 'LPT1')) then
    begin
{$I-}
			reset(outf);
{$I+}
      if ioresult = 0 then msgexit('Output file exists--cannot overwrite')
    end;
  rewrite(outf);
  outfopen := TRUE;
  blockread(inf,sector^.data,1,result);
  cursectnum := 1;
  done := FALSE;
  repeat
    if nextbyte = ord('1') then 		   {CHECK FOR POTENTIAL DATE}
      begin					   {add check for '2' in 1999}
	year := chr(curbyte);			   { :-) }
	comma := false;
	if number[nextbyte] then
	  begin
	    year := year + chr(curbyte);
	    if number[nextbyte] then
	      begin
		year := year + chr(curbyte);
		if (number[nextbyte] or letter[curbyte]) then
		  begin
		    year := year + chr(curbyte);
		    if (not number[nextbyte]) then
		      if lowcase[curbyte] then year := year + chr(curbyte);
		  end
	      end
	  end
      end;
    if (length(year) = 4) or (length(year) = 5) then
      begin				    {got a date}
	saveposition;
	closeparen := FALSE;		    {for later reference}
	comma := FALSE; 		    {for later reference}
	c := prevbyte;			    {skip back four to avoid date}
	c := prevbyte;
	c := prevbyte;
	c := prevbyte;
	name := '';
	xtra := '';

{*	Logic in this Repeat loop has been somewhat modified by JMM in order  *}
{*	to catch two reference types in TeX files.  See notes in header.      *}
	repeat
	  notfound := TRUE;
	  repeat
	    tempname := prevword
		until ((tempname = '') or (length(tempname) > 1));
	  if ((hitnumber) and (name <> '')) then tempname := '';
	  if tempname[length(tempname) - 1] = chr(39) then   {fix posessives}
	    tempname := copy(tempname,1,length(tempname) - 2);
	  if tempname[length(tempname)] = chr(39) then	 {fix other posessives}
	    tempname := copy(tempname,1,length(tempname) - 1);
	  if isupcase[ord(tempname[2])] then tempname := '';	 {no abbrevs}
	  if ((tempname[1] = '-') or (tempname[length(tempname)] = '-'))
	    then tempname := '';   {no leading/trailing hyphens}
		if (tempname <> '') and ((isupcase[ord(tempname[1])])
				or (Pos('\fnote{',tempname) = 1)
				or (Pos('\footnote{',tempname) = 1)) then begin
			if Pos('\fnote{',tempname) = 1 then
				tempname := copy(tempname,8,length(tempname)-7);
			if Pos('\footnote{',tempname) = 1 then
				tempname := copy(tempname,11,length(tempname)-10);
			if ((name =  '') or (xtra <> '') or comma) then	begin
				name := tempname + ' ' + xtra + name;
				xtra := '';
				notfound := FALSE
				end
			end
		else if ((tempname = 'and')
				or (tempname = 'et')
				or (tempname = 'al}')
				or (tempname = 'al')) then
			begin
				xtra := tempname + ' ' + xtra;
				notfound := FALSE
				end
			else if ((tempname = '{\it') or (tempname = '{\em')) then
				notfound := FALSE;
	until notfound;
	if name <> '' then
	  begin
	    oldname := name;
	    oldyear := year;
	    name := name + year;
	    addtolist(reflist,name);
	    sinceref := 0
	  end;
	restoreposition;
	year := '';
      end
      else if sinceref < 4 then 	 {check for the Smith 1980a, b form}
	if (lowcase[curbyte] and (length(oldyear) = 5))
	  then begin
	    if ((not letter[nextbyte]) and comma and (not closeparen)) then
	      begin
		addtolist(reflist,oldname + copy(oldyear,1,4) + chr(prevbyte));
		sinceref := 0
	      end
	      else begin
		c := prevbyte;
		sinceref := 10
	      end
	  end;
    comma := (comma or (curbyte = ord(',')));
    closeparen := (closeparen or (curbyte = ord(')')));
    sinceref := succ(sinceref);
  until done;
  NormVideo;
  writelist(outf,reflist);
end.
