{
@abstract(provides simplified Pascal scanner)
@created(24 Feb 1999)
@lastmod(6 Apr 2000)
@author(Marco Schmidt (marcoschmidt@geocities.com))

The scanner object @link(TScanner) returns tokens from a Pascal language
character input stream.
It uses the @link(Tokenizi) unit to get tokens, regarding conditional
directives that might lead to including another files or will add or
delete conditional directives.
So, this scanner is a combined tokenizer and pre-processor.
}

unit Scanning;

{$I platform.inc}

interface

uses
  SysUtils,

  Objects,
  Streams,
  Tokenizi;

const
  { maximum number of streams we can recurse into; first one is the unit
    stream, any other stream an include file; current value is 32, increase
    this if you have more include files recursively including others }
  MAX_TOKENIZERS = 32;

type
  { subrange type that has the 26 lower case letters from a to z }
  TLowerCaseLetter = 'a'..'z';
  { an array of boolean values, index type is @link(TLowerCaseLetter) }
  TSwitchOptions = array[TLowerCaseLetter] of Boolean;
  { a pointer to a @link(TScanner) object }
  PScanner = ^TScanner;
  { This class scans one unit using one or more @link(TTokenizer) objects
    to scan the unit and all nested include files.
  }
  TScanner = object(TObject)
    BufferedToken: PToken;
    CT: Integer;
    DirectiveLevel: Integer;
    Directives: PStringCollection;
    ErrorMessage: string;
    IncludeFilePaths: PCollection;
    IsTokenBuffered: Boolean;
    SwitchOptions: TSwitchOptions;
    Tokenizers: array[0..MAX_TOKENIZERS - 1] of PTokenizer;
    { Creates a TScanner object that scans the given input stream. }
    constructor Init(s: PInputStream);
    destructor Done; virtual;
    { Adds parameter string to the list of directives.
      Returns true on success, false on failure (e.g. in case that there
      wasn't enough memory. }
    function AddDirective(n: string): Boolean;
    { Adds all directives in the parameter string collection by calling
      @link(AddDirective) for each of the strings in that collection. }
    function AddDirectives(DL: PStringCollection): Boolean;
    { Gets next token and throws it away. }
    procedure ConsumeToken;
    { Removes directive N from the internal list of directives.
      If N was not in that list, nothing is done. }
    procedure DeleteDirective(n: string);
    { Returns the name of the file that is currently processed and the line
      number. Good for meaningful error messages. }
    function GetStreamInfo: string;
    { Returns next token as parameter. Returns true on success, false on error. }
    function GetToken(var t: PToken): Boolean;
    { Returns if a given directive N is defined at the moment. }
    function IsDirectiveDefined(n: string): Boolean;
    function IsSwitchDefined(n: string): Boolean;
    function OpenInclude(n: string): Boolean;
    function PeekToken(var t: PToken): Boolean;
    function SkipUntilElseOrEndif(out FoundElse: Boolean): Boolean;
    procedure UnGetToken(t: PToken);
    procedure UpdateSwitchOptions(o: string);
  end;

implementation

uses
  Arrays,
  FileStre,
  Msg,
  Texts;

type
  { all directives a scanner is going to regard }
  TDirectiveType = (DT_DEFINE, DT_ELSE, DT_ENDIF, DT_IFDEF, DT_IFNDEF,
    DT_IFOPT, DT_INCLUDE_FILE, DT_UNDEF, DT_UNKNOWN);

const
  NUM_DIRECTIVES = 8;
  DirectiveNames: array[0..NUM_DIRECTIVES - 1] of string[31] =
  ('DEFINE', 'ELSE', 'ENDIF', 'IFDEF', 'IFNDEF', 'IFOPT', 'I', 'UNDEF');

  { this function recognizes only those directives we'll need for the scanner }
function SplitDirective(t: PText; var DirectiveName, Params: string): Boolean;
var
  i: LongInt;
begin
  SplitDirective := False;
  i := 0;
  DirectiveName := '';
  Params := '';
  { find dollar sign }
  while (i < t^.Content) and (t^.Data[i] <> '$') do
    Inc(i);
  if (i = t^.Content) then Exit;
  Inc(i);
  { get directive name }
  while (i < t^.Content) and (t^.Data[i] <> #32) and
    (t^.Data[i] <> '*') and (t^.Data[i] <> '}') do
    begin
      DirectiveName := DirectiveName + UpCase(t^.Data[i]);
      Inc(i);
    end;
  SplitDirective := True;
  if (t^.Data[i] = '*') or (t^.Data[i] = '}') then Exit;
  { skip spaces }
  while (i < t^.Content) and (t^.Data[i] = ' ') do
    Inc(i);
  if (i = t^.Content) then Exit;
  { get parameters - no conversion to uppercase here, it could be an include
    file name whose name need not be changed (platform.inc <> PLATFORM.INC) }
  while (i < t^.Content) and (t^.Data[i] <> #32) and
    (t^.Data[i] <> '*') and (t^.Data[i] <> '}') do
    begin
      Params := Params + t^.Data[i];
      Inc(i);
    end;
end;

function IdentifyDirective(t: PText; var dt: TDirectiveType; var p: string): Boolean;
var
  i: Integer;
  s: string;
begin
  IdentifyDirective := False;
  if (not SplitDirective(t, s, p)) then Exit;
  StringToUpper(s, s);
  for i := 0 to NUM_DIRECTIVES - 1 do
    begin
      if (s = DirectiveNames[i]) then
        begin
          dt := TDirectiveType(i);
          IdentifyDirective := True;
          Exit;
        end;
    end;
end;

{ TScanner }

constructor TScanner.Init(s: PInputStream);
var
  c: TLowerCaseLetter;
begin
  inherited Init;
  CT := 0;
  DirectiveLevel := 0;
  Directives := nil;
  ErrorMessage := '';
  IsTokenBuffered := False;
  for c := Low(TLowerCaseLetter) to High(TLowerCaseLetter) do
    SwitchOptions[c] := False;
  Tokenizers[0] := New(PTokenizer, Init(s));
  if (not Assigned(Tokenizers[0])) then Fail;
end;

destructor TScanner.Done;
var
  i: Integer;
begin
  i := 0;
  while (i <= CT) do
    begin
      if Assigned(Tokenizers[i]) then Dispose(Tokenizers[i], Done);
      Inc(i);
    end;
  inherited Done;
end;

function TScanner.AddDirective(n: string): Boolean;
var
  s: PString;
begin
  StringToUpper(n, n);
  if (not IsDirectiveDefined(n)) then
    begin
      Result := False;
      if (not Assigned(Directives)) then
        begin
          Directives := New(PStringCollection, Init(16, 16));
          if (not Assigned(Directives)) then Exit;
        end;
      s := NewStr(n);
      if (not Assigned(s)) then Exit;
      Directives^.Insert(s);
      {WriteLn('added directive "' + N + '"');}
      Result := True;
    end
  else
    begin
      {WriteLn('directive "' + N + '" already defined');}
      Result := True;
    end;
end;

function TScanner.AddDirectives(DL: PStringCollection): Boolean;
var
  s: PString;
  i: LongInt;
begin
  AddDirectives := True;
  if (not Assigned(DL)) or (DL^.Count < 1) then Exit;
  for i := 0 to DL^.Count - 1 do
    begin
      s := DL^.At(i);
      if Assigned(s) then
        begin
          if (not AddDirective(s^)) then
            begin
              AddDirectives := False;
              Exit;
            end;
        end;
    end;
end;

procedure TScanner.ConsumeToken;
begin
  IsTokenBuffered := False;
end;

procedure TScanner.DeleteDirective(n: string);
var
  i: LongInt;
begin
  if (not Assigned(Directives)) then Exit;
  StringToUpper(n, n);
  i := Directives^.IndexOf(@n);
  if (i <> -1) then
    begin
      Directives^.AtFree(i);
      PrintLn(6, 'Deleting Directive (' + n + ')');
    end
  else
    PrintLn(6, 'Deleting Directive (' + n + ') is not defined');
end;

function TScanner.GetStreamInfo: string;
begin
  if (CT >= 0) then
    GetStreamInfo := Tokenizers[CT]^.GetStreamInfo
  else
    GetStreamInfo := '';
end;

function TScanner.GetToken(var t: PToken): Boolean;
var
  dt: TDirectiveType;
  Finished: Boolean;
  FoundElse: Boolean;
  p: string;
begin
  if IsTokenBuffered then
    begin
      { we have a token buffered, we'll return this one }
      t := BufferedToken;
      IsTokenBuffered := False;
      Result := True;
      Exit;
    end;
  Result := False;
  Finished := False;
  repeat
    { check if we have a tokenizer left }
    if (CT = -1) then
      begin
        PrintLn(1, 'Error: End of stream reached while trying to get next token.');
        Exit;
      end;
    if Tokenizers[CT]^.HasData then
      begin
        { get next token from tokenizer }
        if (not Tokenizers[CT]^.GetToken(t)) then
          begin
            {writeln('tscanner.gettoken: could not get token  from tokenzier');}
            ErrorMessage := Tokenizers[CT]^.ErrorMessage;
            Exit;
          end;
        { check if token is a directive }
        if (t^.MyType = TOK_DIRECTIVE) then
          begin
            if (not IdentifyDirective(t^.Data, dt, p)) then
              begin
                { writeln(GetStreamInfo + 'tscanner.gettoken:
                  could not identify directive');}
                Dispose(t, Done);
                Continue;
              end;
            case dt of
              DT_DEFINE:
                begin
                  PrintLn(6, 'DEFINE encountered (' + p + ')');
                  AddDirective(p);
                end;
              DT_ELSE:
                begin
                  PrintLn(5, 'ELSE encountered');
                  if (DirectiveLevel > 0) then
                    begin
                      // RJ Dec(DirectiveLevel);
                      if not SkipUntilElseOrEndif(FoundElse) then Exit;
                      if not FoundElse then Dec(DirectiveLevel); // RJ
                    end
                  else
                    begin
                      ErrorMessage := GetStreamInfo + ': unexpected $ELSE directive.';
                      Exit;
                    end;
                end;
              DT_ENDIF:
                begin
                  PrintLn(5, 'ENDIF encountered');
                  if (DirectiveLevel > 0) then
                    begin
                      Dec(DirectiveLevel);
                      PrintLn (6, 'DirectiveLevel = ' + inttostr (DirectiveLevel));
                    end
                  else
                    begin
                      ErrorMessage := GetStreamInfo + ': unexpected $ENDIF directive.';
                      Exit;
                    end;
                end;
              DT_IFDEF:
                begin
                  Print(6, 'IFDEF encountered (' + p + '):');
                  if IsDirectiveDefined(p) then
                    begin
                      Inc(DirectiveLevel);
                      PrintLn(6, ' defined. Level ' + IntToStr (DirectiveLevel));
                    end
                  else
                    begin
                      PrintLn(6, ' not defined.');
                      if not SkipUntilElseOrEndif(FoundElse) then Exit;
                      if FoundElse then
                      begin
                        Inc(DirectiveLevel);
                        PrintLn (6, 'DirectiveLevel = ' + inttostr (DirectiveLevel));
                      end;
                    end;
                end;
              DT_IFNDEF:
                begin
                  Print(6, 'IFNDEF encountered (' + p + '):');
                  if not IsDirectiveDefined(p) then
                    begin
                      Inc(DirectiveLevel);
                      PrintLn(6, ' not defined. Level ' + IntToStr (DirectiveLevel));
                    end
                  else
                    begin
                      PrintLn(6, ' defined.');
                      if not SkipUntilElseOrEndif(FoundElse) then Exit;
                      // if FoundElse then Inc(DirectiveLevel);
                    end;
                end;
              DT_IFOPT:
                begin
                  if (not IsSwitchDefined(p)) then
                    begin
                      if (not SkipUntilElseOrEndif(FoundElse)) then Exit;
                      if FoundElse then Inc(DirectiveLevel);
                    end
                  else
                    Inc(DirectiveLevel);
                end;
              DT_INCLUDE_FILE:
                if not OpenInclude(p) then
                  begin
                    PrintLn(2, GetStreamInfo +
                      ': Error, could not open include file "' + p + '"');
                    Exit;
                  end;
              DT_UNDEF:
                begin
                  PrintLn(6, 'UNDEF encountered (' + p + '):');
                  DeleteDirective(p);
                end;
            end;
          end;
        if (t^.MyType = TOK_DIRECTIVE) then
          Dispose(t, Done)
        else
          Finished := True;
      end
    else
      begin
        PrintLn(5, 'Closing stream "' + Tokenizers[CT]^.GetStreamInfo + '"');
        Dispose(Tokenizers[CT], Done);
        Tokenizers[CT] := nil;
        Dec(CT);
      end;
  until Finished;
  Result := True;
end;

function TScanner.IsDirectiveDefined(n: string): Boolean;
begin
  StringToUpper(n, n);
  IsDirectiveDefined := Assigned(Directives) and
    (Directives^.IndexOf(@n) <> -1);
end;

function TScanner.IsSwitchDefined(n: string): Boolean;
var
  b1: Boolean;
  b2: Boolean;
  l: TLowerCaseLetter;
begin
  { we expect a length 2 string like 'I+' or 'A-', first character a letter,
    second character plus or minus }
  if ((n[1] >= 'A') and (n[1] <= 'Z')) then n[1] := Chr(Ord(n[1]) + 32);
  if (Length(n) < 2) or
    ((n[1] < 'a') and (n[1] > 'z')) or
    ((n[2] <> '-') and (n[2] <> '+')) then
    begin
      PrintLn(2, GetStreamInfo + ': Warning - invalid $ifopt parameters.');
      IsSwitchDefined := False;
    end
  else
    begin
      { look up switch from current table }
      l := TLowerCaseLetter(Ord(n[1]) - Ord('a'));
      b1 := SwitchOptions[l];
      { get status from parameter }
      b2 := (n[2] = '+');
      IsSwitchDefined := (b1 = b2);
    end;
end;

function TScanner.OpenInclude(n: string): Boolean;
var
  i: Integer;
  Name: string;
  NumAttempts: Integer;
  p: PString;
  s: PFileInputStream;
begin
  OpenInclude := False;
  { check if maximum number of tokenizers has been reached }
  if (CT = MAX_TOKENIZERS - 1) then
    begin
      ErrorMessage := GetStreamInfo + ': maximum number of tokenizers reached.';
      Exit;
    end;
  { determine how many names we can check; number is 1 + IncludeFilePaths^.Count }
  NumAttempts := 1;
  if Assigned(IncludeFilePaths) and (IncludeFilePaths^.Count > 0) then
    Inc(NumAttempts, IncludeFilePaths^.Count);
  s := nil;
  { loop until we have checked all names or one attempt was successful }
  for i := 0 to NumAttempts - 1 do
    begin
      if (i = 0) then
        Name := n
      else
        begin
          p := IncludeFilePaths^.At(i - 1);
          if Assigned(p) then
            Name := p^ + n
          else
            Continue; { next loop iteration }
        end;
      PrintLn(5, 'Trying to open include file "' + Name + '"...');
      s := New(PFileInputStream, Init(Name));
      if Assigned(s) then Break;
    end;
  { if we still don't have a valid open stream we failed }
  if (not Assigned(s)) then
    begin
      ErrorMessage := GetStreamInfo + ': could not open include file ' + n;
      Exit;
    end;
  { create new tokenizer with stream }
  Tokenizers[CT + 1] := New(PTokenizer, Init(s));
  if (not Assigned(Tokenizers[CT + 1])) then
    begin
      ErrorMessage := GetStreamInfo + ': could not create tokenizer.';
      Exit;
    end;
  Inc(CT);
  OpenInclude := True;
end;

function TScanner.PeekToken(var t: PToken): Boolean;
begin
  if IsTokenBuffered then
    begin
      t := BufferedToken;
      PeekToken := True;
    end
  else
    begin
      if GetToken(t) then
        begin
          IsTokenBuffered := True;
          BufferedToken := t;
          PeekToken := True;
        end
      else
        PeekToken := False;
    end;
end;

function TScanner.SkipUntilElseOrEndif(out FoundElse: Boolean): Boolean;
var
  dt: TDirectiveType;
  Level: Integer;
  p: string;
  t: PToken;
  TT: TTokenType;
begin
  Result := False;
  Level := 1;
  repeat
    if (not Tokenizers[CT]^.GetToken(t)) then
      begin
        WriteLn('error skipuntilelse gettoken');
        Exit;
      end;
    {writeln('token ', T^.Data^.GetString);}
    if (t^.MyType = TOK_DIRECTIVE) then
      begin
        if IdentifyDirective(t^.Data, dt, p) then
          begin
            PrintLn(6, 'SkipUntilElseOrFound: encountered directive ' + DirectiveNames[Ord(dt)]);
            case dt of
              DT_IFDEF,
                DT_IFNDEF,
                DT_IFOPT: Inc(Level);
              { RJ: We must jump over all nested $IFDEFs until its $ENDIF is
                encountered, ignoring all $ELSEs. We must therefore not
                decrement Level at $ELSE if it is part of such a nested $IFDEF.
                $ELSE must decrement Level only for the initial $IFDEF.
                That's why whe test Level for 1 (initial $IFDEF) here. }
              DT_ELSE:
                if Level = 1 then Dec(Level);
              DT_ENDIF: Dec(Level);
            end;
          end;
      end;
    TT := t^.MyType;
    Dispose(t, Done);
  until (Level = 0) and (TT = TOK_DIRECTIVE) and ((dt = DT_ELSE) or (dt = DT_ENDIF));
  FoundElse := (dt = DT_ELSE);
  if FoundElse then
    p := 'ELSE'
  else
    p := 'ENDIF';
  PrintLn(6, 'Skipped code, last token ' + p);
  Result := True;
end;

procedure TScanner.UnGetToken(t: PToken);
begin
  if IsTokenBuffered then
    begin
      PrintLn(1, GetStreamInfo + ': FATAL ERROR - CANNOT UNGET MORE THAN ONE TOKEN.');
      Halt(1);
    end;
  IsTokenBuffered := True;
  BufferedToken := t;
end;

procedure TScanner.UpdateSwitchOptions(o: string);
begin
  { assume dollar sign has been removed }
end;

begin
end.

