{
 BUSINESS CONSULTING
 s a i n t - p e t e r s b u r g

         Components Library for Borland Delphi 4.x, 5.x
         Copyright (c) 1998-2000 Alex'EM

}
unit DCSyntaxData;

interface

uses
  Windows, SysUtils, Classes;

type
  TLexemType = (lxWhitespace, lxKeyWord, lxIdentifier, lxString, lxNumber,
    lxComment, lxSymbol, lxSysWord);

  PLexemItem = ^TLexemItem;
  TLexemItem = packed record
    Item: TLexemType;
    Length: WORD;
  end;

  PLexemItems = ^TLexemItems;
  TLexemItems = packed array[0..0] of TLexemItem;

  PLineDataItem  = ^TLineDataItem;
  TLineDataItem = packed record
    FString: string;
    FObject: TObject;
    Comment: DWORD;
    PrevComment: DWORD;
    SyntaxType: DWORD;
    Capacity: WORD;
    Count: WORD;
    Lexems: PLexemItems;
  end;

  PLineDataItems = ^TLineDataItems;
  TLineDataItems = packed array[0..0] of TLineDataItem;

  TDCCustomSyntaxData = class;

  TSyntaxDataClass = class of TDCCustomSyntaxData;

  TDCCustomSyntaxData = class(TObject)
  private
    FSyntaxType: integer;
    FOpenComment: DWORD;
    function GetCommentLen(Comment: integer): integer;
  protected
    FKeyWords: string;
    FSysWords: string;
    FSymbols: string;
    FQuotes: string;
    FNumbers: string;
    FOpenComment1: DWORD;
    FCloseComment1: DWORD;
    FOpenComment2: DWORD;
    FCloseComment2: DWORD;
    FEOLComment1: DWORD;
    FEOLComment2: DWORD;
    function GetCloseComment(OpenComment: DWORD): DWORD;
    function IsKeyWord(Value: string): boolean;
    function IsSysWord(Value: string): boolean;
    function IsIdentChar(Value: Char; lHeading: boolean): boolean; virtual;
    function GetBlockComment(Source: PChar; OpenComment: DWORD; var LexemItem: TLexemItem;
           AInc: boolean = True): boolean;
    function GetEOLComment(Source: PChar; var LexemItem: TLexemItem; Comment: DWORD): boolean;
    function GetIdent(Source: PChar; var LexemItem: TLexemItem): boolean;
    function GetLex(Source: PChar; var LexemItem: TLexemItem): boolean;
    function GetNumber(Source: PChar; var LexemItem: TLexemItem): boolean; virtual;
    function GetString(Source: PChar; var LexemItem: TLexemItem): boolean; virtual;
    function GetHexNumber(Source: PChar; var LexemItem: TLexemItem): boolean;
    function GetDecNumber(Source: PChar; var LexemItem: TLexemItem): boolean;
  public
    constructor Create; virtual;
    destructor Destroy; override;
    function BuildComment(Value: PChar): DWORD;
    procedure ParseLine(pLineItems: PLineDataItem);
    function IsDelimiter(Value: Char): boolean;
  end;

  TDCDelphiSyntaxData = class(TDCCustomSyntaxData)
  public
    constructor Create; override;
    function GetNumber(Source: PChar; var LexemItem: TLexemItem): boolean; override;
    function GetString(Source: PChar; var LexemItem: TLexemItem): boolean; override;
  end;

  TDCSQLSyntaxData = class(TDCCustomSyntaxData)
  public
    constructor Create; override;
    function GetNumber(Source: PChar; var LexemItem: TLexemItem): boolean; override;
    function GetString(Source: PChar; var LexemItem: TLexemItem): boolean; override;
  end;

function LISetCapacity(var LexemItems: PLexemItems; Capacity: WORD): PLexemItems;

implementation

{ TDCCustomSyntaxData }

function LISetCapacity(var LexemItems: PLexemItems; Capacity: WORD): PLexemItems;
begin
  ReallocMem(LexemItems, Capacity * SizeOf(TLexemItem));
  Result := LexemItems;
end;

function TDCCustomSyntaxData.BuildComment(Value: PChar): DWORD;
begin
  Result := PDWORD(Value)^;
end;

constructor TDCCustomSyntaxData.Create;
begin
  inherited Create;
  FSysWords := '';
  FKeyWords := '';
  FSyntaxType := 0;
end;

destructor TDCCustomSyntaxData.Destroy;
begin
  inherited;
end;

function TDCCustomSyntaxData.GetBlockComment(Source: PChar;
  OpenComment: DWORD; var LexemItem: TLexemItem; AInc: boolean = True): boolean;
 var
  CloseComment: DWORD;
  pValue: PChar;
  i, j: DWORD;
begin
  LexemItem.Item   := lxComment;

  LexemItem.Length := GetCommentLen(OpenComment);
  CloseComment   := GetCloseComment(OpenComment);

  i := GetCommentLen(CloseComment);
  j := 8 * (Sizeof(DWORD) - i);

  pValue := Source;
  Result := True;

  if AInc then Inc(pValue, LexemItem.Length);

  while (pValue^ <> #0) and
        ((PDWORD(pValue)^ shl j) shr j <> CloseComment) do Inc(pValue);

  LexemItem.Length := pValue - Source;

  if pValue^ = #0 then
    FOpenComment := OpenComment
  else begin
    LexemItem.Length := LexemItem.Length + i;
    FOpenComment := 0;
  end;
end;

function TDCCustomSyntaxData.GetCloseComment(OpenComment: DWORD): DWORD;
begin
  if OpenComment = FOpenComment1 then Result := FCloseComment1 else
  if OpenComment = FOpenComment2 then Result := FCloseComment2 else Result := 0;
end;

function TDCCustomSyntaxData.GetCommentLen(Comment: integer): integer;
begin
  Result := 0;
  while Comment > 0 do
  begin
    Comment := Comment shr 8;
    inc(Result);
  end;
end;

function TDCCustomSyntaxData.GetDecNumber(Source: PChar;
  var LexemItem: TLexemItem): boolean;

 type
  TNumericPart = (npIntegral, npDecimal, npExponent);

 var
  pValue: PChar;
  NumericPart:  TNumericPart;
  Values: array[TNumericPart] of string[30];
  ESigns: array[TNumericPart] of ShortInt;
begin
  LexemItem.Item := lxNumber;

  Result   := True;
  pValue   := Source;

  ESigns[npIntegral] := 0;
  ESigns[npDecimal ] := -1;
  ESigns[npExponent] := 0;

  for NumericPart := npIntegral to npExponent do Values[NumericPart] := '';
  NumericPart := npIntegral;

  while (Source^ <> #0) do
  begin
    case Source^ of
      '+', '-':
        if (ESigns[NumericPart] = 0) and (Values[NumericPart] = '') then
          ESigns[NumericPart] := ESigns[NumericPart] + 1
        else
          Break;
      'E', 'e':
        if (NumericPart <> npExponent) and
           ((NumericPart = npIntegral) and (Values[NumericPart] <> '') or
            (NumericPart = npDecimal ) and (Values[NumericPart] <> '')) then
          NumericPart := npExponent
        else
          Break;
      '0'..'9':
        Values[NumericPart] := Values[NumericPart] + Source^;
      else
        if (Source^ = {DecimalSeparator}'.') and
           (NumericPart = npIntegral)
        then
          NumericPart := npDecimal
        else
          Break;
    end;
    Inc(Source);
  end;

  LexemItem.Length := Source - pValue;
end;


function TDCCustomSyntaxData.GetEOLComment(Source: PChar;
  var LexemItem: TLexemItem; Comment: DWORD): boolean;
begin
  Result   := True;
  LexemItem.Item   := lxComment;
  LexemItem.Length := StrLen(Source);
end;

function TDCCustomSyntaxData.GetHexNumber(Source: PChar;
  var LexemItem: TLexemItem): boolean;
 var
  pValue: PChar;
begin
  LexemItem.Item := lxNumber;

  Result   := True;
  pValue   := Source;

  Inc(Source);
  while (Source^ >= '0') and (Source^ <= '9') or
        (Source^ >= 'A') and (Source^ <= 'F') or
        (Source^ >= 'a') and (Source^ <= 'f') do  Inc(Source);

  LexemItem.Length := Source - pValue;
end;

function TDCCustomSyntaxData.GetIdent(Source: PChar;
  var LexemItem: TLexemItem): boolean;
 var
  pValue: PChar;
  IdentValue: String;
begin
  Result  := True;
  pValue  := Source;
  Inc(pValue);

  while (pValue^ <> #0) and IsIdentChar(pValue^, False) do Inc(pValue);

  LexemItem.Length := pValue - Source;
  SetString(IdentValue, Source, LexemItem.Length);

  if IsKeyWord(IdentValue) then
    LexemItem.Item := lxKeyWord
  else
    if IsSysWord(IdentValue) then
      LexemItem.Item := lxSysWord
    else
      LexemItem.Item := lxIdentifier;
end;

function TDCCustomSyntaxData.GetLex(Source: PChar;
  var LexemItem: TLexemItem): boolean;
 var
  pValue: PChar;
  C: Char;
  I, W: DWORD;

 function BeginOpenComment(AComment, W: DWORD): boolean;
  var
   i, j: DWORD;
 begin
   i := GetCommentLen(AComment);
   j := 8 * (Sizeof(DWORD) - i);
   Result := (W shl j) shr j = AComment;
 end;

begin
  FOpenComment := 0;

  if Source^ = #0
  then begin
    Result := False;
    Exit;
  end;

  Result := True;
  pValue := Source;

  while Byte(pValue^) = VK_SPACE do Inc(pValue);

  if pValue > Source then
  begin
     LexemItem.Item   := lxWhiteSpace;
     LexemItem.Length := pValue - Source;
     Exit;
  end;

  while pValue^ <> #0 do
  begin
    C := pValue^;
    I := Byte(C);
    W := PDWORD(pValue)^;

    if (I = FOpenComment1) or BeginOpenComment(FOpenComment1, W)
    then begin
      if pValue > Source then Break;
      Result := GetBlockComment(pValue, FOpenComment1, LexemItem);
      Exit;
    end;
    if (I = FOpenComment2) or BeginOpenComment(FOpenComment2, W)
    then begin
      if pValue > Source then Break;
      Result := GetBlockComment(pValue, FOpenComment2, LexemItem);
      Exit;
    end;

    if (I = FEOLComment1) or BeginOpenComment(FEOLComment1, W)
    then begin
      if pValue > Source then Break;
      Result := GetEOLComment(pValue, LexemItem, FEOLComment1);
      Exit;
    end;
    if (I = FEOLComment2) or BeginOpenComment(FEOLComment2, W)
    then begin
      if pValue > Source then Break;
      Result := GetEOLComment(pValue, LexemItem, FEOLComment2);
      Exit;
    end;

    if StrScan(PChar(FQuotes), C) <> nil
    then begin
      if pValue > Source then break;
      Result := GetString(pValue, LexemItem);
      Exit;
    end;

    if ((C >= '0') and (C <= '9')) or (StrScan(PChar(FNumbers), C) <> nil)
    then begin
      if pValue > Source then break;
      Result := GetNumber(pValue, LexemItem);
      Exit;
    end;

    if StrScan(PChar(FSymbols), C) <> nil then
    begin
      if pValue > Source then Break;
      Inc(pValue);
      while (pValue^ <> #0) and (StrScan(PChar(FSymbols), pValue^) <> nil) do
      begin
        I := Byte(pValue^);
        W := PWORD(pValue)^;
        if (I = FOpenComment1) or (I = FOpenComment2) or (I = FEOLComment1) or (I = FEOLComment2) or
           (W = FOpenComment1) or (W = FOpenComment2) or (W = FEOLComment1) or (W = fEOLComment2) or
           (StrScan(PChar(FQuotes), pValue^) <> nil) then
          Break;
        Inc(pValue);
      end;
      LexemItem.Item   := lxSymbol;
      LexemItem.Length := pValue - Source;
      Exit;
    end;

    if IsIdentChar(C, True) then
    begin
      if pValue > Source then Break;
      Result := GetIdent(pValue, LexemItem);
      Exit;
    end;

    Inc(pValue);
  end;

  if pValue > Source then
  begin
     LexemItem.Item   := lxWhiteSpace;
     LexemItem.Length := pValue - Source;
  end;

end;

function TDCCustomSyntaxData.GetNumber(Source: PChar;
  var LexemItem: TLexemItem): boolean;
 var
  pValue: PChar;
begin
  LexemItem.Item := lxNumber;

  Result   := True;
  pValue   := Source;

  Inc(Source);
  while (Source^ >= '0') and (Source^ <= '9') do  Inc(Source);

  LexemItem.Length := Source - pValue;
end;

function TDCCustomSyntaxData.GetString(Source: PChar;
  var LexemItem: TLexemItem): boolean;
 var
  Quote : Char;
  pValue: PChar;
begin
   LexemItem.Item := lxString;

   Result   := True;
   Quote    := Source^;
   pValue   := Source;

   Inc(Source);

   while (Source^ <> #0) and (Source^ <> Quote) do Inc(Source);

   LexemItem.Length := Source - pValue;

   if Source^ <> #0 then Inc(LexemItem.Length);
end;

function TDCCustomSyntaxData.IsDelimiter(Value: Char): boolean;
begin
  Result := (Value = #32) or (StrScan(PChar(FSymbols), Value) <> nil);
end;

function TDCCustomSyntaxData.IsIdentChar(Value: char;
  lHeading: boolean): boolean;
begin
  Result := ((Value >= 'a') and (Value <= 'z')) or
            ((Value >= 'A') and (Value <= 'Z')) or
            (Value = '_') or
            ((not lHeading) and (Value >= '0') and (Value <= '9'));
end;

function TDCCustomSyntaxData.IsKeyWord(Value: string): boolean;
 var
  UpperValue: string;
  pValue: PChar;
begin
  UpperValue := ' ' + AnsiUpperCase(Value)+' ';
  pValue := StrPos(PChar(FKeyWords), PChar(UpperValue));
  Result := pValue <> nil;
end;
                                                 
function TDCCustomSyntaxData.IsSysWord(Value: string): boolean;
 var
  UpperValue: string;
begin
  UpperValue := ' ' + AnsiUpperCase(Value)+' ';
  Result := StrPos(PChar(FSysWords), PChar(UpperValue)) <> nil;
end;

procedure TDCCustomSyntaxData.ParseLine(pLineItems: PLineDataItem);
 var
  LexemItem: TLexemItem;
  Source: PChar;

  procedure AddItem;
  begin
    with pLineItems^ do
    begin
      if (Count > 0) and (Lexems^[Count-1].Item = LexemItem.Item)  then
        Inc(Lexems^[Count-1].Length, LexemItem.Length)
      else begin
        if Count = Capacity then
        begin
          if Capacity > 16 then
            Inc(Capacity, 16)
          else
            if Capacity > 8 then
              Inc(Capacity, 8)
            else
              Inc(Capacity, 4);
          Lexems := LISetCapacity(Lexems, Capacity);
        end;
        Lexems^[Count].Item   := LexemItem.Item;
        Lexems^[Count].Length := LexemItem.Length;
        Inc(Count);
      end;
    end;

    Inc(Source, LexemItem.Length);
    if (LexemItem.Item = lxComment) and (FOpenComment <> 0) then
      pLineItems^.Comment := FOpenComment;
  end;

begin
  with pLineItems^ do
  begin
    Count   := 0;
    Comment := 0;
    Source  := PChar(FString);
  end;

  if pLineItems^.PrevComment <> 0
  then begin
     if GetBlockComment(Source, pLineItems^.PrevComment, LexemItem, False) then
     begin
       AddItem;
       if FOpenComment <> 0 then
       begin
         pLineItems^.PrevComment := FOpenComment;
         Exit;
       end;
     end;
  end;

  while GetLex(Source, LexemItem) do AddItem;

end;

{ TDCDelphiSyntaxData }

constructor TDCDelphiSyntaxData.Create;
begin
  inherited;
  FKeyWords := ' AND ARRAY AS ASM BEGIN CASE CLASS CONST CONSTRUCTOR' +
               ' DESTRUCTOR DISPINTERFACE DIV DO DOWNTO ELSE END EXCEPT' +
               ' EXPORTS FILE FINALIZATION FINNALY FOR FUNCTION GOTO IF' +
               ' IMPLEMENTATION IN INHERITED INITIALIZATION INLINE INTERFACE' +
               ' IS LABEL LIBRARY MOD NIL NOT OBJECT OF OR OUT PACKED'  +
               ' PROCEDURE PROGRAM PROPERTY RAISE RECORD REPEAT RESOURCESTRING' +
               ' SET SHL SHR STRING THEN THREADVAR TO TRY TYPE UNIT UNTIL USES' +
               ' VAR WHILE WITH XOR MESSAGE' +
               {property keywords}
               ' PRIVATE PROTECTED PUBLIC PUBLISHED READ WRITE DEFAULT STORED' +
               {special}
               ' ON ';

  FSymbols  := '`~!@#$%^&*()-+=|\{[}]:;<,>.?/"''';
  FQuotes    := '#''';
  FNumbers   := '$';

  FOpenComment1  := BuildComment('{');
  FCloseComment1 := BuildComment('}') ;
  FOpenComment2  := BuildComment('(*');
  FCloseComment2 := BuildComment('*)');

  FEOLComment1  := BuildComment('//');
  FEOLComment2  := 0;
end;

function TDCDelphiSyntaxData.GetNumber(Source: PChar;
  var LexemItem: TLexemItem): boolean;
begin
  if Source^ = '$' then
    Result := GetHexNumber(Source, LexemItem)
  else
    Result := GetDecNumber(Source, LexemItem)
end;

function TDCDelphiSyntaxData.GetString(Source: PChar;
  var LexemItem: TLexemItem): boolean;
 var
  Quote : Char;
  pValue: PChar;
  W, EscapeValue: WORD;
begin
   LexemItem.Item := lxString;

   Result   := True;
   Quote    := Source^;
   pValue   := Source;

   if Source^ = '#' then
   begin
     Inc(Source);
     while (Source^ <> #0) and (Source^ >= '0') and (Source^ <= '9') do Inc(Source);
   end
   else begin
     Inc(Source);
     EscapeValue := Ord('''') shl 8 or Ord('''');
     while (Source^ <> #0) do begin
       W := PWORD(Source)^;
       if W = EscapeValue then
         Inc(Source)
       else
         if Source^ = Quote then Break;
       Inc(Source)
     end;
     if Source^ <> #0 then Inc(Source);
   end;

   LexemItem.Length := Source - pValue;
end;

{ TDCSQLSyntaxData }

constructor TDCSQLSyntaxData.Create;
begin
  inherited;
  FKeyWords := ' ADD ALL ALTER ANY AS ASC AUTHORIZATION AVG BACKUP BEGIN' +
               ' BETWEEN BREAK BROWSE BULK BY CASCADE CASE CHAECK CHECKPOINT' +
               ' CLOSE CLUSTERED COLESCE COLUMN COMMIT COMMITED COMPUTE' +
               ' CONFIRM CONSTRAINT CONTAINS CONTAINSTABLE CONTINUE' +
               ' CONTROLROW CONVERT COUNT CREATE CROSS CURRENT CURRENT_DATE' +
               ' CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR DATABASE' +
               ' DBCC DEALLOCATE DECLARE DEFAULT DELETE DENY DESC DISK' +
               ' DISTINCT DISTRIBUTED DOUBLE DROP DUMMY DUMP ELSE END ERRLVL' +
               ' ERROREXIT EXCEPT EXEC EXECUTE EXISTS' +

               ' EXIT FETCH FILE FILLFACTOR FLOPPY FOR FOREIGN FREETEXT' +
               ' FREETEXTTABLE FROM FULL GOTO GRANT GROUP HAVING HOLDLOCK' +
               ' IDENTITY IDENTITY_INSERT IDENTITYCOL IF INDEX INNER' +
               ' INSERT INTERSECT INTO IS ISOLATION JOIN KEY KILL LEFT' +
               ' LEVEL LIKE LINENO LOAD MAX MIN MIRROREXIT NATIONAL NOCHECK' +
               ' NONCLUSTERED NULL NULLIF OF OFF OFFSETS ON ONCE ONLY' +
               ' OPEN OPENDATASOURCE OPENQUERY OPENROWSET OPTION ORDER' +
               ' OUTER OVER PERCENT PERM PERMANENT PIP PLAN PRECISION' +

               ' PRIMARY PRINT PRIVILEGES PROC PROCEDURE PROCESSEXIT PUBLIC' +
               ' RAISERROR READ READTEXT RECONFIGURE REFERENCES REPEATABLE' +
               ' REPLICATION RESTORE RESTRICT RETURN REVOKE RIGHT ROLLBACK' +
               ' ROWCOUNT ROWGUIDCOL RULE SAVE SCHEMA SELECT SERIALIZABLE' +
               ' SESSION_USER SET SETUSER SHUTDOWN SOME STATISTICS SUM' +
               ' SYSTEM_USER TABLE TAPE TEMP TEMPORARY TEXTSIZE THEN TO TOP' +
               ' TRAN TRANSACTION TRIGGER TRUNCATE TSEQUAL UNCOMMITTED UNION' +
               ' UNIQUE UPDATE UPDATETEXT USE USER VALUES VARYING VIEW WAITFOR' +
               ' WHEN WHERE WHILE WITH WORK WRITETEXT PREPARE ';

  FSysWords := ' SYSLOGINS SYSOBJECTS SYSCOLUMNS SYSINDEXES SYSUSERS SYSMEMBERS' +
               ' SYSTEM IN OR AND NOT ';

  FSymbols  := '`~!@#$%^&*()-+=|\{[}]:;<,>.?/"''';
  FQuotes    := '#''"';
  FNumbers   := '$';

  FOpenComment1  := BuildComment('/*');
  FCloseComment1 := BuildComment('*/');
  FOpenComment2  := 0;
  FCloseComment2 := 0;

  FEOLComment1  := BuildComment('--');
  FEOLComment2  := 0;
end;

function TDCSQLSyntaxData.GetNumber(Source: PChar;
  var LexemItem: TLexemItem): boolean;
begin
  if Source^ = '$' then
    Result := GetHexNumber(Source, LexemItem)
  else
    Result := GetDecNumber(Source, LexemItem)
end;

function TDCSQLSyntaxData.GetString(Source: PChar;
  var LexemItem: TLexemItem): boolean;
 var
  Quote : Char;
  pValue: PChar;
  W, EscapeValue: WORD;
begin
   LexemItem.Item := lxString;

   Result   := True;
   Quote    := Source^;
   pValue   := Source;

   Inc(Source);
   EscapeValue := Ord('''') shl 8 or Ord('''');
   while (Source^ <> #0) do begin
     W := PWORD(Source)^;
     if W = EscapeValue then
       Inc(Source)
     else
       if Source^ = Quote then Break;
     Inc(Source)
   end;
   if Source^ <> #0 then Inc(Source);

   LexemItem.Length := Source - pValue;
end;

end.
