{
Some routines for string handling on a higher level than those
provided by the RTS.

Copyright (C) 1999-2000 Free Software Foundation, Inc.

Author: Frank Heckenbach <frank@pascal.gnu.de>

This file is part of GNU Pascal.

GNU Pascal is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Pascal is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Pascal; see the file COPYING. If not, write to the
Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.

As a special exception, if you link this file with files compiled
with a GNU compiler to produce an executable, this does not cause
the resulting executable to be covered by the GNU General Public
License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General
Public License.
}

{$gnu-pascal}
{$if __GPC_RELEASE__ < 20000814}
{$error This unit requires GPC release 20000814 or newer.}
{$endif}

unit StringUtils;

interface

uses GPC;

{ Returns the number of disjoint occurences of SubStr in s. Returns
  0 if SubStr is empty. }
function  StrCount (const SubStr : String; s : String) : Integer;

{ Returns s, with all disjoint occurences of Source replaced by
  Dest. }
function  StrReplace (const s, Source, Dest : String) : TString;

{ Converts a digit character to its numeric value. Handles every
  base up to 36 (0 .. 9, a .. z, upper and lower case recognized).
  Returns -1 if the character is not a digit at all. If you want to
  use it for a base < 36, you have to check if the result is smaller
  than the base and not equal to -1. }
function  Char2Digit (ch : Char) : Integer;

{ Encode a string in a printable format (quoted printable and
  surrounded with `"'). All occurences of `"' within the string are
  encoded, so the result string contains exactly two `"' characters
  (at the beginning and ending). This is useful to store arbitrary
  strings in text files while keeping them as readable as possible
  (which is the goal of the quoted printable encoding in general,
  RFC 1521, section 5.1) and being able to read them back losslessly
  (with UnQuoteString). }
function  QuoteString (const s : String) : TString;

{ Decode a string encoded by QuoteString (removing the `"' and
  expanding quoted printable encoded characters). Returns True if
  successful and False if the string has an invalid form. A string
  returned by QuoteString is always valid. }
function  UnQuoteString (var s : String) : Boolean;

{ Decode a quoted-printable string (not enclosed in `"', unlike for
  UnQuoteString). Returns True if successful and False if the string
  has an invalid form. }
function  UnQPString (var s : String) : Boolean;

{ Replaces all tab characters in s with the appropriate amount of
  spaces, assuming tab stops at every TabSize columns. Returns True
  if successful and False if the expanded string would exceed the
  capacity of s. In the latter case, some, but not all of the tabs
  in s may have been expanded. }
function  ExpandTabs (var s : String; TabSize : Integer) : Boolean;

{ Returns s, with all occurences of C style escape sequences (e.g.
  `\n') replaced by the characters they mean. If AllowOctal is True,
  also octal character specifications (e.g. `\007') are replaced. If
  RemoveQuoteChars is True, any other backslashes are removed (e.g.
  `\*' -> `*' and `\\' -> `\'), otherwise they are kept, and also
  `\\' is left as two backslashes then. }
function  ExpandCEscapeSequences (const s : String; RemoveQuoteChars, AllowOctal : Boolean) : TString;

{ String hash table }

const
  DefaultHashSize = 1403;

type
  THash = Cardinal;

  PStrHashList = ^TStrHashList;
  TStrHashList = record
    Next : PStrHashList;
    s : PString;
    i : Integer;
    p : Pointer
  end;

  PStrHashTable = ^TStrHashTable;
  TStrHashTable (Size : Cardinal) = record
    CaseSensitive : Boolean;
    Table : array [0 .. Size - 1] of PStrHashList
  end;

function  HashString          (const s : String) : THash;
function  NewStrHashTable     (Size : Cardinal; CaseSensitive : Boolean) : PStrHashTable;
procedure AddStrHashTable     (HashTable : PStrHashTable; s : String; i : Integer; p : Pointer);
procedure DeleteStrHashTable  (HashTable : PStrHashTable; s : String);
function  SearchStrHashTable  (HashTable : PStrHashTable; const s : String; var p : Pointer) : Integer; { p may be null }
procedure DisposeStrHashTable (HashTable : PStrHashTable);

implementation

{$B-,I-}

function StrCount (const SubStr : String; s : String) : Integer;
var c, p : Integer;
begin
  if SubStr = '' then
    StrCount := 0
  else
    begin
      c := 0;
      p := 1;
      repeat
        p := PosFrom (SubStr, s, p);
        if p <> 0 then
          begin
            Inc (c);
            Inc (p, Length (SubStr))
          end
      until p = 0;
      StrCount := c
    end
end;

function StrReplace (const s, Source, Dest : String) = Result : TString;
var c : Integer;
begin
  Result := s;
  for c := Length (Result) - Length (Source) + 1 downto 1 do
    if Copy (Result, c, Length (Source)) = Source then
      begin
        Delete (Result, c, Length (Source));
        Insert (Dest, Result, c)
      end
end;

function Char2Digit (ch : Char) : Integer;
begin
  case ch of
    '0' .. '9': Char2Digit := Ord (ch) - Ord ('0');
    'A' .. 'Z': Char2Digit := Ord (ch) - Ord ('A') + $a;
    'a' .. 'z': Char2Digit := Ord (ch) - Ord ('a') + $a;
    else        Char2Digit := -1
  end
end;

function QuoteString (const s : String) : TString;
const HexChars : array [0 .. $f] of Char = '0123456789ABCDEF';
var
  q, t : TString;
  i : Integer;
begin
  q := s;
  i := 0;
  repeat
    i := CharPosFrom ([#0 .. Pred (' '), '"', '=', #127 .. High (Char)], q, i + 1);
    if i = 0 then Break;
    t := HexChars [Ord (q [i]) div $10] + HexChars [Ord (q [i]) mod $10];
    Insert (t, q, i + 1);
    q [i] := '=';
    Inc (i, Length (t))
  until False;
  QuoteString := '"' + q + '"'
end;

function UnQPString (var s : String) : Boolean;
var i, j : Integer;
begin
  UnQPString := False;
  repeat
    i := Pos (' ' + NewLine, s);
    if i = 0 then Break;
    j := i;
    while (j > 1) and (s [j - 1] = ' ') do Dec (j);
    Delete (s, j, i - j + 1)
  until False;
  i := 0;
  repeat
    i := PosFrom ('=', s, i + 1);
    if i = 0 then Break;
    if (i <= Length (s) - 2) and (s [i + 1] in ['0' .. '9', 'A' .. 'F', 'a' .. 'f'])
                             and (s [i + 2] in ['0' .. '9', 'A' .. 'F', 'a' .. 'f']) then
      begin
        s [i] := Chr ($10 * Char2Digit (s [i + 1]) + Char2Digit (s [i + 2]));
        Delete (s, i + 1, 2)
      end
    else if (i <= Length (s) - 1) and (s [i + 1] = NewLine) then
      begin
        Delete (s, i, 2);
        Dec (i)
      end
    else
      Exit
  until False;
  UnQPString := True
end;

function UnQuoteString (var s : String) : Boolean;
begin
  UnQuoteString := False;
  if (Length (s) < 2) or (s [1] <> '"') or (s [Length (s)] <> '"') then Exit;
  Delete (s, 1, 1);
  Delete (s, Length (s), 1);
  UnQuoteString := UnQPString (s)
end;

function ExpandTabs (var s : String; TabSize : Integer) : Boolean;
const chTab = #9;
var i, TabSpaces : Integer;
begin
  ExpandTabs := True;
  repeat
    i := Pos (chTab, s);
    if i = 0 then Break;
    TabSpaces := TabSize - (i - 1) mod TabSize;
    if Length (s) + TabSpaces - 1 > High (s) then
      begin
        ExpandTabs := False;
        Break
      end;
    Delete (s, i, 1);
    Insert (StringOfChar (' ', TabSpaces), s, i)
  until False
end;

function ExpandCEscapeSequences (const s : String; RemoveQuoteChars, AllowOctal : Boolean) = r : TString;
const chEsc = #27;
var
  i, c, Digit, v : Integer;
  DelFlag : Boolean;
begin
  r := s;
  i := 1;
  while i < Length (r) do
    begin
      if r [i] = '\' then
        begin
          DelFlag := True;
          case r [i + 1] of
            'n' : r [i + 1] := "\n";
            't' : r [i + 1] := "\t";
            'r' : r [i + 1] := "\r";
            'f' : r [i + 1] := "\f";
            'b' : r [i + 1] := "\b";
            'v' : r [i + 1] := "\v";
            'a' : r [i + 1] := "\a";
            'e',
            'E' : r [i + 1] := chEsc;
            'x' : begin
                    v := 0;
                    c := 2;
                    while i + c <= Length (r) do
                      begin
                        Digit := Char2Digit (r [i + c]);
                        if (Digit < 0) or (Digit >= $10) then Break;
                        v := $10 * v + Digit;
                        Inc (c)
                      end;
                    Delete (r, i + 1, c - 2);
                    r [i + 1] := Chr (v)
                  end;
            '0' .. '7' : if AllowOctal then
                           begin
                             v := 0;
                             c := 1;
                             repeat
                               v := 8 * v + Ord (r [i + c]) - Ord ('0');
                               Inc (c)
                             until (i + c > Length (r)) or (c > 3) or not (r [i + c] in ['0' .. '7']);
                             Delete (r, i + 1, c - 2);
                             r [i + 1] := Chr (v)
                           end
                         else
                           DelFlag := False;
            else DelFlag := False
          end;
          if DelFlag or RemoveQuoteChars then
            Delete (r, i, 1)
          else
            Inc (i)
        end;
      Inc (i)
    end
end;

function HashString (const s : String) : THash;
var Hash, i : THash;
begin
  Hash := Length (s);
  for i := 1 to Length (s) do
    (*@@$localR-*) Hash := Hash shl 2 + Ord (s [i]); (*@@$endlocal*)
  HashString := Hash
end;

function NewStrHashTable (Size : Cardinal; CaseSensitive : Boolean) = HashTable : PStrHashTable;
var i : Cardinal;
begin
  New (HashTable, Size);
  HashTable^.CaseSensitive := CaseSensitive;
  for i := 0 to HashTable^.Size - 1 do HashTable^.Table [i] := nil
end;

procedure AddStrHashTable (HashTable : PStrHashTable; s : String; i : Integer; p : Pointer);
var
  Hash : THash;
  pl : PStrHashList;
begin
  if not HashTable^.CaseSensitive then LoCaseString (s);
  Hash := HashString (s) mod HashTable^.Size;
  New (pl);
  pl^.s := NewString (s);
  pl^.i := i;
  pl^.p := p;
  pl^.Next := HashTable^.Table [Hash];
  HashTable^.Table [Hash] := pl
end;

procedure DeleteStrHashTable (HashTable : PStrHashTable; s : String);
var
  Hash : THash;
  pl : PStrHashList;
  ppl : ^PStrHashList;
begin
  if not HashTable^.CaseSensitive then LoCaseString (s);
  Hash := HashString (s) mod HashTable^.Size;
  ppl := @HashTable^.Table [Hash];
  while (ppl^ <> nil) and (ppl^^.s^ <> s) do ppl := @ppl^^.Next;
  if ppl^ <> nil then
    begin
      pl := ppl^;
      ppl^ := pl^.Next;
      Dispose (pl^.s);
      Dispose (pl)
    end
end;

function SearchStrHashTable (HashTable : PStrHashTable; const s : String; var p : Pointer) : Integer;
var
  Hash : THash;
  pl : PStrHashList;
  ps : ^const String;
  sl : String (Length (s));
begin
  if HashTable^.CaseSensitive then
    ps := @s
  else
    begin
      sl := LoCaseStr (s);
      ps := @sl
    end;
  Hash := HashString (ps^) mod HashTable^.Size;
  pl := HashTable^.Table [Hash];
  while (pl <> nil) and (pl^.s^ <> ps^) do pl := pl^.Next;
  if pl = nil then
    begin
      if @p <> nil then p := nil;
      SearchStrHashTable := 0
    end
  else
    begin
      if @p <> nil then p := pl^.p;
      SearchStrHashTable := pl^.i
    end
end;

procedure DisposeStrHashTable (HashTable : PStrHashTable);
var
  i : Cardinal;
  pl, pt : PStrHashList;
begin
  for i := 0 to HashTable^.Size - 1 do
    begin
      pl := HashTable^.Table [i];
      HashTable^.Table [i] := nil;
      while pl <> nil do
        begin
          pt := pl;
          pl := pl^.Next;
          Dispose (pt^.s);
          Dispose (pt)
        end
    end
end;

end.
