unit FSortUnit;
{-------------------------------------------------------------------------------
  Fast sorter unit
  ----------------
  resource (C) 1998, 1999 Victor Kasenda / gruv
  http://members.tripod.com/~gruv/resource

  Fast sort unit.
  Algos:
    DJ Wheeler from his June 1989 report and
    Kunihiko Sadakane's Suffix sort.

  coded by gruv

  Notes:
  Sort the index, not the block.
  SadaSort compares group indexes not block.

  Sort rev 4:
  Radix on symbol pairs.
  Sadakane's Suffix sort.

------------------------------------------------------------------------------}


(**) interface (**)
uses SysUtils, Forms, dialogs, StructsUnit;

const
  STRIDE = 4;
  MAXDEPTH = 20;
  NUMOVERSHOOT = MAXDEPTH + 100;


type
  {THead = array[0..65535] of Longint;
  PHead = ^THead;}


  TFastSorter = class
  private
    block: PBlock;                // block to sort

    index: PLongintBlock;         // index to the block to sort. each index pos is a string
    block_length: longint;        // length of the block

    last_index: integer;

    head: P64kBlock;              // head of the linked list
    link: PLongintBlock;         // links in the linked list
    //link_count: PHead;            // Number of links in each head
    //index_head: PHead;            // start of each group in index

    group: PLongintBlock;         // group of suffix s
    size: PLongintBlock;
    {For SadaSort: from the paper
    I -> index
    V -> group
    S -> size}

    procedure RadixSortOnSymbolPairs;
    procedure InitIndexFromLink;

    procedure SadaSort;
    procedure SortGroup(const stlo, sthi, depth: integer);
  public
    constructor Create;
    destructor Destroy; override;

    procedure SortBlock(const _block: PBlock; const _index: PLongintBlock; const _block_length: longint);
  end;


(**) implementation (**)
uses ErrorUnit, Main;

{-------------------------------------------------------------------------------
  Create/Destroy
  --------------

  Allocates and frees the memory structures used for sorting.
-------------------------------------------------------------------------------}
constructor TFastSorter.Create;
begin
  inherited Create;
  {New(head);
  New(link);
  //New(link_count);
  //New(index_head);

  New(group);
  New(size);}
end;

destructor TFastSorter.Destroy;
begin
  {Dispose(size);
  Dispose(group);

  //Dispose(index_head);
  //Dispose(link_count);
  Dispose(link);
  Dispose(head);}
  inherited Destroy;
end;

{-------------------------------------------------------------------------------
  SortBlock
  ---------

  Main procedure to call.
  Initializes the block then calls the respective procedures to sort the block.
-------------------------------------------------------------------------------}
procedure TFastSorter.SortBlock(const _block: PBlock; const _index: PLongintBlock; const _block_length: longint);

  procedure Initialize;
  var
    i: integer;
  begin
    {Initialize}
    block := _block;
    index := _index;
    block_length := _block_length;
    last_index := block_length-1;

    // sizes array need not be cleared. it will be init.

    // assign block memory
    // index -> longintblock1
    head := BlockMan.k64Block;
    link := BlockMan.longintblock2;
    group := BlockMan.longintblock2;
    size := BlockMan.longintblock3;

    {Clear Arrays}
    for i := 0 to 65535 do
      head[i] := -1;


  end;




var
  head_idx, cur_head: longword;
  first_char: byte;
  i, numkeys, first_head: integer; // numkeys: total number of keys with first_char
  t: longword;
  totalbytes: integer; // for progress bar

begin
  totalbytes := 0;

  Initialize;
  RadixSortOnSymbolPairs;   // fill up head and link
  InitIndexFromLink;        // get index in semi sorted order and index_head
  SadaSort;
end;

{-------------------------------------------------------------------------------
  RadixSortOnSymbolPairs
  ----------------------

  Radix sort: Run through the block array in words to get the buckets and dump
  the indexes into their respective bucket.

  Initializes long_block with each long integer straddling 4 bytes.

  OUT Assertion:
  head/link are linked lists to the sort.
  long_block is initialized
-------------------------------------------------------------------------------}
procedure TFastSorter.RadixSortOnSymbolPairs;
var
  i: integer;
  w: word;
begin
  {Init w with the first character}
  w := block^[0];

  for i := 0 to last_index-1 do
  begin
    w := word(w shl 8) or block^[i+1];

    {if there is no entry in head then set the pos as the head.
    Otherwise link the pos in by making it the head and setting its link}
    if (head^[w] = -1) then
    begin
      head^[w] := i;
      link^[i] := -1;
      {Set link^[i] to -1 as the terminator}
    end
    else
    begin
      link^[i] := head^[w];
      head^[w] := i;
    end;
  end; {for}


end;



{-------------------------------------------------------------------------------
  InitIndexFromLink
  -----------------

  Out Assertion:
  Inits index, index_head and link_count.
  index_head will point to the head of each symbol pair in index.
  link_count is the count for each symbol pair corresponding in head.
  index will contain a continuous list of indexes. These indexes are in groups
  with their head pointed to by index_head and counts in link_count.
  Quicksort will sort the index.
  head no more used.

  Desc:
  This will run through the head array.
  It will fill in the index_head with all valid entries from head.
  It is therefore possible that index_head be smaller than head, because all
  -1 entries are removed.

  The current index position is then filled with the head value.
  If there is a head, there may be links. So the links are filled in trailing
  after the head value until a -1 terminator is reached.

  Note:
  link_count includes the head node and all other link nodes.
  link_count corresponds to the new def. of head, NOT the old one.
  link_count[i] is the count for index_head[i].

  All -1 or 'no entries' in index_head have been removed. index_head is a continuous list
  of heads in index.
  The end of index_head is marked by a -1.

  New:
  use link and head to init index, index_head, link_count, size
  index_pos walks through to fill in index with the semi sorted indexes.
  after this, link and head are no more used.
  link and group share the same memory location
-------------------------------------------------------------------------------}

procedure TFastSorter.InitIndexFromLink;
var
  i, index_pos, head_pos, cur_node, t: longint;

  group_num: integer;
  group_first_index: integer;
  group_size: integer;

  w: word;
begin
  index_pos := 1;  // start from 1 for virtual smallest character. for circular start from 0
  head_pos := 0;

  // due to the last char being the smallest char, we must fill in manually
  // link for that one.
  // if actual last is 'e', then we get 'e$00' and we add to the head.
  w := word(block[last_index] shl 8);
  {if there is no entry in head then set the pos as the head.
  Otherwise link the pos in by making it the head and setting its link}
  if (head^[w] = -1) then
  begin
    head^[w] := last_index;
    link^[last_index] := -1;
    // Set link[i] to -1 as the terminator
  end
  else
  begin
    link^[last_index] := head^[w];
    head^[w] := last_index;
  end;



  {go through each radix bucket}
  for i := 0 to 65535 do
  begin
    cur_node := head^[i];

    if (i = w) then
    begin
      // the link with the virtual smallest char is the first one
      // we give it it's own group number, remove it from the linked list
      // and continue as if this never happened
      // cur_node is the index
      // index_pos is the group number
      Assert(cur_node = last_index);
      index[index_pos] := cur_node;
      size[index_pos] := 1;
      // link and group share the same memory location. update cur_node then
      // assign the group number because we'll never access that link again.
      cur_node := link[cur_node];     // take out the memory contents
      group[last_index] := index_pos; // override it
      inc(index_pos);
    end;


    if (cur_node <> -1) then
    begin
      {Head now points to the head of a symbol pair linked list in index}
      //index_head^[head_pos] := index_pos;
      //link_count^[head_pos] := 0;

      // walk the linked list
      group_num := index_pos;  // group_num is i
      group_first_index := cur_node;
      group_size := 0;

      repeat
        // collate the nodes in index
        index[index_pos] := cur_node;
        t := cur_node;   // save the cur_node
        cur_node := link[cur_node];
        // fill in the group number for index_pos
        // override previous memory location in link with the group_num
        group[t] := group_num;  // group[index[index[pos]] or V[I[i]]

        inc(index_pos);
        inc(group_size); // inc(link_count[head_pos]);
      until (cur_node = -1);

      // fill in the group size in size[group_num]
      size[group_num] := group_size;
      //size[group_num] := link_count[head_pos];

      inc(head_pos);
    end;
  end;

  //index_head^[head_pos] := -1;


  // init the virtual smallest character
  block[block_length] := 0;
  index[0] := block_length;
  size[0] := -1;          // sorted, 1 char only
  group[index[0]] := 0;   // first group}
end;

{Notes:
group and index init from 1 to block_size
0 is the virtual smallest char. compare with index[0]=block_size should
be greater. note that index[0] may not contain block_size}
procedure TFastSorter.SortGroup(const stlo, sthi, depth: integer);

  {Swap - swaps 2 values v1 and v2 }
  procedure Swap(var v1, v2: longword); overload;
  var
    t: longword;
  begin
    t := v1;
    v1 := v2;
    v2 := t;
  end;

  {Swap - swaps 2 values v1 and v2 }
  procedure Swap(var v1, v2: longint); overload;
  var
    t: longword;
  begin
    t := v1;
    v1 := v2;
    v2 := t;
  end;

  {Vector swap}
  procedure VecSwap(p1, p2, n: longword);
  {var
    t: longword;}
  begin
    while (n > 0) do
    begin
      {Swap p1, p2}
      {t := p1;
      p1 := p2;
      p2 := t;}
      Swap(index[p1], index[p2]);

      inc(p1); inc(p2); dec(n);
    end;
  end;

  {Median of 3}
  function Med3(a, b, c: byte): byte; overload;
  var
    t: byte;
  begin
    if (a > b) then
    begin
      {Swap a, b}
      t := a; a := b; b := t;
    end;
    if (b > c) then
    begin
      {Swap b, c}
      t := b;
      b := c;
      c := t;
    end;
    if (a > b) then b := a;
    result := b;
  end;

  function Min(a, b: integer): integer;
  begin
    if (a < b) then
      result := a
    else
      result := b;
  end;

  function Med3(a, b, c: longword): longword; overload;
  var
    t: longword;
  begin
    if (a > b) then
    begin
      {Swap a, b}
      t := a; a := b; b := t;
    end;
    if (b > c) then
    begin
      {Swap b, c}
      t := b;
      b := c;
      c := t;
    end;
    if (a > b) then b := a;
    result := b;
  end;

  function Med3(a, b, c: integer): integer; overload;
  var
    t: integer;
  begin
    if (a > b) then
    begin
      {Swap a, b}
      t := a; a := b; b := t;
    end;
    if (b > c) then
    begin
      {Swap b, c}
      t := b;
      b := c;
      c := t;
    end;
    if (a > b) then b := a;
    result := b;
  end;

  {function NormIdx(idx: integer): integer;
  begin
    repeat
      if (idx > last_index) then
        dec(idx, last_index)
      else
      begin
        result := idx;
        exit;
      end;
    until false;
  end;}

  procedure QSort3(lo, hi: integer);
  {lo, hi: first and last element
  Note: we will compare group numbers
  the depth of comparison is constant througout the recursion}
  var
    a, b, c, d: integer; // may become negative?
    r: integer;
    med: integer; // byte
    i, group_num: integer;
  begin
    if (hi-lo < 1) then
    begin
      // 1 item only. assign it a group
      if (hi = lo) then
      begin
        group[index[hi]] := hi;
        size[hi] := 1;
      end;
      exit;
    end;

    med := Med3(group[index[lo] + depth],
                group[index[hi] + depth],
                group[index[(lo + hi) shr 1] + depth]);


    a := lo;
    b := lo;
    c := hi;
    d := hi;

    while true do
    begin

      { = < }
      { find item greater than med, while swapping equal items to the left }
      while (b <= c) and (group[index[b] + depth] <= med) do
      begin
        if (group[index[b] + depth] = med) then
        begin
          Swap(index[a], index[b]);
          inc(a);
        end;
        inc(b);
      end;

      { > = }
      { find item smaller than med, while swapping equal items to the right }
      while (b <= c) and (group[index[c] + depth] >= med) do
      begin
        if (group[index[c] + depth] = med) then
        begin
          Swap(index[c], index[d]);
          dec(d);
        end;
        dec(c);
      end;

      if (b > c) then break;

      // swap b and c
      Swap(index[b], index[c]);
      inc(b);
      dec(c);
    end;

    {b = c+1 once we are out}
    Assert(b = c+1);
    //if b <> (c+1) then ShowMessage('bc');

    {final arrangment:
    lo  a  c  b  d  hi
    d is next avail pos. d+1 to hi: = items
    a is next avail pos. lo to a-1: = items}

    {left centre right}

    {swap enough to get left from '= <' to '< ='
    a-lo: num of = items
    b-a: num of < items
    r gives the min items to swap}
    r := min(a-lo, b-a);
    VecSwap(lo, b-r, r);

    {swap enough to get right from '> =' to '= >'
    d-c: num of > items
    hi-d: num of = items}
    r := min(d-c, hi-d);
    VecSwap(b, hi-r+1, r);

    // sort from higher to lower
    // for equal items update their group numbers to the same group

    r := d-c;                          // num of '>' items
    QSort3(hi-r+1, hi);  // sort right

    r := (a-lo) + (hi-d);
    {QSort3(lo+b-a, lo+b-a+r-1);      // sort middle}
    group_num := lo+b-a;
    for i := lo+b-a to lo+b-a+r-1 do   // give the '=' items the same group number
      group[index[i]] := group_num;
    size[group_num] := r;

    r := b-a;                          // size of '<' items
    QSort3(lo, lo + r - 1);     // sort left

  end; {QSort3}

begin
  QSort3(stlo, sthi);
end;

procedure TFastSorter.SadaSort;
var
  i, k: integer;
  first_i: integer;
  group_size: integer;

begin
  // sort unsorted groups
  // go through the size array. anything with size 1 we ignore and add to the
  // previous group size
  // if first_i = -1 that means first_i not avail and next sorted group can
  // be first_i

  // blocksize has increased by 1 because of the vitual char
  inc(block_length);

  // keep sorting until all has been sorted
  k := 2;
  while (abs(size[0]) < (block_length-1)) do
  begin
    i := 0;//i := abs(size[0]);
    first_i := -1;

    repeat

      if (size[i] < 0) then
      begin
        if (first_i = -1) then
        begin
          first_i := i; // we can add further sorted groups to this group
          inc(i, abs(size[i])); // skip this group
        end
        else
        begin
          Assert(size[first_i] < 0);
          inc(size[first_i], size[i]); // add to the first_i
          inc(i, abs(size[i]));        // skip, because it is sorted and group has been combined
        end;
      end
      else if (size[i] = 1) then
      begin
        if (first_i = -1) then
        begin
          first_i := i; // we can add further sorted groups to this group
          size[first_i] := -1;  // make this the head sorted group
        end
        else
        begin
          Assert(size[first_i] < 0);
          dec(size[first_i]);   // add this group to the first_i
        end;
        inc(i);
      end
      else
      begin
        // group size > 1 sort it
        group_size := size[i];
        SortGroup(i, i + size[i]-1, k);

        inc(i, group_size); // size[i] may change after sort group
        first_i := -1;
      end;
    until (i >= block_length);  // while (i < block_length-1)

    k := k * 2;
  end;

end;


end.

