unit BWTCompressUnit;
{-------------------------------------------------------------------------------
  Burrows Wheeler Transformation
  Block Compression Unit
  ------------------------------
  resource (C) 1998, 1999 Victor Kasenda / gruv
  http://members.tripod.com/~gruv/resource

  Desc:
    This is the class that brings all the engines together.
    It uses the FSortUnit, MTFEncoder, StrucAriEncoder.
    The whole compression for a block consists of:
      1) Burrows Wheeler Transformation (Sort + Retrieve last column)
      2) Move To Front encoding
      3) Structured Arithmetic encoding

    Brief Explanation:
      1) BWT is the trick to the high performance compression
      2) Move to Front coding is done to transform the block into a series
         of numbers. The more frequantly appearing characters will thus be
         transformed to lower numbers, resulting a low numbers dominating the
         block (0 and 1s especially). This aids Arithmetic coding.
      3) Arithmetic coding is performed with a structured or hierarchical model.
         Read the system doc for more information about the structured
         arithmetic model.
      For a more in depth discussion of the compression process, refer
      to the system doc.

  Usage:
    - just create the object and call CompressBlockToFile
      CompressBlockToFile writes out the data header and the data
    - to not use the structured arithmetic encoder, undefine USE_STRUC_ARI

  Notes:
    - read notes.txt for information about the block swapping technique used
    - certain debug procedures have been commented out to prevent hints
    - the general rule is pass only what is needed for the engine wrappers
-------------------------------------------------------------------------------}


(**) interface (**)
uses // delphi
     Classes, Forms, SysUtils, Dialogs,
     // general
     OFile, StructsUnit, ArchiveFileUnit, ArchiveHeadersUnit,
     // engine
     RLEUnit, FSortUnit, MTFEncoderUnit, MTFDecoderUnit, BWTExpandUnit,
     FileStrucAriEncoderUnit,
     // base
     BWTBaseUnit;

type
  TCompressor = class(TBWTBase)
  private
    //block1, block2: PBlock;
    index: PLongintBlock;

    // Debug
    {original_block: PBlock;
    recovered_block: PBlock;}
    {Expander: TExpander;}

    // Classes
    FastSorter: TFastSorter;
    MTFEncoder: TMTFEncoder;
    FileStrucAriEncoder: TFileStrucAriEncoder;
    {RunLengthEncoder: TRunLengthEncoder;}

    // Main compression routines
    {procedure AllocateStructs;
    procedure FreeStructs;}

    procedure InitStructs;
    procedure SortBlock(var block_length: longint);
    procedure MTFGetTransformedBlock(var block_length, first_sym_index, virtual_char_index: longint);
    procedure AriEncodeBlock(ArchiveFile: TArchiveFile; block_length: longint);
    {procedure GetTransformedBlock(var first_sym_index, virtual_char_index: longint);
    procedure MTFEncodeBlock;}
    {procedure RLEEncode;}



    // Debug
    {procedure DoBlockRecover;
    procedure DumpBlock(var b; bsize: longint; FileName: string);
    procedure DumpSortedBlock;
    procedure DumpTransformedBlock;
    procedure DumpRecoveredBlock;
    procedure CheckSortedBlock;
    {procedure CheckRecoveredBlock;}

    // Debug output
    procedure DebugShowDoingSorting;
    procedure DebugShowDoingTransform;
    {procedure DebugShowDoingMTF;}
    procedure DebugShowDoingAriCompress;

  public
    constructor Create;
    destructor Destroy; override;

    function GetInBlock: PBlock;  // fill the inblock then compress it
    procedure CompressInBlockToFile(block_length: longint; ArchiveFile: TArchiveFile;
                                    var packed_size: integer);

  end;


(**) implementation (**)
uses ErrorUnit, main, DebugFormUnit,
     ConfigUnit;


constructor TCompressor.Create;
begin
  inherited Create;
  //AllocateStructs;
  FastSorter := TFastSorter.Create;
  MTFEncoder := TMTFEncoder.create;
  FileStrucAriEncoder := TFileStrucAriEncoder.Create;

  {Debug}
  {Expander := TExpander.Create;}
end;

destructor TCompressor.Destroy;
begin
  {Debug}
  {Expander.Free;}

  FileStrucAriEncoder.Free;
  MTFEncoder.Free;
  FastSorter.Free;
  //FreeStructs;
  inherited Destroy;
end;

(*
{-------------------------------------------------------------------------------
  AllocateStructs
  ---------------

  Allocate memory for the block transformation and assign in_block and out_block
-------------------------------------------------------------------------------}
procedure TCompressor.AllocateStructs;
begin
  New(block1);
  New(block2);
  New(index);

  // Debug
  {New(recovered_block);
  New(original_block);}
end;

{-------------------------------------------------------------------------------
  FreeStructs

  Free whatever memory that was allocated by AllocateStructs
-------------------------------------------------------------------------------}
procedure TCompressor.FreeStructs;
begin
  // Debug
  {Dispose(original_block);
  Dispose(recovered_block);}

  Dispose(index);
  Dispose(block2);
  Dispose(block1);
end;
*)

{-------------------------------------------------------------------------------
  InitStructs

  inits swap block structures.
  pass the block1 to be assigned
  inits the index.
  Assigns an index to every position in block. Each entry in index indicates the
  start of a string.
-------------------------------------------------------------------------------}
procedure TCompressor.InitStructs;
begin
  // Assign block pointers for the swapblocks system
  // in_block was assigned when GetInBlock was called. it took block1
  out_block := BlockMan.block2;
  index := BlockMan.longintblock1;
end;

function TCompressor.GetInBlock: PBlock;  // fill the inblock then compress it
begin
  in_block := BlockMan.block1;
  result := BlockMan.block1;
end;

{-------------------------------------------------------------------------------
  CompressBlockToFile
  -------------------
  Writes out the data header + data

  IN Assertion: ArchiveFile has been seeked to the next write position
  OUT Assertion: ArchiveFile is seeked to the next output position
-------------------------------------------------------------------------------}
procedure TCompressor.CompressInBlockToFile;
var
  data_header_offset,             // offset of the data header
  next_free_pos: integer;         // the next output position when ArchiveFile is returned
  ari_data_size: longword;        // size of the arithmetic data
  crc: longword;                  // crc calculated for this block
  first_sym_index, virtual_char_index: longint;
  DataBlockHeader: TDataBlockHeader;             // the data header
begin
  {Compression process:
  Sort
  Transform
  Move To Front
  Ari Code}

  // reserve space for the block size first
  data_header_offset := ArchiveFile.Position;
  ArchiveFile.ReserveSpace(DATA_HEADER_SIZE);

  ArchiveFile.ResetBuffer;

  InitStructs;
  CalculateCRC32(in_block, block_length, crc);
  SortBlock(block_length);
  MTFGetTransformedBlock(block_length, first_sym_index, virtual_char_index);
  AriEncodeBlock(ArchiveFile, block_length);

  // save the current position
  next_free_pos := ArchiveFile.Position;

  // some calculations
  ari_data_size := next_free_pos - data_header_offset -DATA_HEADER_SIZE;

  // seek back to start of data block to write the data header of this block
  ArchiveFile.SmartSeek(data_header_offset, soFromBeginning);

  DataBlockHeader := TDataBlockHeader.Create;
  with DataBlockHeader do
  begin
    crc32 := crc;
    compressed_size := ari_data_size;
  end;
  DataBlockHeader.first_sym_index := first_sym_index;
  DataBlockHeader.virtual_char_index := virtual_char_index;
  DataBlockHeader.WriteToFile(ArchiveFile);
  DataBlockHeader.Free;

  // seek back to where we left off
  ArchiveFile.SmartSeek(next_free_pos, soFromBeginning);

  // allow screen update
  Application.ProcessMessages;

  // return values
  packed_size := ari_data_size;
end;


{-------------------------------------------------------------------------------
  RLEEncode

  Run Length Encode the block for faster sorting.
  OUT Assertion: block_length is set to the new length
-------------------------------------------------------------------------------}

{procedure TCompressor.RLEEncode;
var
  RLEEncoder: TRunLengthEncoder;
begin
  RLEEncoder := TRunLengthEncoder.Create;
  RLEEncoder.EncodeBlock(in_block, out_block, block_length, block_length);
  RLEEncoder.Free;
  SwapBlocks;
end;}


{-------------------------------------------------------------------------------
  SortBlock
-------------------------------------------------------------------------------}
procedure TCompressor.SortBlock(var block_length: longint);
var
  i: longint;
begin
  for i := 0 to block_length-1 do
    index[i] := i;

  DebugShowDoingSorting;
  FastSorter.SortBlock(in_block, index, block_length);

  // SadaSort adds a virtual char
  inc(block_length);

  // debug check
  {DumpSortedBlock;}
  {CheckSortedBlock;}
  // in_block is not changed, only Index is created.
  // swapblocks need not be called
end;

{-------------------------------------------------------------------------------
  GetTransformedBlock and MTF encode

  Get the last column l
-------------------------------------------------------------------------------}
procedure TCompressor.MTFGetTransformedBlock(var block_length, first_sym_index, virtual_char_index: longint);
var
  i, j: longint;
begin
  DebugShowDoingTransform;
  MTFEncoder.Init;

  // mirror the last byte in block to block[-1], so when i = 0, block^[i-1] works
  in_block^[-1] := in_block^[block_length-1];

  // sada sort account for vitual. don't pass it to the mtf.
  // we remove it from out_block and store its index.
  i := 0;  // in_block index
  j := 0;  // out_block index
  virtual_char_index := -2;

  while (i < block_length) do
  begin
    if (index[i] = 1) then
      first_sym_index := i;

    // the virtual char is accessed when in_block[-1] is accessed
    if ((index[i]-1) = -1) then
      virtual_char_index := j // we skip the virtual char
    else
    begin
      out_block[j] := MTFEncoder.Encode(in_block[index[i]-1]);
      inc(j);
    end;

    inc(i);
  end;

  if (virtual_char_index = -2) then
  begin
    // fatal error: virtual_char_index may not have been initialized at all
    ShowError('virtual_char_index not initialized.');
  end;

  // we have taken out the virtual char, so we dec block_length
  dec(block_length);

  SwapBlocks;
end;

{-------------------------------------------------------------------------------
  AriEncodeBlock

  Notes:
  Arithmetic compress block and output block
-------------------------------------------------------------------------------}
procedure TCompressor.AriEncodeBlock(ArchiveFile: TArchiveFile; block_length: longint);
begin
  DebugShowDoingAriCompress;
 // FileStrucAriEncoder := TFileStrucAriEncoder.Create;  
  FileStrucAriEncoder.EncodeBlock(ArchiveFile, in_block, block_length);

  // debug check
  {DecodeBlock(recovered_block, rsize);
  CompareBlocks(mtf_block, recovered_block, block_length, 'Decompression error.');}
end;


(*
procedure TCompressor.GetTransformedBlock(var first_sym_index, virtual_char_index: longint);
var
  i, j: longint;
begin
  DebugShowDoingTransform;

  // mirror the last byte in block to block[-1], so when i = 0, block^[i-1] works
  in_block^[-1] := in_block^[block_length-1];

  // sada sort account for vitual. don't pass it to the mtf.
  // we remove it from out_block and store its index.
  i := 0;  // in_block index
  j := 0;  // out_block index
  virtual_char_index := -2;

  while (i < block_length) do
  begin
    if (index^[i] = 1) then
      first_sym_index := i;

    // the virtual char is accessed when in_block[-1] is accessed
    if ((index^[i]-1) = -1) then
      virtual_char_index := j // we skip the virtual char
    else
    begin
      out_block^[j] := in_block^[longint(index[i])-1];
      inc(j);
    end;

    inc(i);
  end;

  //ShowMessage('Virtual char index: ' + IntToStr(virtual_char_index));

  if (virtual_char_index = -2) then
  begin
    // fatal error: virtual_char_index may not have been initialized at all
    ShowError('virtual_char_index not initialized.');
  end;

  // we have taken out the virtual char, so we dec block_length
  dec(block_length);

  // debug check
  {DumpTransformedBlock;}
  {DoBlockRecover;
  CheckRecoveredBlock;}
  SwapBlocks;
end;

{-------------------------------------------------------------------------------
  MTFEncodeBlock
-------------------------------------------------------------------------------}
procedure TCompressor.MTFEncodeBlock;
var
  MTFEncoder: TMTFEncoder;
  {MTFDecoder: TMTFDecoder;}
begin
  DebugShowDoingMTF;

  MTFEncoder := TMTFEncoder.create;
  MTFEncoder.EncodeBlock(in_block, out_block, block_length);
  MTFEncoder.free;

  SwapBlocks;

  // debug check
  {MTFDecoder := TMTFDecoder.create;
  MTFDecoder.DecodeBlock(mtf_block, recovered_block, block_length);
  MTFDecoder.free;}
end;
*)


(*******************************************************************************
  Debuging routines
*******************************************************************************)

(*
procedure TCompressor.DoBlockRecover;
{var
  RecoveredBlockLength: Longint;}
begin
  //Expander.ExpandBlock(block, recovered_block, first_sym_index, block_length, RecoveredBlockLength);
  //Expander.ExpandBlock(transformed_block, recovered_block, first_sym_index, block_length, RecoveredBlockLength);
end;


{-------------------------------------------------------------------------------
  DumpSortedBlock
  ---------------
  Dumps the data in block sorted in alphabetical order.
  Used to visually confirm the reliability of the sorting algorithm.
-------------------------------------------------------------------------------}
procedure TCompressor.DumpSortedBlock;
var
  f: text;
  i: integer;
begin
  AssignFile(f, 'c:\ctest\SortedBlockDump.txt');
  Rewrite(f);
  writeln(f, 'Sorted Block Dump file');
  writeln(f, 'reSource eXperimental (C) 1997 F-inc');
  writeln(f, '=======================================');
  writeln(f, 'block_length: ', block_length);
  writeln(f, '=======================================');
  for i := 0 to block_length-1 do
    {if (index^[i] = block_length) then
      write(f, '?')
    else}
      //write(f, char(block^[index^[i]]));
  Close(f);
end;

{-------------------------------------------------------------------------------
  DumpBlock
  ---------
  Dumps the block, b to a file.
  Used by DumpTransformedBlock
-------------------------------------------------------------------------------}
procedure TCompressor.DumpBlock(var b; bsize: longint; FileName: string);
var
  f: TOFile;
begin
  f := TOFile.create(FileName);
  f.Rewrite(1);
  f.BlockWrite(b, block_length);
  f.free;
end;

procedure TCompressor.DumpRecoveredBlock;
begin
  DumpBlock(recovered_block^, block_length, 'c:\ctest\out Recovered Block.txt');
end;

{-------------------------------------------------------------------------------
  DumpTransformedBlock
  --------------------
  Dumps the transformed block to file.
  This is actually L, or the last column in the transformation matrix.

  IN Assertion: DoBlockTransform was called.
-------------------------------------------------------------------------------}
procedure TCompressor.DumpTransformedBlock;
begin
//  DumpBlock(block^, block_length, 'c:\ctest\out Transformed Block.txt');
end;

*)

{-------------------------------------------------------------------------------
  CheckSortedBlock
  ----------------
  Checks the sorted block for ascending order.
  Only displays an error when one has occured.
-------------------------------------------------------------------------------}
(*
procedure TCompressor.CheckSortedBlock;
var
  i: integer;
begin
  {Checks: INBLOCK
  Assertion: Index has been created}
  
  i := 1;
  while (i < block_length-1) and (in_block^[Index^[i]] >= in_block^[Index^[i-1]]) do
    inc(i);

  {An error has occured if i did not reach the end of block}
  if (i < block_length-1) then
    ShowError('Block not sorted correctly');
end;


{-------------------------------------------------------------------------------
  CheckRecoveredBlock
  -------------------
  Does a byte to byte comparison of the recovered block and the original block.
  Shows an error and the position where the first different byte was found.
-------------------------------------------------------------------------------}
procedure TCompressor.CheckRecoveredBlock;
var
  i: longint;
begin
  //DumpRecoveredBlock;

   {recovered_block must be the same as original block}
   for i := 0 to block_length-1 do
     if recovered_block^[i] <> original_block^[i] then
     begin
       ShowError('Recovered block differs from original block at ' + IntToStr(i));
       break;
     end;


   {Alternate way of comparing using CompareMem.
   Position of difference start will not be shown.

   if not CompareMem(recovered_block, block, block_length-1) then
   ShowError('Recovered block differs from original block');}
end;
*)

(*******************************************************************************
  Debug Output routines
*******************************************************************************)
procedure TCompressor.DebugShowDoingSorting;
begin
  if ConfigMan.ShowDebugForm then DebugForm.DoingSorting;
end;

procedure TCompressor.DebugShowDoingTransform;
begin
  if ConfigMan.ShowDebugForm then DebugForm.DoingTransform;
end;

{procedure TCompressor.DebugShowDoingMTF;
begin
  if ConfigMan.ShowDebugForm then DebugForm.DoingMTF;
end;}

procedure TCompressor.DebugShowDoingAriCompress;
begin
  if ConfigMan.ShowDebugForm then DebugForm.DoingAriCompress;
end;


end.


