unit RLEUnit;
{-------------------------------------------------------------------------------
  Run Length Encoder Unit
  -----------------------
  resource (C) 1998, 1999 Victor Kasenda / gruv
  http://members.tripod.com/~gruv/resource

  Desc:
  This is the run length encoder for preprocessing the file before the sorting
  phase.

  Naming convention notes:
  ix: input index
  oix: output index
-------------------------------------------------------------------------------}


(**) interface (**)
uses StructsUnit;

const
  {RunThreshold number of bytes signifies the start of a run.
  4 = 4 + 0
  5 = 4 + 1
  6 = 4 + 1 bytes
  4 will expand to 5 bytes, 5 will retain, 6 will compress to 5 bytes}
  RUN_THRESHOLD = 100;

type
  TRunLengthEncoder = class
  private
    in_block, out_block: PBlock;
    block_length: longint;               // in_block length
    oix: longint;                        // index into out_block
    run_length: longint;                 // current run count
    last_symbol: byte;                   // the symbol that has a run

    procedure PutByte(const b: byte);
    procedure PutRunCount;
  public
    procedure EncodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
                          var out_block_length: longint);

  end;



  TRunLengthDecoder = class
    in_block, out_block: PBlock;
    block_length: longint;                 // length of in_block
    ix, oix: longint;                      // index into input and output block

    function GetRunCount: longint;
    procedure ExpandRun;
  public
    procedure DecodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
                          var out_block_length: longint);
  end;


(**) implementation (**)



////////////////////////////////////////////////////////////////////////////////
// Run Length Encoder
////////////////////////////////////////////////////////////////////////////////

{-------------------------------------------------------------------------------
  PutByte
  -------
  output a byte to out_block and increment the output index (oix)
-------------------------------------------------------------------------------}
procedure TRunLengthEncoder.PutByte(const b: byte);
begin
  out_block^[oix] := b;
  inc(oix);
end;

{-------------------------------------------------------------------------------
  PutRunCount
  -----------

  Desc:
  The count is encoded in as many 6 bit codes as needed, up to a max of 30 bits.
  The 7th bit is set if more codes follow.
  The most significant 6 bits are transmitted first.
-------------------------------------------------------------------------------}
procedure TRunLengthEncoder.PutRunCount;
var
  d: byte;
  bits_shift: shortint;
  must_put: boolean;
begin
  // Start by getting bits 25-30, then 19-24, 13-19 etc.
  // if a bigger value was set eg. 25-30, then the rest of the values must be
  // put although they may be 0
  dec(run_length, RUN_THRESHOLD);
  bits_shift := 24;
  must_put := false;
  repeat
    d := ((run_length shr bits_shift) and $3F);

    if (d > 0) or must_put then
    begin
      d := d or $40;
      PutByte(d);
      must_put := true;
    end;
    dec(bits_shift, 6);
  until (bits_shift = 0);

  // Put last byte (terminator) without the 7th bit set
  d := (run_length and $3F);
  PutByte(d);
end;

{-------------------------------------------------------------------------------
  EncodeBlock
  -----------

  Algo:
  Maintain 2 index, ix and oix into the input and output block respectively.
  curr_symbol: current symbol
  1) Read curr_symbol from the block
  2) If curr_symbol equals the previous symbol then
     a) increase run count
     ELSE
     a) If it is the end of a run (run count > run threshold) then
       i) output the run length
       ii) reset run length
  3) Only output the curr_symbol if the run length is below run threshold
  4) Repeat (1)

  Notes:
  If the run goes all the way to the end of the block, we must output the
  run length in the end.
-------------------------------------------------------------------------------}
procedure TRunLengthEncoder.EncodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
                                        var out_block_length: longint);


  {
    Initialize resets the variables to process a new block
  }
  procedure Initialize;
  begin
    out_block := _out_block;
    in_block := _in_block;
    block_length := _block_length;
    oix := 0;
  end;


var
  curr_symbol: byte;
  ix: longint;
begin
  Initialize;

  {Init out_block with the first byte in in_block}
  run_length := 1;
  last_symbol := in_block^[0];
  PutByte(last_symbol);

  for ix := 1 to block_length-1 do
  begin
    curr_symbol := in_block^[ix];

    if (curr_symbol = last_symbol) then
      inc(run_length)
    else
    begin
      {A different symbol indicates an end of run}
      if (run_length >= RUN_THRESHOLD) then
        PutRunCount;
      run_length := 1;
    end;

    if (run_length <= RUN_THRESHOLD) then
      PutByte(curr_symbol);

    last_symbol := curr_symbol;
  end;

  {If there were more than RunThreshold bytes at the end of the block,
  then we must terminate the run at the end}
  if (run_length >= RUN_THRESHOLD) then PutRunCount;


  out_block_length := oix;
end;




////////////////////////////////////////////////////////////////////////////////
// Run Length Decoder
////////////////////////////////////////////////////////////////////////////////

{-------------------------------------------------------------------------------
  DecodeBlock
  -----------
  Decode a block.

  Algo:
  Maintain 2 indexes, ix and oix into the input and output block.
  1) Read in a character
  2) If the character is repeated, then increase run length
  3) If run length hits run threshold, (a run length follows)
     a) decode the run length
     b) expand the run (fill output block with run length number of thbe char curr_symbol)
     b) reset run length to zero
  4) Repeat (1)

  Notes:
  We start counting from index 1 so that previous char is init to the char at
  index 0.
-------------------------------------------------------------------------------}
procedure TRunLengthDecoder.DecodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
                                        var out_block_length: longint);


  procedure Initialize;
  begin
    out_block := _out_block;
    in_block := _in_block;
    block_length := _block_length;
  end;


var
  run_length: byte;
  curr_symbol, last_symbol: byte;
begin
  Initialize;
  run_length := 1;
  last_symbol := in_block^[0];
  out_block^[0] := last_symbol;

  oix := 1;
  ix := 1;
  while (ix < block_length) do
  begin
    curr_symbol := in_block^[ix];
    out_block^[oix] := curr_symbol;

    inc(ix);        {The next index could point to a run length or another char}
    inc(oix);

    if (curr_symbol = last_symbol) then
    begin
      inc(run_length);
      if (run_length = RUN_THRESHOLD) then
      begin
        ExpandRun;
        run_length := 1;
      end;
    end
    else
      run_length := 1;

    last_symbol := curr_symbol;
  end; {while}

  out_block_length := oix;
end;

{-------------------------------------------------------------------------------
  GetRunCount
  -----------
  gets the run count by reading as many bits as necessary that represent the
  run length. The run length is represented in 7 bits per byte.
-------------------------------------------------------------------------------}
function TRunLengthDecoder.GetRunCount: longint;
var
  count: longint;
  b: byte;
begin
  count := 0;

  repeat
    b := in_block^[ix];
    count := (count shl 6) or (b and $3F);  // extract last 6 bits from b
    inc(ix);
  until ((b and $40) = 0);                  // continue if 7th bit set

  result := count;
end;


{-------------------------------------------------------------------------------
  ExpandRun
  ---------
  Expand the run with length pointed to by ix.
  ix-1 is the symbol used to expand.

  GetRunCount will inc ix to get the run count.
  ExpandRun itself will inc oix accordingly.

  IN and OUT assertion:
  ix and oix point to the next pos to input and output respectively.
-------------------------------------------------------------------------------}
procedure TRunLengthDecoder.ExpandRun;
var
  run_symbol: byte;
  expand_count: longint;
  expand_limit: longint;
begin
  run_symbol := in_block^[ix-1];
  expand_count := GetRunCount;
  expand_limit := oix + expand_count;

  while (oix < expand_limit) do
  begin
    out_block^[oix] := run_symbol;
    inc(oix);
  end;

end;


end.
