/*****************************************************************************/
/* File: lzw.c                                                               */
/* Author: David Chatenay                                                    */
/* Last Modified: Mon Dec 23 1996                                            */
/*                                                                           */
/* A standard interface for Lzw encoding of file, plus low-level routines    */
/*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "common.h"
#include "crunch.h"
#include "buffer.h"
#include "header.h"


/*****************************************************************************/
/*****************************************************************************/
/*****************************************************************************/
/*                               MISCELLANEOUS                               */
/*****************************************************************************/
/* DEFINES */
/* Sentinel stored in `word` variables meaning "no code / no pending string". */
/* NOTE(review): presumably it must stay 65535 because `word` is 16 bits wide */
/* and this is its largest value -- confirm against the `word` typedef.       */
#define NOT_FOUND  65535  /* Do not touch this!!! */
/* Command codes. They sit right after the 256 single-byte codes (0..255),    */
/* which is why LzwReset() restarts CellNumber at 259.                        */
#define EOF_WORD    256   /* end of file / end of block marker */
#define RESET_WORD  257   /* dictionary reset command          */
#define SIZEUP_WORD 258   /* code size grows by one bit        */


/*****************************************************************************/
/*****************************************************************************/
/*****************************************************************************/
/*                           LOW-LEVEL FUNCTIONS                             */
/*****************************************************************************/
/* Hash a string stored as a chain of cells.                */
/* The chain is walked from the given cell back through its */
/* prev links, folding every byte into the running value as */
/* acc = ~((acc << 1) ^ byte). The original note states the */
/* function matches the one in hash.c, only applied in      */
/* reverse order because of the way cells are linked.       */
static dword CellHash(LzwCell *c)
{
    LzwCell *cur;
    dword acc;

    acc = 0;
    for (cur = c; cur != NULL; cur = cur->prev)
        acc = ~((acc << 1) ^ cur->last);

    return acc;
}


/* Return the first byte of the string ending at cell c:  */
/* follow the prev links until the cell that has no       */
/* predecessor, and hand back the byte stored there.      */
static byte FirstByte(LzwCell *c)
{
    LzwCell *head = c;

    while (head->prev)
        head = head->prev;

    return head->last;
}


/* Put the scalar state of a context back to its start values. */
/* Called once at init time and again on every dictionary      */
/* reset; it touches neither the dictionary cells nor the hash */
/* table chains.                                               */
void LzwReset(LzwContext *h)
{
    /* The three command codes are written with the start width */
    h->SizeUpCode.code_len = 9;
    h->ResetCode.code_len = 9;
    h->EofCode.code_len = 9;

    /* 9-bit codes, 512 addressable entries, and the first free */
    /* entry right after the 256 bytes + 3 command codes        */
    h->CodeSize = 9;
    h->MaxEntries = 512;
    h->CellNumber = 259;
}


/* One-time initialization of an Lzw context:                   */
/*  - the 256 single-byte strings are stored in the dictionary, */
/*  - each of the 256 hash buckets gets its fixed head entry,   */
/*  - scalar state is reset through LzwReset(),                 */
/*  - the command codes are stored low byte first.              */
void LzwInitContext(LzwContext *h)
{
    int ch;

    for (ch = 0; ch < 256; ch++) {
        LzwCell *cell = h->Dictionnary + ch;
        LzwIndex *slot = h->HashTable + ch;

        /* Dictionary entry for the one-byte string `ch` */
        cell->last  = (byte)ch;
        cell->index = (byte)ch;
        cell->prev  = NULL;

        /* Head of bucket `ch`, pointing at that entry */
        slot->hash = ~((dword)ch);
        slot->pnt  = cell;
        slot->next = NULL;
    }

    LzwReset(h);

    h->EofCode.code[0]    = EOF_WORD & 0xff;
    h->EofCode.code[1]    = (EOF_WORD >> 8) & 0xff;

    h->ResetCode.code[0]  = RESET_WORD & 0xff;
    h->ResetCode.code[1]  = (RESET_WORD >> 8) & 0xff;

    h->SizeUpCode.code[0] = SIZEUP_WORD & 0xff;
    h->SizeUpCode.code[1] = (SIZEUP_WORD >> 8) & 0xff;
}


/* Grow the code width by one bit: the number of addressable */
/* entries doubles, and the three command codes take the new */
/* width so they can still be read and written.              */
static void IncrementCodeSize(LzwContext *h)
{
    h->CodeSize += 1;
    h->MaxEntries = (word)(h->MaxEntries * 2);

    h->SizeUpCode.code_len = h->CodeSize;
    h->ResetCode.code_len = h->CodeSize;
    h->EofCode.code_len = h->CodeSize;
}


/* Compare two strings stored as chains of cells.            */
/* Both chains are walked in parallel through their prev     */
/* links; they match when every byte is equal and both end   */
/* on the same step.                                         */
/* RETURN VALUE: 0 if they're equal                          */
/*              -1 if they're different                      */
static int Compare(LzwCell *c1, LzwCell *c2)
{
    for (;;) {
        if (c1->last != c2->last)
            return -1;
        c1 = c1->prev;
        c2 = c2->prev;
        /* Stop as soon as at least one chain is exhausted */
        if (c1 == NULL || c2 == NULL)
            break;
    }

    /* Equal only when both chains ran out together */
    return (c1 == NULL && c2 == NULL) ? 0 : -1;
}


/* Look a cell string up in the hash table.                     */
/* The bucket is hash % 256; its fixed head entry is skipped    */
/* and only the chained nodes are examined. A node matches when */
/* its stored hash is equal and Compare() confirms the string.  */
/* RETURN VALUE: NOT_FOUND if not found                         */
/*               The index of the matching cell if found        */
static word HashTableSearch(LzwContext *h, LzwCell *tofind, dword hash)
{
    LzwIndex *node;

    for (node = h->HashTable[hash % 256].next; node != NULL;
         node = node->next) {
        /* Cheap test first: different hash, different string */
        if (node->hash != hash)
            continue;
        if (Compare(tofind, node->pnt) != 0)
            continue;
        return node->pnt->index;
    }

    return NOT_FOUND;
}


/* Insert a Cell into the hash table.                             */
/* The new index node is linked directly behind the fixed head    */
/* entry of bucket hash % 256, so an insertion costs the same     */
/* whatever the chain length (the chain is no longer walked to    */
/* its tail). The position inside a chain does not matter:        */
/* HashTableSearch() scans the whole chain and compares complete  */
/* strings, and a string is only inserted after a failed search.  */
static void HashTableInsert(LzwContext *h, LzwCell *c, dword hash)
{
    LzwIndex *bucket, *node;

    bucket = h->HashTable + (hash % 256);

    /* NOTE(review): Malloc is the project's allocation wrapper;   */
    /* presumably it never returns NULL -- confirm.                */
    node = (LzwIndex*)Malloc(sizeof(LzwIndex));
    node->hash = hash;
    node->pnt = c;

    /* Splice the node in right after the bucket head */
    node->next = bucket->next;
    bucket->next = node;
}


/* Free all allocated cells */
/*** Helper: release every node of one chain, front to back */
static void _FreeLinkedIndex(LzwIndex *i)
{
    LzwIndex *following;

    while (i != NULL) {
        /* Remember the successor before the node goes away */
        following = i->next;
        free(i);
        i = following;
    }
}

/* Release the chained nodes of all 256 buckets and leave   */
/* every bucket head with an empty chain. The head entries  */
/* themselves belong to the context and are not freed.      */
void LzwFreeHashTable(LzwContext *h)
{
    int bucket;

    for (bucket = 0; bucket < 256; bucket++) {
        if (h->HashTable[bucket].next == NULL)
            continue;
        _FreeLinkedIndex(h->HashTable[bucket].next);
        h->HashTable[bucket].next = NULL;
    }
}


/* The low-level function to compress files using lzw.                  */
/* The input is consumed buffer by buffer. `latent` holds the code of   */
/* the longest dictionary string matching the bytes read so far; when   */
/* the next byte breaks the match, that code is emitted and the longer  */
/* string is added to the dictionary. The stream always ends with the   */
/* EofCode.                                                             */
/* When nothing is pending at the end (empty input, or a dictionary     */
/* reset triggered by the very last byte) no data code is emitted       */
/* before EofCode: writing NOT_FOUND as a code would make the decoder   */
/* output a byte that was never in the input.                           */
/* RETURN VALUE: 0 on success. TORI() is defined elsewhere; presumably  */
/* it makes the function return early when a code cannot be written.    */
static int LzwRawEncode(LzwContext *h, Buffer *in, Buffer *out)
{
    word latent=NOT_FOUND, found;
    byte *buffer;
    LzwCell str;
    dword hash;
    Code emit;
    int i;

    /* Read the first bytes */
    ReadBuffer(in);
    buffer = in->buffer;
    emit.code_len = h->CodeSize;
    while (in->buff_length > 0) {
        for (i=0; i<in->buff_length; i++) {
            if (latent == NOT_FOUND) {
                /* Nothing pending: this byte starts a new string */
                latent = buffer[i];
                continue;
            }
            /* Build the candidate string: pending string + this byte */
            str.last = buffer[i];
            str.prev = h->Dictionnary + latent;
            /* Compute its hash value */
            hash = CellHash(&str);
            /* Is it in the table? */
            found = HashTableSearch(h, &str, hash);
            if (found != NOT_FOUND) {
                /* Yes: keep extending the match */
                latent = found;
                continue;
            }
            /* Nope. We emit the latent code */
            emit.code[0] = latent & 0xff;
            emit.code[1] = (latent >> 8) & 0xff;
            TORI(AddCode(&emit, out));
            /* Insert the new string in the table */
            h->Dictionnary[h->CellNumber].last = buffer[i];
            h->Dictionnary[h->CellNumber].index = h->CellNumber;
            h->Dictionnary[h->CellNumber].prev = h->Dictionnary+latent;
            /* Insert it in the hash table */
            HashTableInsert(h, h->Dictionnary + h->CellNumber, hash);
            h->CellNumber++;
            if (h->CellNumber == DICT_SIZE) {
                /* The dictionary is full, we empty it completely. */
                /* The byte just read is emitted as its own code   */
                /* because nothing stays pending across the reset. */
                emit.code[0] = buffer[i] & 0xff;
                emit.code[1] = 0x00;
                TORI(AddCode(&emit, out));
                /* Emit a Reset command */
                TORI(AddCode(&h->ResetCode, out));
                /* Free the hash table cells */
                DEBUG0("    [FreeHashTable]\n" );
                LzwFreeHashTable(h);
                /* Reset the scalar state (code size, next entry) */
                DEBUG0("    [ResetTable]\n" );
                LzwReset(h);
                latent = NOT_FOUND;
                emit.code_len = h->CodeSize;
            } else {
                /* The byte that broke the match starts the next string */
                latent = buffer[i];
            }
            /* Shall we increment the code size? */
            if (h->CellNumber == h->MaxEntries) {
                /* Yes */
                DEBUG1("    [SizeUp [%d]]\n", h->CodeSize+1);
                TORI(AddCode(&h->SizeUpCode, out));
                IncrementCodeSize(h);
                emit.code_len = h->CodeSize;
            }
        }
        ReadBuffer(in);
    }
    /* Emit the pending string, if there is one */
    if (latent != NOT_FOUND) {
        emit.code[0] = latent & 0xff;
        emit.code[1] = (latent >> 8) & 0xff;
        TORI(AddCode(&emit, out));
    }
    TORI(AddCode(&h->EofCode, out));
    /* Flush the out buffer */
    FlushBuffer(out);
    return 0;
}


/* The low-level function to compress blocks using lzw.                 */
/*   h    : Lzw context (dictionary, hash table, current code size)     */
/*   in   : bytes to encode                                             */
/*   sin  : number of bytes in `in`                                     */
/*   out  : destination block                                           */
/*   sout : on entry, the size handed to AddCodeToBlock() (presumably   */
/*          the capacity of `out` -- confirm); on success, the number   */
/*          of bytes produced, a partially filled last byte included    */
/* The block always ends with EofCode. When nothing is pending at the   */
/* end (sin == 0, or a dictionary reset triggered by the last byte) no  */
/* data code is emitted before it: writing NOT_FOUND as a code would    */
/* make the decoder output a byte that was never in the input.          */
/* RETURN VALUE: 0 on success. TORI() is defined elsewhere; presumably  */
/* it makes the function return early when a code cannot be written.    */
int LzwBlockEncode(LzwContext *h, byte *in, dword sin, byte *out, dword *sout)
{
    word latent=NOT_FOUND, found, bit_count=0;
    dword hash, i, byte_count=0, size_out;
    LzwCell str;
    Code emit;

    /* Init */
    size_out = *sout;
    emit.code_len = h->CodeSize;

    /* For each character to encode... */
    for (i=0; i<sin; i++) {
        if (latent == NOT_FOUND) {
            /* Nothing pending: this byte starts a new string */
            latent = in[i];
            continue;
        }
        /* Build the candidate string: pending string + this byte */
        str.last = in[i];
        str.prev = h->Dictionnary + latent;
        /* Compute its hash value */
        hash = CellHash(&str);
        /* Is it in the table? */
        found = HashTableSearch(h, &str, hash);
        if (found != NOT_FOUND) {
            /* Yes: keep extending the match */
            latent = found;
            continue;
        }
        /* Nope. We emit the latent code */
        emit.code[0] = latent & 0xff;
        emit.code[1] = (latent >> 8) & 0xff;
        TORI(AddCodeToBlock(&emit, out, size_out, &byte_count,
                            &bit_count));
        /* Insert the new string in the table */
        h->Dictionnary[h->CellNumber].last = in[i];
        h->Dictionnary[h->CellNumber].index = h->CellNumber;
        h->Dictionnary[h->CellNumber].prev = h->Dictionnary+latent;
        /* Insert it in the hash table */
        HashTableInsert(h, h->Dictionnary + h->CellNumber, hash);
        h->CellNumber++;
        if (h->CellNumber == DICT_SIZE) {
            /* The dictionary is full, we empty it completely. */
            /* The byte just read is emitted as its own code   */
            /* because nothing stays pending across the reset. */
            emit.code[0] = in[i] & 0xff;
            emit.code[1] = 0x00;
            TORI(AddCodeToBlock(&emit, out, size_out, &byte_count,
                                &bit_count));
            /* Emit a Reset command */
            TORI(AddCodeToBlock(&h->ResetCode, out, size_out,
                                &byte_count, &bit_count));
            /* Free the hash table cells */
            DEBUG0("    [FreeHashTable]\n" );
            LzwFreeHashTable(h);
            /* Reset the scalar state (code size, next entry) */
            DEBUG0("    [ResetTable]\n" );
            LzwReset(h);
            latent = NOT_FOUND;
            emit.code_len = h->CodeSize;
        } else {
            /* The byte that broke the match starts the next string */
            latent = in[i];
        }
        /* Shall we increment the code size? */
        if (h->CellNumber == h->MaxEntries) {
            /* Yes */
            DEBUG1("    [SizeUp [%d]]\n", h->CodeSize+1);
            TORI(AddCodeToBlock(&h->SizeUpCode, out, size_out,
                                &byte_count, &bit_count));
            IncrementCodeSize(h);
            emit.code_len = h->CodeSize;
        }
    }
    /* Emit the pending string, if there is one */
    if (latent != NOT_FOUND) {
        emit.code[0] = latent & 0xff;
        emit.code[1] = (latent >> 8) & 0xff;
        TORI(AddCodeToBlock(&emit, out, size_out, &byte_count,
                            &bit_count));
    }
    /* Emit an End Of Block code (same as End Of File) */
    TORI(AddCodeToBlock(&h->EofCode, out, size_out, &byte_count, &bit_count));
    /* Return the length encoded; a started byte counts as a full one */
    if (bit_count > 0)
        byte_count++;
    *sout = byte_count;

    return 0;
}


/* Write the string ending at cell c to the out buffer.   */
/* Cells are linked from last byte to first, so the       */
/* predecessors are written first (recursively) and the   */
/* byte of this cell comes last.                          */
/* RETURN VALUE: 0 once the byte of this cell is written. */
static int EmitCell(LzwCell *c, Buffer *out)
{
    LzwCell *parent = c->prev;

    if (parent != NULL) {
        TORI(EmitCell(parent, out));
    }
    Put(out, c->last);

    return 0;
}


/* The low-level function to decode a lzw-encoded file.                 */
/* Codes are read from `in` until EOF_WORD. Every data code is expanded */
/* to `out`, and a new dictionary entry (previous string + first byte   */
/* of the current one) is recorded so the dictionary follows the one    */
/* built by the encoder.                                                */
/* Codes come from the input and are used as dictionary indices, so     */
/* they are validated first: a code above the next free entry, an       */
/* insertion into a full dictionary, or a size-up past 16 bits (a code  */
/* is assembled from two bytes) cannot occur in a well-formed stream    */
/* and makes the function fail instead of touching memory outside the   */
/* dictionary.                                                          */
/* RETURN VALUE: 0 on success, -1 on a malformed stream. TORI() is      */
/* defined elsewhere; presumably it returns early on a read/write error.*/
static int LzwRawDecode(LzwContext *h, Buffer *in, Buffer *out)
{
    word latent=NOT_FOUND, code;
    Code current;
    byte t=0;

    /* Initialization */
    current.code_len = h->CodeSize;
    ResetBuffer(in);
    ReadBuffer(in);
    /* Read the first code */
    TORI(ReadCode(&current, in));
    code = (current.code[0] + (current.code[1] << 8));

    while (code != EOF_WORD) {
        if (latent != NOT_FOUND) {
            if (code < 256) {
                /* It's a basic code (a single character) */
                /* No room left means a Reset was expected here */
                if (h->CellNumber >= DICT_SIZE)
                    return -1;
                /* Emit it */
                Put(out, code&0xff);
                /* Insert the new string */
                h->Dictionnary[h->CellNumber].last = code & 0xff;
                h->Dictionnary[h->CellNumber].prev = h->Dictionnary + latent;
                h->CellNumber++;
                latent = code;
            } else {
                /* It's an extended code */
                switch (code) {
                case RESET_WORD:
                    /* It's a reset command */
                    DEBUG0("    [ResetCommand]\n" );
                    LzwReset(h);
                    latent = NOT_FOUND;
                    current.code_len = h->CodeSize;
                    break;
                case SIZEUP_WORD:
                    /* It's a size-up command */
                    /* Codes are rebuilt from two bytes: 16 bits at most */
                    if (h->CodeSize >= 16)
                        return -1;
                    DEBUG0("    [SizeUpCommand]\n" );
                    IncrementCodeSize(h);
                    current.code_len++;
                    break;
                default:
                    /* It's a complex code */
                    /* It may name an existing entry or the one about */
                    /* to be created, nothing beyond; and there must  */
                    /* be room for the entry inserted below.          */
                    if (code > h->CellNumber || h->CellNumber >= DICT_SIZE)
                        return -1;
                    if (code != h->CellNumber) {
                        /* The normal case */
                        TORI(EmitCell(h->Dictionnary + code, out));
                        t = FirstByte(h->Dictionnary + code);
                    } else {
                        /* The not-so-normal case (last char = first): */
                        /* the code is the entry being created, i.e.   */
                        /* the previous string plus its own first byte */
                        TORI(EmitCell(h->Dictionnary + latent, out));
                        t = FirstByte(h->Dictionnary + latent);
                        Put(out, t);
                    }
                    /* Insert the new string */
                    h->Dictionnary[h->CellNumber].last = t;
                    h->Dictionnary[h->CellNumber].prev = h->Dictionnary +
                      latent;
                    h->CellNumber++;
                    latent = code;
                }
            }
        } else {
            /* This happens only when the dictionary  */
            /* is empty. So code is < 256 (usually).  */
            /* An unexpected code is only reported,   */
            /* not rejected (best effort, as before). */
            if (code > 256) {
                DEBUG1("[BUG: latent=%d]\n", code);
            }
            latent = code;
            Put(out, latent & 0xff);
        }
        /* Read the next code. This automagically */
        /* read the input file in buffers...      */
        TORI(ReadCode(&current, in));
        code = (current.code[0] + (current.code[1] << 8));
    }
    DEBUG0("    [EofCommand]\n" );
    /* Flush the out buffer */
    FlushBuffer(out);
    return 0;
}


/* Write the string ending at cell c into a memory block.           */
/*   out   : destination block                                      */
/*   size  : number of bytes `out` may receive                      */
/*   count : current write position, advanced by one per byte       */
/* Predecessors are written first (recursively) because cells are   */
/* linked from last byte to first.                                  */
/* The limit is tested BEFORE the store, so out[size] is never      */
/* written when the block is full.                                  */
/* RETURN VALUE: 0 on success, -1 when the block is out of space.   */
static int EmitCellToBlock(LzwCell *c, byte *out, dword size, dword *count)
{
    if (c->prev) {
        TORI(EmitCellToBlock(c->prev, out, size, count));
    }
    if ((*count) >= size) { DEBUG2("OoS! [%d/%d]\n",*count,size); return -1; }
    out[(*count)] = c->last;
    (*count)++;

    return 0;
}


/* The low-level function to decode a lzw-encoded block.                */
/*   h    : Lzw context (dictionary, current code size)                 */
/*   in   : encoded block                                               */
/*   sin  : size of the encoded block, handed to ReadCodeFromBlock()    */
/*   out  : destination for the decoded bytes                           */
/*   sout : on entry, the number of bytes `out` may receive; on         */
/*          success, the number of bytes actually decoded               */
/* Codes are read until EOF_WORD. The output limit is tested before     */
/* every store, so out[*sout] is never written. Codes come from the     */
/* input and are used as dictionary indices, so they are validated: a   */
/* code above the next free entry, an insertion into a full dictionary, */
/* or a size-up past 16 bits (a code is assembled from two bytes)       */
/* cannot occur in a well-formed block and makes the function fail.     */
/* RETURN VALUE: 0 on success, -1 when `out` is too small or the block  */
/* is malformed. TORI() is defined elsewhere; presumably it returns     */
/* early when a helper reports an error.                                */
int LzwBlockDecode(LzwContext *h, byte *in, dword sin, byte *out, dword *sout)
{
    dword byte_count=0, size, out_count=0, max_out;
    word latent=NOT_FOUND, code, bit_count=0;
    Code current;
    byte t=0;

    /* Initialization */
    current.code_len = h->CodeSize;
    size = sin;
    max_out = (*sout);
    /* Read the first code */
    TORI(ReadCodeFromBlock(&current, in, size, &byte_count, &bit_count));
    code = (current.code[0] + (current.code[1] << 8));

    while (code != EOF_WORD) {
        if (latent != NOT_FOUND) {
            if (code < 256) {
                /* It's a basic code (a single character) */
                /* No room left means a Reset was expected here */
                if (h->CellNumber >= DICT_SIZE)
                    return -1;
                /* Emit it */
                if (out_count >= max_out) { DEBUG0("OoD!\n"); return -1; }
                out[out_count] = code & 0xff;
                out_count++;
                /* Insert the new string */
                h->Dictionnary[h->CellNumber].last = code & 0xff;
                h->Dictionnary[h->CellNumber].prev = h->Dictionnary + latent;
                h->CellNumber++;
                latent = code;
            } else {
                /* It's an extended code */
                switch (code) {
                case RESET_WORD:
                    /* It's a reset command */
                    DEBUG0("    [ResetCommand]\n" );
                    LzwReset(h);
                    latent = NOT_FOUND;
                    current.code_len = h->CodeSize;
                    break;
                case SIZEUP_WORD:
                    /* It's a size-up command */
                    /* Codes are rebuilt from two bytes: 16 bits at most */
                    if (h->CodeSize >= 16)
                        return -1;
                    DEBUG0("    [SizeUpCommand]\n" );
                    IncrementCodeSize(h);
                    current.code_len++;
                    break;
                default:
                    /* It's a complex code */
                    /* It may name an existing entry or the one about */
                    /* to be created, nothing beyond; and there must  */
                    /* be room for the entry inserted below.          */
                    if (code > h->CellNumber || h->CellNumber >= DICT_SIZE)
                        return -1;
                    if (code != h->CellNumber) {
                        /* The normal case */
                        TORI(EmitCellToBlock(h->Dictionnary + code, out,
                                             max_out, &out_count));
                        t = FirstByte(h->Dictionnary + code);
                    } else {
                        /* The not-so-normal case (last char = first): */
                        /* the code is the entry being created, i.e.   */
                        /* the previous string plus its own first byte */
                        TORI(EmitCellToBlock(h->Dictionnary + latent, out,
                                             max_out, &out_count));
                        t = FirstByte(h->Dictionnary + latent);
                        if (out_count >= max_out)
                            { DEBUG0("OoD!\n"); return -1; }
                        out[out_count] = t;
                        out_count++;
                    }
                    /* Insert the new string */
                    h->Dictionnary[h->CellNumber].last = t;
                    h->Dictionnary[h->CellNumber].prev = h->Dictionnary +
                      latent;
                    h->CellNumber++;
                    latent = code;
                }
            }
        } else {
            /* This happens only when the dictionary  */
            /* is empty. So code is < 256 (usually).  */
            /* An unexpected code is only reported,   */
            /* not rejected (best effort, as before). */
            if (code > 256) {
                DEBUG1("[BUG: latent=%d]\n", code);
            }
            latent = code;
            if (out_count >= max_out)  { DEBUG0("OoD!\n"); return -1; }
            out[out_count] = latent & 0xff;
            out_count++;
        }
        /* Read the next code. */
        TORI(ReadCodeFromBlock(&current, in, size, &byte_count, &bit_count));
        code = (current.code[0] + (current.code[1] << 8));
    }
    DEBUG0("    [EofCommand]\n" );
    /* Return the out size */
    (*sout) = out_count;

    return 0;
}


/*****************************************************************************/
/*****************************************************************************/
/*****************************************************************************/
/*                          HIGH LEVEL FUNCTIONS                             */
/*****************************************************************************/
/* ENCODING */
/* Compress the file `in` into the file `out` with Lzw.                 */
/*   in, out : handles passed to the header and buffer routines         */
/*             (presumably open file descriptors -- confirm)            */
/*   cr      : receives the compression ratio                           */
/*             (bytes in - bytes out) / bytes in, or 0 when no input    */
/*             byte was read (the division would be by zero)            */
/* Sequence: write the header, encode through two buffers, release the  */
/* hash table, write the compressed length, compute the ratio.          */
/* RETURN VALUE: the result of LzwRawEncode(). TORI() is defined        */
/* elsewhere; presumably it returns early when a header step fails.     */
int LzwEncode(int in, int out, float *cr)
{
    Buffer *bin, *bout;
    CrunchHeader head;
    LzwContext h;
    float total_in;
    int r;

    /* Header write */
    TORI(ComputeHeader(in, &head, METHOD_LZW));
    TORI(WriteHeader(out, &head));

    /* Allocate buffers */
    bin = NewBuffer(in);
    bout = NewBuffer(out);

    /* Encoding sequence */
    LzwInitContext(&h);
    r = LzwRawEncode(&h, bin, bout);
    LzwFreeHashTable(&h);

    /* Write compressed length */
    /* NOTE(review): if TORI() returns from here, bin and bout are */
    /* not released; fixing it needs the definition of TORI.       */
    TORI(WriteLength(&head, bin, bout));

    /* Compute compression ratio (guarded against an empty input) */
    total_in = (float)bin->total;
    if (total_in > 0.0f)
        *cr = (total_in - (float)bout->total) / total_in;
    else
        *cr = 0.0f;

    /* Free buffers */
    KillBuffer(bin);
    KillBuffer(bout);

    return r;
}


/* DECODING */
/* Expand the Lzw-compressed file `in` into the file `out`.            */
/* The header is read and must announce METHOD_LZW; the input is then  */
/* positioned with SeekRealPosition() and decoded through two buffers. */
/* RETURN VALUE: -1 when the header names another method, otherwise    */
/* the result of LzwRawDecode().                                       */
int LzwDecode(int in, int out)
{
    CrunchHeader head;
    LzwContext ctx;
    Buffer *src, *dst;
    int status;

    /* Header checks */
    TORI(ReadHeader(in, &head));
    if (CheckFileHeader(in, &head) != METHOD_LZW) {
        return -1;
    }
    TORI(SeekRealPosition(in, &head, 0));

    /* Buffers around both files */
    src = NewBuffer(in);
    dst = NewBuffer(out);

    /* Decode with a freshly initialized context */
    LzwInitContext(&ctx);
    status = LzwRawDecode(&ctx, src, dst);

    /* Release the buffers */
    KillBuffer(src);
    KillBuffer(dst);

    return status;
}
