/*
  Input
*/

#include "input.h"

#include "config.h"
#include "head.h"
#include "mem.h"
#include "message.h"
#include "os.h"
#include "page.h"
#include "print.h"
#include "shared.h"
#include "status.h"
#include "var.h"

/* Number of entries in the vocabulary table (set by init_vocab) */
static word num_vocab_words;
/* Distance from the start of one vocabulary entry to the next (set by init_vocab) */
static word vocab_entry_size;
/* Address of the first vocabulary entry (set by init_vocab) */
static word strt_vocab_table;

/* Number of bits of the first coded word used as the quick lookup key */
#define QUICK_BITS 8

/*
  The value we search for was produced by encode, and the two
  possible dictionary word lengths are two and three coded words
  (6/9 letters), so the top bit of the first coded word is never set.
  The quick lookup key therefore only needs to be taken from
  bits 14 and down of the first coded word: QUICK_KEY extracts the
  QUICK_BITS bits starting at bit 14, and QUICK_HOLD is the number
  of distinct keys.
*/

#define QUICK_KEY(w) (((w)>>(15-QUICK_BITS)) & ((1<<QUICK_BITS)-1))
#define QUICK_HOLD   (1<<QUICK_BITS)

/*
  Index table filled in by init_quick: look_up searches the entries
  numbered quick[k] up to (but not including) quick[k+1] for a word
  whose key is k, hence the one extra slot at the end.
*/
static word quick[QUICK_HOLD + 1];

/*
  Build the quick lookup index for the current vocabulary table.

  Walks the table once (it is assumed to be sorted on the coded
  words) and records, for every key j, the index of the first entry
  whose QUICK_KEY is at least j.  look_up then only has to search
  entries quick[j] .. quick[j+1]-1 for a word with key j.
*/
static void init_quick(void)
{
  word i = 0;
  word j = 0;
  word p = strt_vocab_table;
  for(j = 0; j < QUICK_HOLD; ++j)
  {
    while(i < num_vocab_words && QUICK_KEY(rd_word_addr(p)) < j)
    {
      i += 1;
      p += vocab_entry_size;
    }
    quick[j] = i;
  }
  /*
    The end marker must be the table size.  Using the loop's final i
    would make it equal to quick[QUICK_HOLD - 1], leaving the last
    bucket empty so that words with the highest key are never found.
  */
  quick[QUICK_HOLD] = num_vocab_words;
}

/*
  Select the vocabulary whose header starts at vocab_strt.

  The header is read as: a count byte, that many bytes which are
  skipped here, an entry-size byte and a signed word holding the
  number of entries; the entries follow directly.  The result is
  cached, so calling again with the same address does nothing.
*/
static void init_vocab(long_word vocab_strt)
{
  static long_word last_vocab_start = 0xFFFFFFFFL;
  word skip;
  signed_word count;

  /* Same table as last time: everything is already set up */
  if(vocab_strt == last_vocab_start)
    return;

  skip  = rd_byte_addr(vocab_strt);
  count = rd_word_addr(vocab_strt + skip + 2);

  vocab_entry_size = rd_byte_addr(vocab_strt + skip + 1);
  strt_vocab_table = (word) (vocab_strt + skip + 4);

  /* A non-positive count is stored as its magnitude */
  num_vocab_words = count > 0 ? count : -count;

  init_quick();
  last_vocab_start = vocab_strt;
}

/*
  Find an encoded word in the current vocabulary table.

  coded        - the encoded word
  encoded_size - number of coded words to compare

  Returns the address of the matching table entry, or 0 if there
  is none.  Only the entries in the quick lookup bucket selected
  by the first coded word are examined.
*/
static word look_up(word coded[], word encoded_size)
{
  word key   = QUICK_KEY(coded[0]);
  word entry = quick[key];
  word limit = quick[key+1];
  /* addr is the address of coded word col of table entry 'entry' */
  word addr  = strt_vocab_table + entry * vocab_entry_size;
  word col;

  /* Column by column, skip forward over entries that sort too low */
  for(col = 0; col < encoded_size; ++col)
  {
    while(entry < limit && rd_word_addr(addr) < coded[col])
    {
      entry += 1;
      addr  += vocab_entry_size;
    }
    if(entry == limit || rd_word_addr(addr) > coded[col])
      return 0;
    addr += 2;
  }

  /*
    The entry may have changed between columns, so walk back over
    every column of the final entry and confirm it really matches.
  */
  while(col > 0)
  {
    col  -= 1;
    addr -= 2;
    if(coded[col] != rd_word_addr(addr))
      return 0;
  }
  return addr;
}

/*
  Copy at most 'size' of the 'length' bytes found at address
  'start' into 'buffer' and terminate the copy with a zero byte.
  'buffer' must therefore have room for size + 1 bytes.
*/
static void buffer_copy(long_word start, word length, byte *buffer, word size)
{
  word count = min(size, length);
  word n     = 0;

  while(n < count)
  {
    buffer[n] = rd_byte_addr(start + n);
    n += 1;
  }
  buffer[count] = 0;
}

/*
  Append one record to the word buffer at address 'words'.

  Byte 0 of the buffer is its capacity and byte 1 the number of
  records held; each record is four bytes: the vocabulary code
  (a word), the length and the start position.

  Returns 1 if the record was stored, 0 if the buffer was full.
*/
static int store_word(long_word words, word code, int len, int start)
{
  byte capacity = rd_byte_addr(words);
  byte used     = rd_byte_addr(words + 1);
  long_word slot;

  if(used >= capacity)
    return 0;

  slot = words + 2 + (long_word) (4 * (word) used);
  wr_byte_addr(words + 1, used + 1);
  wr_word_addr(slot, code);
  wr_byte_addr(slot + 2, len);
  wr_byte_addr(slot + 3, start);
  return 1;
}

/*
  Report that the word buffer overflowed, then write a zero byte
  just past the last character held in the input buffer 'chars'
  (whose length is stored at chars + 1).
*/
static void too_many_words(long_word chars)
{
  long_word text_end;

  display((byte *) "Too many words typed. Flushing.\n");
  text_end = chars + 2 + rd_byte_addr(chars + 1);
  wr_byte_addr(text_end, 0);
}

/* The three kinds of character the word scanners distinguish */
typedef enum { punctuation, white_space, letter } char_class;

/*
  Classify an input character.

  The list at the start of the header vocabulary (a count byte
  followed by that many characters) is checked first; anything in
  it is punctuation.  Otherwise space, tab, return, '.', ',' and
  '?' count as white space and everything else as a letter.
*/
static char_class classify_char(byte c)
{
  word addr     = hd_vocab();
  int remaining = rd_byte_addr(addr);

  for(; remaining != 0; --remaining)
  {
    addr += 1;
    if(rd_byte_addr(addr) == c)
      return punctuation;
  }

  if(os_strpos(" \t\r.,?", c) >= 0)
    return white_space;
  return letter;
}

/*
  Split the input buffer at 'chars' into words and store a record
  for each one in the word buffer at 'words'.

  The text starts at chars + 2 and its length is the byte at
  chars + 1.  White space is skipped; a punctuation character is a
  one-character word; any other word is a run of letters.  Each
  word is truncated to the dictionary resolution, encoded, looked
  up in the current vocabulary and stored with its full length and
  start position.  Parsing stops early if the word buffer fills.
*/
static void general_parse_buffer(long_word chars, long_word words)
{
  int x         = 2;
  int chars_len = 2 + rd_byte_addr(chars + 1);
  int plus      = hd_plus();
  /* Dictionary resolution depends only on the game type */
  int esz       = plus ? PLUS_ENCODED_SIZE : STD_ENCODED_SIZE;
  int cpw       = plus ? PLUS_CHARS_PER_WORD : STD_CHARS_PER_WORD;

  wr_byte_addr(words + 1, 0);

  while(x < chars_len)
  {
    while(x < chars_len
       && classify_char(rd_byte_addr(chars + x)) == white_space) ++x;
    if(x < chars_len)
    {
      int len = 0;
      word coded[max(STD_ENCODED_SIZE, PLUS_ENCODED_SIZE)];
      /*
        Must hold cpw characters plus the terminator written by
        buffer_copy, so size it by the larger character count.
      */
      byte the_word[max(STD_CHARS_PER_WORD, PLUS_CHARS_PER_WORD) + 1];
      if(classify_char(rd_byte_addr(chars + x)) == punctuation)
      {
        len = 1;
      }
      else
      {
        while(x + len < chars_len
           && classify_char(rd_byte_addr(chars + x + len)) == letter) ++len;
      }
      buffer_copy(chars + x, len, the_word, cpw);
      encode(the_word, coded);
      if(!store_word(words, look_up(coded, esz), len, x))
      {
        too_many_words(chars);
        break;
      }
      x += len;
    }
  }
}

/*
  Split the input buffer at 'chars' into words using the vocabulary
  at 'vocab_strt' and fill in the word buffer at 'words'.

  For every word found a four byte record is written: the address
  of its vocabulary entry (0 if unknown), its length and its
  position relative to 'chars'.  When ignore_offset is non-zero the
  entry address of an unknown word is left untouched rather than
  being overwritten with 0.  Parsing stops when the text runs out
  or the word buffer is full.
*/
static void advanced_parse_buffer(long_word chars, long_word words,
  long_word vocab_strt, word ignore_offset)
{
  long_word cursor = chars + 2;
  long_word limit  = chars + 2 + rd_byte_addr(chars + 1);

  word coded[PLUS_ENCODED_SIZE];

  init_vocab(vocab_strt);
  wr_byte_addr(words + 1, 0);

  while(cursor != limit)
  {
    word remaining = (byte) (limit - cursor);
    word length    = get_code(&cursor, remaining, coded);
    byte used;
    long_word slot;
    word entry;

    /* Nothing but white space left */
    if(length == 0)
      break;

    /* Capacity (byte 0) reached by the count (byte 1) */
    if(rd_byte_addr(words) == rd_byte_addr(words + 1))
    {
      too_many_words(chars);
      break;
    }

    used  = rd_byte_addr(words + 1);
    slot  = words + 2 + (long_word) 4 * used;
    entry = look_up(coded, PLUS_ENCODED_SIZE);

    if(entry != 0 || ignore_offset == 0)
      wr_word_addr(slot, entry);
    wr_byte_addr(slot + 2, length);
    wr_byte_addr(slot + 3, (byte) (cursor - chars));
    wr_byte_addr(words + 1, used + 1);

    cursor += length;
  }
}

/*
  Read a line from the user into the input buffer at 'buffer'.

  Byte 0 of the buffer is passed to getline as the size limit.
  The characters are stored from buffer + 2 after conversion with
  os_lower, and their count is stored at buffer + 1.
*/
static void read_line(long_word buffer)
{
  static char line[256];
  int count = getline((byte *)"", line, rd_byte_addr(buffer));
  int k     = 0;

  while(k < count)
  {
    wr_byte_addr(buffer + 2 + k,  os_lower(line[k]));
    ++k;
  }
  wr_byte_addr(buffer + 1, count);
}

/*
  Find the next word in the 'length' bytes starting at *start.

  Leading white space is skipped and *start is moved to the first
  character of the word (or to the end of the region if there is
  none).  Returns the length of the word: 1 for a punctuation
  character, the length of the run of letters otherwise, or 0 if
  only white space was left.
*/
static word scan_buffer(long_word *start, word length)
{
  long_word limit  = *start + length;
  long_word cursor = *start;

  for(; cursor != limit; ++cursor)
    if(classify_char(rd_byte_addr(cursor)) != white_space)
      break;

  *start = cursor;

  if(cursor == limit)
    return 0;

  if(classify_char(rd_byte_addr(cursor)) == punctuation)
    return 1;

  while(cursor != limit
     && classify_char(rd_byte_addr(cursor)) == letter)
    ++cursor;
  return (word) (cursor - *start);
}

/* Globals */

void init_input(void)
{
  init_vocab(hd_vocab());
}

/*
  Read a line of input and, where requested, split it into words.

  param_stack[0] is the number of parameters, param_stack[1] the
  input buffer and param_stack[2] (if supplied) the word buffer.
  When hd_five() is false the line is always parsed with
  general_parse_buffer.  Otherwise it is parsed with
  advanced_parse_buffer only if a non-zero word buffer was
  supplied, and '\n' is stored as the result.
*/
void input(void)
{
  extern word param_stack[];

  int num_params           = param_stack[0];
  long_word in_buf_strt    = param_stack[1];
  long_word word_buff_strt = num_params < 2 ? 0 : param_stack[2];

  /* Empty the Print Buffer */

  if(!hd_plus())
    status();
  flush_prt_buff();

#if 0 /* These parameters can't be used */
  if(num_params < 3) param_stack[3] = 0xFFFF;
  if(num_params < 4) param_stack[4] = 0;
  param_stack[0] = 4;
#endif

  read_line(in_buf_strt);

  if(!hd_five())
  {
    general_parse_buffer(in_buf_strt, word_buff_strt);
  }
  else
  {
    /*
      Test the validated value, not param_stack[2] itself: when
      fewer than two parameters were passed that slot is not a
      supplied operand and must not trigger a parse into address 0.
    */
    if(word_buff_strt != 0)
      advanced_parse_buffer(in_buf_strt, word_buff_strt, hd_vocab(), 0);
    store('\n');
  }
}

/*
  Locate and encode the next word in the 'length' bytes at *start.

  *start is advanced past leading white space to the start of the
  word.  If a word is found, up to PLUS_CHARS_PER_WORD of its
  characters are encoded into 'coded'.  Returns the full length of
  the word, or 0 (leaving 'coded' untouched) if there is none.
*/
word get_code(long_word *start, word length, word coded[])
{
  byte text[PLUS_CHARS_PER_WORD + 1];
  word found = scan_buffer(start, length);

  if(found == 0)
    return 0;

  buffer_copy(*start, found, text, PLUS_CHARS_PER_WORD);
  encode(text, coded);
  return found;
}

/*
  Split an already filled input buffer into words.

  param_stack[0] is the number of parameters, param_stack[1] the
  input buffer and param_stack[2] the word buffer.  The optional
  param_stack[3] names the vocabulary to use and the optional
  param_stack[4] is the flag passed on as ignore_offset.
*/
void parse(void)
{
  extern word param_stack[];

  int num_params            = param_stack[0];
  long_word in_buf_strt     = param_stack[1];
  long_word word_buff_strt  = param_stack[2];
  /*
    A vocabulary operand of zero means "use the game's own
    vocabulary", exactly as if the operand had been omitted;
    passing 0 on would read a table from address 0.
  */
  long_word vocab_strt      = (num_params < 3 || param_stack[3] == 0)
                            ? hd_vocab() : param_stack[3];
  word ignore_offset        = num_params < 4 ? 0 : param_stack[4];

  advanced_parse_buffer(in_buf_strt, word_buff_strt,
    vocab_strt, ignore_offset);
}
