/*
 * buffer.c
 *
 * routines of input, and formatting according to the styles
 * Copyright (c) 1988, 89, 90, 91, 92, 93 Miguel Santana
 * Copyright (c) 1995, 96, 97 Akim Demaille, Miguel Santana
 * $Id: buffer.c,v 1.47.2.3 1997/05/26 08:26:20 demaille Exp $
 */

/*
 * This file is part of a2ps.
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include "a2ps.h"
#include "styles.h"
#include "buffer.h"
#include "utilsty.h"
#include "routines.h"
#include "getline.h"
#include "jobs.h"
#include "psgen.h"

#define IS_STRING		-123	/* When my-getc returns a string */

/* The buffer structure: one line of input, together with an optional
 * lower case version of that line which speeds up case insensitive
 * parsing */
typedef struct {
  /* The line, as stored by getdelim */
  ustring content;
  /* Lower case copy of content; getBuffer refreshes it only when the
   * current style is case insensitive */
  ustring lowerCaseContent;
  /* Allocated size of content, maintained by getdelim */
  size_t allocsize;
  /* Value returned by getdelim for this line (-1 is tested as the end
   * of input), minus the end of line dropped after a form feed */
  int size;
  /* Index in content of the next character to process */
  int curr;
} Buffer;

/* Description of the current style used (updated by setStyle) */
STYLE theStyle = plain_style;
/* Non zero when the symbols must be translated (updated by
 * setSymbolTranslation) */
int translateSymbols = FALSE;

/* The alphabet used, indexed by character code: inAlphabet1 is filled
 * from the `first_characters' of the style, inAlphabet2 from its
 * `following_characters' */
int inAlphabet1[256];
int inAlphabet2[256];

/* Sorted keywords.  For a first character C, the entries examined are
 * those with an index between key_min[C] and key_max[C]; setStyle
 * resets them to -1 and -2, i.e. to an empty range */
keyword * sorted_keywords = NULL;
int num_keywords = 0;
int key_min[256], key_max[256];

/* Sorted regulars and specials, with the same kind of index ranges as
 * the keywords */
symbol * sorted_regulars = NULL;
int num_regulars = 0;
int reg_min[256], reg_max[256];
symbol * sorted_specials = NULL;
int num_specials = 0;
int spe_min[256], spe_max[256];

/* Sequences ready to interpret (built by set_sequences in setStyle) */
matching_sequence * matching_sequences = NULL;
int num_sequences = 0;

/*
 * Use the information offered by main.c
 */
extern print_job * job;


/****************************************************************/
/*		Initialization of the pretty-printing	       	*/
/****************************************************************/
/*
 * Set up the variables depending on the current language.
 *
 * When STYLE is not plain_style and differs from the current style,
 * the two alphabet tables, the sorted keywords and the matching
 * sequences are rebuilt from languages[STYLE].  In every case STYLE
 * becomes the current style.
 */
void
setStyle (STYLE style)
{
  int c;

  if (style != plain_style && theStyle != style) {
    ustring list;

    /* Forget what was set up for the previous style.  Both alphabets
     * have to be cleared: otherwise the characters allowed inside the
     * words of the former style would still be accepted. */
    for (c = 0 ; c <= 255 ; c++) {
      inAlphabet1[c] = FALSE;
      inAlphabet2[c] = FALSE;
      /* min > max stands for an empty range of indexes */
      key_min[c] = reg_min[c] = spe_min[c] = -1;
      key_max[c] = reg_max[c] = spe_max[c] = -2;
    }

    /* The alphabets are read as a NUL terminated list of pairs of
     * characters, each pair being the bounds of an interval */
    for (list = (ustring) languages[style].first_characters ;
         *list ;  list += 2)
      for (c = *list ; c <= *(list+1) ; c++)
        inAlphabet1[c] = TRUE;

    for (list = (ustring) languages[style].following_characters ;
         *list ;  list += 2)
      for (c = *list ; c <= *(list+1) ; c++)
        inAlphabet2[c] = TRUE;

    /* Get a sorted version of the keywords */
    num_keywords = sort_keywords (&sorted_keywords, style, key_min, key_max);

    /* prepare the matching sequences */
    num_sequences = set_sequences (&matching_sequences, style);
  }
  theStyle = style;
}

/*
 * Decide whether the symbols are translated for the current style,
 * and prepare the sorted tables of symbols when needed.
 *   on: non zero if the translation is requested
 */
void
setSymbolTranslation(int on)
{
  /* Style seen at the previous call */
  static STYLE last_style = plain_style;
  int enabled;

  /* Never translate in plain style; otherwise translate on request,
   * and unconditionally when the style is prescript */
  enabled = (theStyle != plain_style);
  if (enabled)
    enabled = (on || (languages[theStyle].style == prescript_style));
  translateSymbols = enabled;

  /* The tables only have to be sorted again when the translation is
   * active and the style has changed since the previous call */
  if (translateSymbols && theStyle != last_style) {
    num_regulars = sort_regulars (&sorted_regulars, theStyle,
                                  reg_min, reg_max);
    num_specials = sort_specials (&sorted_specials, theStyle,
                                  spe_min, spe_max);
  }

  last_style = theStyle;
}

/****************************************************************/
/*		pretty printing service routines	       	*/
/****************************************************************/
/*
 * Return TRUE if STRING begins with one of the patterns of the NULL
 * terminated list pointed to by *ARRAY.  On success *ARRAY is left on
 * the pattern that matched; on failure it is left on the terminator.
 */
inline static int 
is_in_pattern_array (ustring string, const uchar ***array)
{
  for ( ; **array ; ++(*array))
    if (ustrprefix (**array, string))
      return TRUE;

  return FALSE;
}

/*
 * Does the matching_string MSTR match BUFFER at its current position?
 * The lower case version of the line is used unless the style is case
 * sensitive.  `at_start' requires the position to be 0, `at_end'
 * requires the match to stop at index size - 1.
 */
inline static int
match (matching_string mstr, Buffer * buffer)
{
  ustring line;

  if (languages[theStyle].sensitiveness == CASE_SENSITIVE)
    line = buffer->content;
  else
    line = buffer->lowerCaseContent;
  line += buffer->curr;

  if (!ustrprefix (mstr.string, line))
    return FALSE;
  if (mstr.at_start && buffer->curr != 0)
    return FALSE;
  if (!mstr.at_end)
    return TRUE;
  return (buffer->curr + ustrlen (mstr.string) == buffer->size - 1);
}

/*
 * Return the first of the global matching_sequences whose opening
 * string matches BUFFER at its current position, or NULL if none does.
 * ARRAY and CURR are not used.
 */
inline static matching_sequence *
is_in_sequence_array (Buffer * buffer, matching_sequence * array, int curr)
{
  int idx = 0;

  while (idx < num_sequences) {
    matching_sequence * candidate = &matching_sequences[idx];

    if (match (candidate->open, buffer))
      return candidate;
    idx++;
  }

  return NULL;
}

/*
 * Put in BUFRES the longest word of BUFFER starting at its current
 * position: the current character, whatever it is, followed by every
 * character belonging to the second alphabet.  The current position
 * is moved after the word.  FACE is not used, and no bound is enforced
 * on BUFRES.
 */
static void 
read_word (Buffer * buffer, FACE* face, ustring bufres)
{
  int length = 0;

  do {
    bufres[length++] = buffer->content[buffer->curr++];
  } while (inAlphabet2[buffer->content[buffer->curr]]);

  bufres[length] = NUL;
}


/****************************************************************/
/*			lexical analysis routines	       	*/
/****************************************************************/
/*
 * If a keyword starts at the current position of BUFFER, copy it (as
 * spelled in the source) in BUFRES, store its font in *FACE, move
 * after it and return TRUE.  Return FALSE, with nothing modified, if
 * there is no keyword.
 */
static int 
is_keyword (Buffer * buffer, FACE* face, ustring bufres)
{
  uchar* text;
  uchar first;
  int idx;

  /* The comparisons are made on the lower case line unless the style
   * is case sensitive */
  if (languages[theStyle].sensitiveness == CASE_SENSITIVE)
    text = buffer->content;
  else
    text = buffer->lowerCaseContent;
  text += buffer->curr;
  first = *text;

  /* Walk from the last candidate down to the first one, and keep the
   * first keyword which prefixes the text and is not followed by a
   * character of the second alphabet */
  for (idx = key_max [first] ; key_min [first] <= idx ; idx--) {
    keyword * candidate = &sorted_keywords[idx];
    int length;

    if (!ustrprefix(candidate->theKeyword, text))
      continue;
    length = ustrlen(candidate->theKeyword);
    if (inAlphabet2[text[length]])
      continue;

    /* Success */
    ustrncpy(bufres, buffer->content+buffer->curr, length);
    bufres[length] = NUL;
    buffer->curr += length;
    *face = candidate->theFont;
    return TRUE;
  }

  /* We found no keyword */
  return FALSE;
}

	    
/* If a regular symbol starts at the current position of BUFFER, put
 * its converted form in BUFRES, store its font in *FACE, move after
 * the symbol in the source and return TRUE.  Return FALSE otherwise.
 */
static int
is_regular (Buffer * buffer, FACE* face, ustring bufres)
{
  ustring text;
  uchar first;
  int idx;

  /* The comparisons are made on the lower case line unless the style
   * is case sensitive */
  if (languages[theStyle].sensitiveness == CASE_SENSITIVE)
    text = buffer->content;
  else
    text = buffer->lowerCaseContent;
  text += buffer->curr;
  first = *text;

  /* Walk from the last candidate down to the first one, and keep the
   * first symbol which prefixes the text and is not followed by a
   * character of the second alphabet */
  for (idx = reg_max [first] ; reg_min [first] <= idx ; idx--) {
    symbol * candidate = &sorted_regulars[idx];

    if (!ustrprefix(candidate->theKeyword, text))
      continue;
    if (inAlphabet2[text[ustrlen(candidate->theKeyword)]])
      continue;

    ustrcpy(bufres, candidate->theSymbol);
    buffer->curr += ustrlen(candidate->theKeyword);
    *face = candidate->theFont;
    return TRUE;
  }

  /* We found no regular matching */
  return FALSE;
}


/* If a special symbol starts at the current position of BUFFER, put
 * its converted form in BUFRES, store its font in *FACE, move after
 * the symbol in the source and return TRUE.  Return FALSE otherwise.
 * The line is always compared as it was read, and nothing is required
 * from the character following the symbol.
 */
static int 
is_special (Buffer * buffer, FACE* face, ustring bufres)
{
  ustring text = buffer->content + buffer->curr;
  uchar first = *text;
  int idx;

  /* Walk from the last candidate down to the first one, and keep the
   * first symbol which prefixes the text */
  for (idx = spe_max [first] ; spe_min [first] <= idx ; idx--) {
    symbol * candidate = &sorted_specials[idx];

    if (!ustrprefix(candidate->theKeyword, text))
      continue;

    ustrcpy(bufres, candidate->theSymbol);
    buffer->curr += ustrlen(candidate->theKeyword);
    *face = candidate->theFont;
    return TRUE;
  }

  /* We found no special symbol matching the text */
  return FALSE;
}


/*
 * If a "verbatim" sequence of the current style starts at the current
 * position of BUFFER, copy it in BUFRES, move after it and return
 * TRUE.  Return FALSE otherwise.
 */
static int 
is_verbatim (Buffer * buffer, ustring bufres)
{
  /* "verbatim" sequences are written as they are when outside a
   * sequence (e.g. ''' in ada) */
  const uchar** cursor = languages[theStyle].verbatims;

  if (!is_in_pattern_array(buffer->content+buffer->curr, &cursor))
    return FALSE;

  /* cursor has been left on the pattern which matched */
  ustrcpy(bufres, *cursor);
  buffer->curr += ustrlen(*cursor);
  return TRUE;
}

/*
 * If an "escape" sequence of the current style starts at the current
 * position of BUFFER, copy it in BUFRES, move after it and return
 * TRUE.  Return FALSE otherwise.
 */
static int 
is_escape (Buffer * buffer, ustring bufres)
{
  /* "escape" sequences are written as they are when inside a
   * sequence (e.g. \" in a C string) */
  const uchar** cursor = languages[theStyle].escapes;

  if (!is_in_pattern_array(buffer->content+buffer->curr, &cursor))
    return FALSE;

  /* cursor has been left on the pattern which matched */
  ustrcpy(bufres, *cursor);
  buffer->curr += ustrlen(*cursor);
  return TRUE;
}

/* 
 * If a sequence opens at the current position of BUFFER, store it in
 * *CURRENTSEQUENCE, copy the opening string (as spelled in the source)
 * in BUFRES, store its font in *BEGIN_FACE, move after it and return
 * TRUE.  Otherwise *CURRENTSEQUENCE is set to NULL and FALSE is
 * returned.
 */
static int
is_sequence_begin(Buffer * buffer, FACE* begin_face, 
                  matching_sequence ** currentSequence, ustring bufres)
{
  int opening_length;

  *currentSequence =
    is_in_sequence_array(buffer, matching_sequences, buffer->curr);
  if (!*currentSequence)
    return FALSE;

  opening_length = ustrlen((*currentSequence)->open.string);

  /* printed opening string */
  ustrncpy(bufres, buffer->content+buffer->curr, opening_length);
  bufres[opening_length] = NUL;
  /* skip the opening string in the source */
  buffer->curr += opening_length;
  /* face for the opening string */
  *begin_face = (*currentSequence)->open.font;

  return TRUE;
}

/* 
 * If one of the closing strings of CURRENTSEQUENCE is at the current
 * position of BUFFER, copy it (as spelled in the source) in BUFRES,
 * store its font in *FACE, move after it and return TRUE.  Return
 * FALSE otherwise.
 */
static int 
is_sequence_end (Buffer * buffer, FACE* face, 
                 matching_sequence * currentSequence, ustring bufres)
{
  int idx;

  for (idx = 0 ; idx < currentSequence->nb ; idx++) {
    matching_string closer = currentSequence->close[idx];
    int consumed;

    if (!match (closer, buffer))
      continue;

    /* When the closing string is anchored at the end of the line,
     * the end of line character (\n or \r) is taken too */
    consumed = ustrlen(closer.string);
    if (closer.at_end)
      consumed++;

    ustrncpy(bufres, buffer->content+buffer->curr, consumed);
    bufres[consumed] = NUL;
    buffer->curr += consumed;
    *face = closer.font;
    return TRUE;
  }

  return FALSE;
}

/****************************************************************/
/*		 	Buffer Service routines	       		*/
/****************************************************************/
/* getBuffer
 *   Read from stdin, thanks to GNU getdelim, the next chunk of input
 *   up to and including DELIM, and store it in BUFFER.  If the
 *   language is case insensitive, also build a lower case version of
 *   the chunk.  The current position is reset to the beginning.
 */
static void
getBuffer(Buffer * buffer, char delim)
{
  int length;

  length = getdelim ((char **) &(buffer->content), &(buffer->allocsize),
                     delim, stdin);

  /* If the eol char is preceded by a \f, then just forget the eol,
   * so that there won't be a blank line at the top of the next page */
  if (length >= 2 && buffer->content [length - 2] == '\f') {
    length--;
    buffer->content [length] = '\0';
  }
  buffer->size = length;

  if (theStyle != plain_style
      && languages[theStyle].sensitiveness == CASE_INSENSITIVE) {
    int i = 0;

    /* Keep the copy as large as the original */
    buffer->lowerCaseContent =
      xrealloc (buffer->lowerCaseContent, buffer->allocsize);
    /* The trailing NUL is copied too; nothing is copied when the
     * size is -1 */
    while (i <= length) {
      buffer->lowerCaseContent[i] = tolower(buffer->content[i]);
      i++;
    }
  }

  buffer->curr = 0;
}

/* TRUE when the current position of BUF has reached its size, i.e.
 * nothing is left to process (this also holds for a size of -1).
 * BUF is a Buffer object, not a pointer; the argument is parenthesized
 * so that any expression, such as *ptr, can be given. */
#define isEmptyBuffer(Buf) ((Buf).curr >= (Buf).size)

/*
 * Fetch the next item of the input.
 *
 * Returns EOF at the end of the input, IS_STRING when a string has
 * been recognized (the result is then in the parameter BUFRES), or
 * the character read when nothing has been recognized.
 *
 * *FACE receives the face of the item.  It is left untouched on some
 * paths (e.g. a verbatim met outside a sequence).
 *
 * The line being processed and the state of the sequences are kept in
 * static variables: the function is not reentrant.
 */
static int 
mygetstring (FACE *face, ustring bufres)
{
  /* Static, hence zero initialized: empty at the first call */
  static Buffer buffer;
  /* are we currently in a sequence ? */
  static int in_sequence = FALSE; 
  /* must the face be reset to PLAIN at the next call ? */
  static int return_to_courier = FALSE;
  /* face in a sequence, and closing symbol's face */
  static matching_sequence * currentSequence = NULL;

  int c;
  
  if (isEmptyBuffer(buffer)) {
    getBuffer(&buffer, encodings[job->encoding].new_line);
    /* One more line read */
    job->jobs->linenum++;

    if (buffer.size == -1) {
      /* end of file: reset values */
      /* If this is a new file, it must not depend on the trailing
       * parameters of the previous file */
      in_sequence = FALSE;
      return_to_courier = FALSE;
      return EOF;
    }
  }
  
  if (theStyle != plain_style) {
    if (return_to_courier) {
      return_to_courier = FALSE;
      *face = PLAIN;
    }
    if (in_sequence) {
      *face = currentSequence->in_font;
      /* verbatim: not converted _anywhere_ */
      if (is_verbatim (&buffer, bufres))
        return IS_STRING;
      /* escape: not converted when in a sequence */
      if (is_escape (&buffer, bufres))
        return IS_STRING;
      /* end of sequence ? */
      if (is_sequence_end (&buffer, face, currentSequence, bufres))
        {
          in_sequence = FALSE;
          return_to_courier = TRUE;
          return IS_STRING;
        }
    } else { /* (not in sequence) */
      /* whether we are in a string or not, verbatims are copied */
      if (is_verbatim (&buffer, bufres))
        return IS_STRING;
      if (is_sequence_begin (&buffer, face, &currentSequence, bufres))
        {
          in_sequence = TRUE;
          return IS_STRING;
        }
      else if (inAlphabet1[buffer.content[buffer.curr]]) {  
        /* we are in a word since this was a char belonging to the
         * first alphabet */
        if (translateSymbols && is_regular (&buffer, face, bufres)) {
          return_to_courier = TRUE;
          return IS_STRING;
        } else if (is_keyword (&buffer, face, bufres)) {
          return_to_courier = TRUE;
          return IS_STRING;
        } else if (translateSymbols && is_special (&buffer, face, bufres)) {
          return_to_courier = TRUE;
          return IS_STRING;
        } else {
          /* since some characters may be used inside an identifier
           * (eg, x' = x in claire) but can also be used to open
           * a special sequence (eg, 'x' in claire), then we must read
           * the whole word, and print it.
           */
          read_word (&buffer, face, bufres);
          return IS_STRING;
        }
      } else if (translateSymbols && is_special (&buffer, face, bufres)) {
        return_to_courier = TRUE;
        return IS_STRING;
      } else
        *face = PLAIN;
    }
    /* Nothing has been recognized: return the current character */
    return buffer.content[(buffer.curr)++];
  } else {
    /* This is plain style */
    *face = PLAIN;
    c = buffer.content[(buffer.curr)++];
    
    /* Check if it is a special nroff'ed sequence.  From here on,
     * buffer.curr is the index of the backspace following C. */
    if (buffer.content[buffer.curr] == '\b')
      {
        /* We might be dealing with miscellaneous nroff'ed pages */
        /* 1. This might be a bolding sequence.  The bad news
         *    is that some strange systems build the bold
         *    sequences with only one rewriting, not the 3
         *    usuals.
         */
        /* Super strong: four strikes, seen in Sun's mpeg_rc doc */
        if (c    == buffer.content[buffer.curr+1] &&
            '\b' == buffer.content[buffer.curr+2] &&
            c    == buffer.content[buffer.curr+3] &&
            '\b' == buffer.content[buffer.curr+4] &&
            c    == buffer.content[buffer.curr+5] &&
            '\b' == buffer.content[buffer.curr+6] &&
            c    == buffer.content[buffer.curr+7])
          {
            *face = LABEL_STRONG;
            buffer.curr += 8;
          }
        else if (c    == buffer.content[buffer.curr+1] &&
                 '\b' == buffer.content[buffer.curr+2] &&
                 c    == buffer.content[buffer.curr+3] &&
                 '\b' == buffer.content[buffer.curr+4] &&
                 c    == buffer.content[buffer.curr+5])
          {
            *face = KEYWORD_STRONG;
            buffer.curr += 6;
          }
        else if (c    == buffer.content[buffer.curr+1] &&
                 '\b' == buffer.content[buffer.curr+2] &&
                 c    == buffer.content[buffer.curr+3])
          {
            *face = KEYWORD_STRONG;
            buffer.curr += 4;
          }
        else if (c == buffer.content[buffer.curr+1])
          {
            *face = KEYWORD_STRONG;
            buffer.curr += 2;
          } 
        /* 2. If C is `_', then set font to italic and move to
         *    next char */
        else if (c == '_') 
          {
            /* The underlined character.  It must be unsigned like the
             * content of the buffer: a plain char may be signed, and
             * would then never compare equal to an 8 bit character. */
            uchar c2 = buffer.content[buffer.curr+1];
            /* 7. The most painful case: mpeg_rc, from Sun, where it
             *    tries both to underline, and to boldize */
            if ('\b' == buffer.content[buffer.curr+2] &&
                c2   == buffer.content[buffer.curr+3] &&
                '\b' == buffer.content[buffer.curr+4] &&
                c2   == buffer.content[buffer.curr+5] &&
                '\b' == buffer.content[buffer.curr+6] &&
                c2   == buffer.content[buffer.curr+7])
              {
                *face = LABEL_STRONG;
                c = c2;
                buffer.curr += 8;
              }
            else
              {
                *face = KEYWORD;
                c = buffer.content[buffer.curr + 1];
                buffer.curr += 2;
              }
          }
        /* 3. Seen in gcc.1: o;\b;+, seen in cccp.1: +;\b;o
         *    to have an itemizing symbol.  Let use our symbol
         *    power :) 
         */
        else if ((c == 'o' &&
                  '+' == buffer.content[buffer.curr+1])
                 || (c == '+' &&
                     'o' == buffer.content[buffer.curr+1]))
          {
            *face = SYMBOL;
            buffer.curr += 2;
            c = 0305; /* \oplus in LaTeX */
          }
        /* 4. Seen in groff.1 : c;\b;O, for copyright */
        else if (c == 'c' &&
                 'O' == buffer.content[buffer.curr+1])
          {
            *face = SYMBOL;
            buffer.curr += 2;
            c = 0343; /* \copyright in LaTeX */
          }
        /* 5. Seen in gtroff.1 : +;\b;_, for plus or minus */
        else if (c == '+' &&
                 '_' == buffer.content[buffer.curr+1])
          {
            *face = SYMBOL;
            buffer.curr += 2;
            c = 0261;
          }
        /* 6. Seen in geqn.1 : ~>_ for greater or equal */
        else if (ustrprefix ("~\b>\b_", buffer.content + buffer.curr - 1))
          {
            *face = SYMBOL;
            buffer.curr += 4;
            c = 0263;
          }
        /* and ~<_ for less or equal */
        else if (ustrprefix ("~\b<\b_", buffer.content + buffer.curr - 1))
          {
            *face = SYMBOL;
            buffer.curr += 4;
            c = 0243;
          }
        /* Last. In some case, headers or footers too big, nroff
         *       backspaces so that both chars. are superimposed.
         *       We decided to keep only the first one
         */
        else if (((job->jobs->linenum + 3) % 66 == 0)
                 || ((job->jobs->linenum - 4) % 66 == 0))
          {
            buffer.curr += 2;
          }
        /* else: treat the backspace as a special character */
      }
    return c;
  }
}

/*
 * Print the current input on JOB: fetch the items with mygetstring
 * until EOF, feed the dynamic tags and the dynamic encoding with
 * them, send the visible ones to the PostScript generator, then
 * close the file and report the number of pages and sheets.
 *   lang: the style of the file, only used for the final report.
 *
 * NOTE(review): the two scratch buffers are 512 bytes long, and
 * mygetstring is not told about that size.
 */
void
print_postscript (print_job * job, STYLE lang)
{
  FACE current_face = PLAIN;
  FACE next_face = PLAIN;
  FACE print_face = PLAIN;
  /* result of mygetstring when it is not a char */
  uchar token[512];
  /* To grab the encoding switching instruction */
  uchar encoding_name[512];
  int in_encoding_name = FALSE;

  int item = 0;
  int sheets;

  *token = NUL;
  *encoding_name = NUL;

  for (;;) {
    item = mygetstring(&next_face, token);
    if (item == EOF)
      break;

    /* Is it a new face ? */
    if (next_face != current_face) {
      /* Reset dynamic markers */
      if (next_face & TAG1)
        *job->tag1 = NUL;
      if (next_face & TAG2)
        *job->tag2 = NUL;
      if (next_face & TAG3)
        *job->tag3 = NUL;
      if (next_face & TAG4)
        *job->tag4 = NUL;
      if (in_encoding_name && (!(next_face & ENCODING_TAG))) {
        /* Grabbing of the encoding name is completed */
        ENCODING requested = select_encoding ((char *) encoding_name);
        if (requested != NOENCODING)
          ps_switch_encoding (job, requested);
        else
          error (0, 0, _("unknown encoding: `%s', ignored"), encoding_name);
        *encoding_name = NUL;
        in_encoding_name = FALSE;
      }
      current_face = next_face;
      print_face = current_face & FACE_MASK;
    }

    /* Grab markers if there are */
    if (item == IS_STRING) {
      if (current_face & TAG1)
        ustrcat(job->tag1, token);
      if (current_face & TAG2)
        ustrcat(job->tag2, token);
      if (current_face & TAG3)
        ustrcat(job->tag3, token);
      if (current_face & TAG4)
        ustrcat(job->tag4, token);
    } else {
      if (current_face & TAG1)
        USTRCCAT(job->tag1, item);
      if (current_face & TAG2)
        USTRCCAT(job->tag2, item);
      if (current_face & TAG3)
        USTRCCAT(job->tag3, item);
      if (current_face & TAG4)
        USTRCCAT(job->tag4, item);
      /* Grab the dynamic encodings (only single characters are) */
      if (current_face & ENCODING_TAG) {
        in_encoding_name = TRUE;
        USTRCCAT(encoding_name, item);
      }
    }

    /* if the face is INVISIBLE, forget everything */
    if (current_face & INVISIBLE)
      continue;
    /* if strip is 1 or 3, skip comments */
    if ((job->a2ps_stat->strip == 1 || job->a2ps_stat->strip == 3)
        && current_face == COMMENT)
      continue;
    /* if strip is 2 or 3, skip strong comments */
    if ((job->a2ps_stat->strip == 2 || job->a2ps_stat->strip == 3)
        && current_face == COMMENT_STRONG)
      continue;

    /* Return what's new */
    if (item == IS_STRING)
      ps_print_string (job, token, print_face);
    else
      ps_print_char (job, item, print_face);
  }
  ps_end_file (job);

  /* Print the number of pages and sheets printed.  When printing on
   * both sides, two printed sides make one sheet. */
  sheets = job->jobs->sheets;
  if (job->rectoverso)
    sheets = (sheets + 1) / 2;

  if (job->jobs->pages == 1) {
    /* 1 page on 1 sheet */
    message (0, _("[%s(%s): 1 page on 1 sheet]\n"),
             job->jobs->name,
             lang == plain_style ? _("plain") : languages[lang].name);
  } else if (sheets == 1) {
    /* several pages on 1 sheet */
    message (0, _("[%s(%s): %d pages on 1 sheet]\n"),
             job->jobs->name,
             lang == plain_style ? _("plain") : languages[lang].name,
             job->jobs->pages);
  } else {
    /* several sheets */
    message (0, _("[%s(%s): %d pages on %d sheets]\n"),
             job->jobs->name,
             lang == plain_style ? _("plain") : languages[lang].name,
             job->jobs->pages, sheets);
  }
}
