/*
 * utilsty.c
 *
 * Some routines to deal with style sheets
 * Copyright (c) 1988, 89, 90, 91, 92, 93 Miguel Santana
 * Copyright (c) 1995, 96, 97 Akim Demaille, Miguel Santana
 * 
 */

/*
 * This file is part of a2ps.
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 * $Id: utilsty.c,v 1.1.2.7 1997/06/26 09:17:14 demaille Exp $
 */
#include "a2ps.h"
#include "styles.h"
#include "utilsty.h"
#include "routines.h"

/*
 * From readpipe.c
 *
 * Declared here by hand.  Takes the name of the program followed by
 * a variable number of arguments; the calls in this file end the
 * argument list with NULL and treat a NULL result as a failure.
 */
FILE * readpipe PARAMS ((char *progname, ...));

/************************************************************************/
/*				style selection				*/
/************************************************************************/
/*
 * Look STRING up among the names of the known languages, comparing
 * with strcaseequ.  Only the names are consulted here, not the
 * abbreviations.
 * Return the index of the first language whose name matches, or
 * unknown_style if there is none.
 */
static STYLE
select_style_name (char *string)
{
  STYLE candidate = 0;

  while (candidate < nbr_languages)
    {
      if (strcaseequ (string, languages[candidate].name))
        return candidate;
      candidate++;
    }

  return unknown_style;
}


/*
 * Guess the style of FILENAME from the style sheet name that
 * get_style_sheet_name gives for it in JOB.
 * Return unknown_style when that name is not the name of a known
 * language.
 */
STYLE
guess_with_rules (print_job * job, char * filename)
{
  return select_style_name (get_style_sheet_name (job, filename));
}

/*
 * Return the index of the first language which has STRING as its
 * name (tested with strequ) or as one of its abbreviations (tested
 * with ustrcmp).  The list of abbreviations of a language is ended
 * by an empty string.
 * Return unknown_style if no language matches.
 */
static STYLE
get_style (char *string)
{
  STYLE candidate;

  for (candidate = 0 ; candidate < nbr_languages ; candidate++)
    {
      const char ** alias;

      if (strequ (string, languages[candidate].name))
        return candidate;

      alias = languages[candidate].abbreviations;
      while (**alias)
        {
          if (ustrcmp (string, *alias) == 0)
            return candidate;
          alias++;
        }
    }

  return unknown_style;
}

/*
 * Same lookup as get_style, except that a STRING which matches no
 * language gives plain_style instead of unknown_style.
 */
STYLE
select_style (char * string)
{
  STYLE found = get_style (string);

  return (found == unknown_style) ? plain_style : found;
}
  
/*
 * Given a filename, guess its language.
 *
 * The following are tried in order, and the first one which gives a
 * known style wins:
 *   1. the user's rules (guess_with_rules);
 *   2. what follows the last `.' of FILENAME, looked up with
 *      get_style;
 *   3. only if FILE_LINK is defined: the first line that command
 *      prints when run on FILENAME;
 *   4. plain_style.
 * Hence unknown_style is never returned.
 */
STYLE
guess_language (print_job * job, char *filename)
{
  char * last_dot=NULL;
  STYLE lang;

  /*
   * 1. The user's rules
   */
  if ((lang = guess_with_rules (job, filename)) != unknown_style)
    return lang;

  /* 
   * 2. Builtin rules
   */
  /* This is bad: if there are dots in the path and not in the name ;) 
   * Fixed in a2ps next generation... */
  if ((last_dot=strrchr(filename, '.')) != NULL)
    if ((lang = get_style (last_dot+1)) != unknown_style)
      return lang;

  /*
   * 3. What file(1) says
   */
#ifdef FILE_LINK
  {
    char * file_command;
    char res[512];
    char * cp;
    char * base;
    char field1[512], field2[512];
    FILE *ptr;
    
    /* first, try to guess using file */
    file_command = xstrdup (FILE_LINK);
    /* FILE_LINK may be `command argument': split at the first space
     * so that the argument is passed separately to readpipe */
    if ((cp = strchr (file_command, ' '))) {
      *cp++ = NUL;
      message (4, "Reading pipe: `%s' `%s' `%s'\n",
               file_command, cp, filename);
      ptr = readpipe (file_command, cp, filename, NULL);
    } else {
      message (4, "Reading pipe: `%s' `%s'\n",
               file_command, filename);
      ptr = readpipe (file_command, filename, NULL);
    }
    XFREE (file_command);
    if (ptr != NULL) {
      /* If nothing could be read, RES must not be used as is: its
       * content would be uninitialized.  Make it the empty string,
       * in which the parsing below finds no field. */
      if (fgets (res, sizeof (res), ptr) == NULL)
        res[0] = NUL;
      fclose (ptr);
      message (4, "File on %s says `%s'\n", filename, res);
      /* the two first fields are of interest.  They cannot overflow
       * since both are cut out of RES, which has the same size. */
      if (sscanf(res, "%*[^:]: %s%s", field1, field2) == 2) {
        message (4, "The fields are `%s' and `%s'\n", field1, field2);
        
        /* first, if field1 is "executable" or "a", then the
         * name is in the 2nd field, possibly after a path */
        if (strequ (field1, "executable") || strequ (field1, "a"))
          {
            base = strrchr (field2, DIR_SEP);
            lang = get_style (base ? base + 1 : field2);
            if (lang != unknown_style)
              return lang;
          }
        
        /* second, if the first field starts by `/' (DIR_SEP)
         * then, the name should be after the last `/' in field1.
         * strrchr cannot fail here since field1[0] is DIR_SEP. */
        if (field1[0] == DIR_SEP)
          {
            lang = get_style (strrchr (field1, DIR_SEP) + 1);
            if (lang != unknown_style)
              return lang;
          }
        
        /* Last, the first field may be the name of the language */
        if (!strcaseequ (field1, "c")) {
          /* never trust file when it says it's a c/C program */
          /* but try for the others */
          if ((lang = get_style (field1)) != unknown_style)
            return lang;
        }
      }
    } else
      if (message_verbosity >= 4)
        error (0, errno, "file failed\n");
  }
#endif
  
  /*
   * 4.  Heck!  Let's say this is just plain text :(
   */
  return plain_style;
}

/* For debugging */
/*
 * Dump on stderr the keywords of LIST, one per line and numbered
 * from 0, stopping at the entry whose theKeyword is null.
 * MSG is repeated in the header and in the footer of the dump.
 */
void
dprintkw(char * msg, keyword * list)
{
  keyword * entry;
  int rank = 0;

  fprintf (stderr, "++++++++++++++++++++++ %s\n", msg);
  for (entry = list ; entry->theKeyword ; entry++)
    {
      fprintf (stderr, "keyword %2d: '%s'\n", rank, entry->theKeyword);
      rank++;
    }
  fprintf (stderr, "Fin %s --------\n\n", msg);
}

/*
 * Dump on stderr the sequences of LIST, stopping at the entry whose
 * open.string is null.  Each line gives the rank of the sequence,
 * its opening string, then each of its nb closing alternatives:
 * the closing string, followed by \n if at_end is set, and the
 * font as a number.
 * MSG is repeated in the header and in the footer of the dump.
 */
void
dprintseq(char * msg, matching_sequence * list)
{
  matching_sequence * seq;
  int rank = 0;
  int alt;

  fprintf (stderr, "++++++++++++++++++++++ %s\n", msg);

  for (seq = list ; seq->open.string ; seq++)
    {
      fprintf (stderr, "seq %2d: '%s->'", rank, seq->open.string);
      rank++;

      for (alt = 0 ; alt < seq->nb ; alt++)
        {
          const char * eol_mark = seq->close[alt].at_end ? "\\n" : "";

          fprintf (stderr, " `%s%s(%d)' ",
                   seq->close[alt].string, eol_mark, seq->close[alt].font);
        }

      fprintf (stderr, "\n");
    }

  fprintf (stderr, "Fin %s --------\n\n", msg);
}

/*
 * Dump on stderr the theKeyword field of every symbol of LIST, one
 * per line, stopping at the entry whose theKeyword is null.
 * MSG is repeated in the header and in the footer of the dump.
 */
void
dprintsym(char * msg, symbol * list)
{
  symbol * entry;

  fprintf (stderr, "++++++++++++++++++++++ %s\n", msg);
  for (entry = list ; entry->theKeyword ; entry++)
    fprintf (stderr, "keyword : '%s'\n", entry->theKeyword);
  fprintf (stderr, "Fin %s --------\n\n", msg);
}

/************************************************************************/
/*			Routines on the keywords			*/
/************************************************************************/
/*
 * Routines on the keywords
 */
int
keywordCmp(const void * k1, const void * k2)
{
  return (ustrcmp(((keyword *) k1)->theKeyword, 
		  ((keyword *) k2)->theKeyword));
}

/*
 * Number of keywords of the language LANG.  The entry with a null
 * theKeyword which ends the array (the guard) is not counted.
 */
int 
count_keywords(STYLE lang)
{
  const keyword * first = languages[lang].keywords;
  const keyword * cursor;
  int total = 0;

  for (cursor = first ; cursor->theKeyword ; cursor++)
    total++;

  return total;
}

/*
 * Make in *DEST a sorted copy of the keywords of STYLE and return
 * their number.
 *
 * Whatever *DEST held is released first with XFREE.  The copy is
 * one entry longer than the count so that the guard is copied too;
 * only the real keywords are sorted (qsort with keywordCmp), the
 * guard stays last.
 *
 * If MIN is not null, then for each keyword MIN and MAX are updated
 * at the index given by the first character of the keyword: MAX
 * gets the index of its last occurrence in *DEST, and MIN the index
 * of the first one, provided MIN held -1 there.  The arrays are not
 * reset here.
 */
int
sort_keywords(keyword ** dest, STYLE style, int min[256], int max[256])
{
  keyword * sorted;
  int total, k;

  XFREE(*dest);

  total = count_keywords (style);
  sorted = (keyword *) xmalloc (sizeof(keyword) * (total + 1));
  *dest = sorted;

  /* `<=': the guard is copied as well */
  for (k = 0 ; k <= total ; k++)
    {
      sorted[k].theKeyword = languages[style].keywords[k].theKeyword;
      sorted[k].theFont    = languages[style].keywords[k].theFont;
    }

  qsort (sorted, total, sizeof(keyword), keywordCmp);

  if (min)
    {
      for (k = 0 ; k < total ; k++)
        {
          int initial = sorted[k].theKeyword[0];

          if (min[initial] == -1)
            min[initial] = k;
          max[initial] = k;
        }
    }

  return total;
}

/************************************************************************/
/*			Routines on the symbols				*/
/************************************************************************/
/*
 * Comparison routines
 */
int
symbolCmp(const void * k1, const void * k2)
{
  return (ustrcmp(((symbol *) k1)->theKeyword, 
		  ((symbol *) k2)->theKeyword));
}

/*
 * Number of symbols in ARRAY.  The entry with a null theKeyword
 * which ends the array (the guard) is not counted.
 */
int
count_symbols (const symbol * array)
{
  int total;

  for (total = 0 ; array[total].theKeyword ; total++)
    /* just count */;

  return total;
}

/*
 * Make in *DEST a sorted copy of the symbols of SRC and return
 * their number.
 *
 * Whatever *DEST held is released first with XFREE.  The copy is
 * one entry longer than the count so that the guard is copied too;
 * only the real symbols are sorted (qsort with symbolCmp), the
 * guard stays last.
 *
 * If MIN is not null, then for each symbol MIN and MAX are updated
 * at the index given by the first character of its theKeyword: MAX
 * gets the index of its last occurrence in *DEST, and MIN the index
 * of the first one, provided MIN held -1 there.  The arrays are not
 * reset here.
 */
static int
sort_symbols (symbol ** dest, const symbol * src, int min[256], int max[256])
{
  symbol * sorted;
  int total, k;

  XFREE (*dest);

  total = count_symbols (src);
  sorted = (symbol *) xmalloc (sizeof(symbol) * (total + 1));
  *dest = sorted;

  /* `<=': the guard is copied as well */
  for (k = 0 ; k <= total ; k++)
    {
      sorted[k].theKeyword = src[k].theKeyword;
      sorted[k].theSymbol  = src[k].theSymbol;
      sorted[k].theFont    = src[k].theFont;
    }

  qsort (sorted, total, sizeof(symbol), symbolCmp);

  if (min)
    {
      for (k = 0 ; k < total ; k++)
        {
          int initial = sorted[k].theKeyword[0];

          if (min[initial] == -1)
            min[initial] = k;
          max[initial] = k;
        }
    }

  return total;
}

/*
 * Exported call to sort the regular symbols of STYLE: hand
 * languages[STYLE].regulars to sort_symbols and return its result.
 */
int
sort_regulars (symbol ** dest, STYLE style, int min[256], int max[256])
{
  const symbol * regulars = languages[style].regulars;

  return sort_symbols (dest, regulars, min, max);
}

/*
 * Exported call to sort the special symbols of STYLE: hand
 * languages[STYLE].specials to sort_symbols and return its result.
 */
int
sort_specials (symbol ** dest, STYLE style, int min[256], int max[256])
{
  const symbol * specials = languages[style].specials;

  return sort_symbols (dest, specials, min, max);
}

/************************************************************************/
/*			Routines on the sequences			*/
/************************************************************************/
/*
 * Compare two matching sequences, with the signature qsort expects.
 * The opening strings decide; when they are equal according to
 * ustrcmp, the strings of the first closing alternatives decide.
 */
int
sequence_cmp (const void *s1, const void *s2)
{
  const matching_sequence * left = (const matching_sequence *) s1;
  const matching_sequence * right = (const matching_sequence *) s2;
  int order = ustrcmp (left->open.string, right->open.string);

  if (order != 0)
    return order;

  return ustrcmp (left->close[0].string, right->close[0].string);
}

/*
 * Number of sequences in ARRAY.  The entry with a null theOpening
 * which ends the array (the guard) is not counted.
 */
int
count_sequences (const sequence * array)
{
  int total;

  for (total = 0 ; array[total].theOpening ; total++)
    /* just count */;

  return total;
}

/*
 * Build the matching_string for PATTERN, with FONT as its font.
 *
 * At the start of PATTERN, a `^' is removed and sets at_start; a
 * `\' is just removed, which lets the string itself start with a
 * `^'.  At the end of what remains, a `$' is removed and sets
 * at_end; a `\' is just removed.
 *
 * The string of the result is a copy made by xustrdup.
 */
matching_string
to_matching_string (ustring pattern, FACE font)
{
  int last;
  matching_string res;

  /* Should it be at start of a line? */
  switch (pattern[0]) {
  case '^':
    res.at_start = TRUE;
    res.string = xustrdup (pattern + 1);
    break;
  case '\\':
    /* When a pattern starts by a `\', just forget it */
    res.at_start = FALSE;
    res.string = xustrdup (pattern + 1);
    break;
  default:
    res.at_start = FALSE;
    res.string = xustrdup (pattern);
    break;
  }

  /* Should it be at the eol? */
  res.at_end = FALSE;
  last = (int) ustrlen (res.string) - 1;
  /* Nothing remains to be looked at when PATTERN was empty or was
   * only a leading marker: LAST is then -1 and must not be used as
   * an index. */
  if (last >= 0)
    switch (res.string [last]) {
    case '$':
      res.at_end = TRUE;
      res.string [last] = '\0';
      break;
    case '\\':
      /* When the marker finishes with a `\', just forget it */
      res.string [last] = '\0';
      break;
    default:
      break;
    }

  /* The font */
  res.font = font;
  return res;
}

/*
 * Prepare the matching for in sequences
 *
 * Build in *DEST the array of matching sequences of STYLE and
 * return the number of its entries.  The array is ended by an entry
 * whose open.string is UNULL, which is not counted.
 *
 * Consecutive sequences of the style which have the same opening
 * string are merged into a single entry which holds all their
 * closing alternatives (nb of them, in close[]), in their original
 * order.
 *
 * NOTE(review): only the array previously in *DEST is released with
 * XFREE; the strings and close arrays it referred to do not appear
 * to be released anywhere in this function -- confirm with callers.
 */
int 
set_sequences (matching_sequence ** dest, STYLE style)
{
  int num = 0;
  int i, j, k;
  const sequence * seq = languages[style].sequences;

  XFREE (*dest);
  
  num = count_sequences (seq);
  
  /* One more entry, for the guard */
  *dest = ALLOC (matching_sequence, num + 1);
  
  for (i = 0 ; i < num ; i++) {
    /* 1. Opening part */
    (*dest)[i].open = to_matching_string (seq[i].theOpening, 
                                          seq[i].theOpeningFont);
    /* 2. inside part */
    (*dest)[i].in_font = seq[i].theInsideFont;
    
    /* 3. Closing part */
    (*dest)[i].nb = 1;
    (*dest)[i].close = ALLOC (matching_string, 1);
    (*dest)[i].close[0] = to_matching_string (seq[i].theClosing,
                                              seq[i].theClosingFont);
  }

  (*dest)[i].open.string = UNULL;
  
  /* We do not want to sort because it takes more time to
   * treat the exceptions at run-time (don't forget that the
   * longest match should always be taken) than plain search
   qsort (*dest, num, sizeof (matching_sequence), sequence_cmp);
   */
  
  /* Group the sequences with the same opening.
   * Note that they are supposed to be grouped together. */
  for (i = 0 ; i < num ; i++) {
    /* J: index of the first entry after I with another opening */
    for (j = i ; 
         j < num && ustrequ ((*dest)[i].open.string, (*dest)[j].open.string) ;
         j++)
      /* skip */;
    if (j > i + 1) {
      /* Several closing alternatives */
      (*dest)[i].nb = j - i;
      (*dest)[i].close = 
        REALLOC ((*dest)[i].close, matching_string, (*dest)[i].nb);
      /* close[0] is already in place: start from the second one */
      for (k = 1 ; k < (*dest)[i].nb ; k++) {
        (*dest)[i].close[k] = (*dest)[i + k].close[0];
        /* The entry I + K is dropped below.  Its closing part has
         * just been copied (the closing string now belongs to entry
         * I), so what remains of it would be lost: its one-element
         * close array and its copy of the opening string. */
        XFREE ((*dest)[i + k].close);
        XFREE ((*dest)[i + k].open.string);
      }
      /* shrink the result array */
      for (k = i + (*dest)[i].nb ; k < num ; k++)
        (*dest)[k - (*dest)[i].nb + 1] = (*dest)[k];
      num -= (*dest)[i].nb - 1;
    }
  }
  
  /* Put the guard back right after the last entry which is kept */
  (*dest)[num].open.string = UNULL;
  
/*  dprintseq ("Oooo\n", *dest);*/
  
  return num;
}
