/*
This program is totally free to use, modify and copy as you wish.
I wrote it just to read some manuals which ps2ascii and ps2txt
just didn't manage to filter in a nice way.

Anyhow, both ps2txt and ps2ascii are included so you can try them out.


I have developed this program in Linux

(a GREAT OS, Thank you Linus & CO)

using GCC 2.3.3 with the switches -Wall -ansi -O2 and -m486;
it also compiles under MSDOS with TC 3.0 without any problems.


General:
  ps2term.c is a utility that strips a PostScript (.PS) file from all
  graphic stuff and prints the result (ASCII) on stdout.

  I began with hacking in the ps2txt-2.1.c file, but when just the
  original main() function was left, i decided to make it as a whole
  new program.

  This program is NOT a regular PostScript interpreter; it just
  extracts text strings from a PostScript file and at the same time
  tries to figure out how to format the text. This is done by looking
  for certain information in the input file; the program has a number
  of switches that select what to search for and what to do when it
  is found.

Usage:
  ps2term [-1p] [-2 | -2- | -2s] [-b- | -bp] [-sSTRING] [-xn] [-?] [infile.ps]

  The first time you want to filter a PS file just try ps2term without
  any switches; if this works fine then there's nothing more to it.

  Else if there are a lot of parentheses in the output, try the -1p
  switch, or if the output text has strange linefeeds or misses some
  spaces between words, use -2, -2s or -2-.

  If this does not fix your problem with linefeeds, try -b- or -bp; in
  the worst case use -xn, where n is the maximal number of characters
  allowed in a line of the output text.

  If not even this produces a nice ASCII text from your PS file, then you
  just have to write a new program that does, haven't you ??? :)

  If some strange text shows up repeatedly in the output text, try
  to set -s<STRING> where STRING is the beginning of the "strange" text;
  enclose the STRING in quotes.




Switches and their meaning.
---------------------------------------
  The text strings in a PostScript file are placed within parentheses;
  text formatting and other stuff is mostly placed outside the
  parentheses. The methods search for special info outside the
  parentheses.

 -?
   Print a short usage info on stdout.

 -2
   Search for a sequence of 2 numbers; if found and if one of these
   numbers is greater than the numbers found in the previous search, do
   a linefeed, else put a space.
   Some PS files have pairs of x,y coordinates embedded; when using
   this switch the program uses these pairs to detect where to place
   linefeeds.

 -2s
   As -2 but both numbers must be greater than the numbers from the
   previous search.

 -2-
   As -2 but one of the numbers must be less than the numbers in the
   previous search. It seems that some PS files are built bottom-up;
   this switch is good for such files.


 -1p
   Process just 1 parenthesis level at a time; the default is to process
   from the found left parenthesis to the last right parenthesis in the
   line. When this program detects a left parenthesis it starts to search
   for the last right parenthesis on the line (and even on the next line
   if none is found) and treats everything between them as a text string.
   Some PS files have font info placed in between, so each pair of
   parentheses has to be inspected on its own to see whether it is text
   or not; this switch does just that.

 -b-
   Always break a line if a letter is followed by a '-'. If this
   program doesn't recognize a break at the end of a line, this switch
   will force a break when this occurs.


 -bp
   Always break a line when a  period '.' occurs.

 -sSTRING
   Suppress, i.e. do not include, text in parentheses that begins with
   STRING.

 -xn
   Force a linefeed after at most n characters in a line. If there is any
   space in the interval n-5 to n then the line is broken at that space,
   else a '-' is placed after the last letter.

 If no filename is given when ps2term is started, ps2term will read from
 stdin; stdout is always the output file.

 4-May-1993

 su92ban@nada.kth.se

 Bengt Andersson
 Cardellgatan 4
 112 36 Stockholm, Sweden
*/


#include <stdio.h>
#include <ctype.h>
#include <string.h>



/* Boolean constants. */
#define  TRUE     1
#define  FALSE    0
#define MAX_ROW   25		/* newpage() pads with linefeeds until this row count is reached */


/* Bit flags that main() combines into the `method` word passed to
   parse() and parse_par().  */
#define NUM       1		/* -2, -2s, -2-: use number pairs to place linefeeds */
#define SPAR      2		/* -1p: handle one pair of parentheses at a time */
#define STRICT    4		/* -2s: both numbers must be greater than the previous pair */
#define REV       8		/* -2-: a number must be less than in the previous pair */
#define SUP       16		/* -sSTRING: a suppress string was given */
#define PER       32		/* -bp: break the line at a '.' */
#define MIN       64		/* -b-: break the line at a '-' that follows a letter */

/* Size of the line buffer in parse() and of the suppress-string buffer
   in main().  */
#define BUF_SIZE  132



void parse (FILE *, int, char *);
int parse_par (char *, int);
void newpage (int);
void put_ch (char);

/* Line length limit set with -xn and read by put_ch(); 0 means that no
   linefeeds are forced.  */
int max_len = 0;



/*
 * Program entry point.
 *
 * Decodes the command line into the `method` flag word (NUM, SPAR,
 * STRICT, REV, SUP, PER, MIN), the suppress string and the global
 * max_len, selects the input stream and hands everything to parse().
 *
 * Any argument that is not a recognised switch is taken as the name of
 * the input file; if several are given the last one is used.  Without a
 * file name the input is read from stdin.
 *
 * Returns 0 on success, 1 after printing the usage text (-?) or when the
 * input file cannot be opened.
 */
int
main (int argc, char *argv[])
{
  int i;
  FILE *ps_file = NULL;
  int method = 0;		/* used as flags rather than number */
  char sup_str[BUF_SIZE];	/* The place where the suppress string goes */

  sup_str[0] = '\0';

  for (i = 1; i < argc; i++)	/* parse command line args */
    {
      const char *arg = argv[i];

      /* Flags are OR-ed in so that repeating a switch cannot carry over
         into a neighbouring bit.  */
      if (strcmp (arg, "-2") == 0)
	method |= NUM;
      else if (strcmp (arg, "-2s") == 0)
	method |= NUM | STRICT;
      else if (strcmp (arg, "-1p") == 0)
	method |= SPAR;
      else if (strcmp (arg, "-bp") == 0)
	method |= PER;
      else if (strcmp (arg, "-b-") == 0)
	method |= MIN;
      else if (strcmp (arg, "-2-") == 0)
	method |= REV | NUM;
      else if (strncmp (arg, "-s", 2) == 0)
	{
	  /* Copy the first whitespace-delimited word after "-s", never
	     writing more than the buffer can hold.  */
	  const char *src = arg + 2;
	  size_t n = 0;

	  while (isspace ((unsigned char) *src))
	    src++;
	  while (*src != '\0' && !isspace ((unsigned char) *src)
		 && n < sizeof sup_str - 1)
	    sup_str[n++] = *src++;
	  sup_str[n] = '\0';

	  /* An empty suppress string would match every text string, so
	     the flag is only set when something was actually given.  */
	  if (n > 0)
	    method |= SUP;
	}
      else if (strncmp (arg, "-x", 2) == 0)
	{
	  int width;

	  /* Ignore a missing or negative width instead of storing it.  */
	  if (sscanf (arg + 2, "%d", &width) == 1 && width >= 0)
	    max_len = width;
	}
      else if (strncmp (arg, "-?", 2) == 0)
	{
	  puts ("usage:\tps2term [-1p] [-2 | -2- | -2s]"
		" [-b- | -bp] [-sSTRING] [-?] [infile.ps]");
	  if (ps_file != NULL)
	    fclose (ps_file);
	  return 1;
	}
      else
	{
	  /* Input file name; a later name replaces an earlier one.  */
	  if (ps_file != NULL)
	    fclose (ps_file);
	  ps_file = fopen (arg, "r");
	  if (ps_file == NULL)
	    {
	      perror (arg);
	      return 1;
	    }
	}
    }

  if (ps_file == NULL)
    ps_file = stdin;

  parse (ps_file, method, sup_str);	/* go on and do it */

  if (ps_file != stdin)
    fclose (ps_file);
  return 0;
}



/*
 * Write the contents of one PostScript text string to the output.
 *
 * str  NUL-terminated text taken from between a pair of parentheses.
 * brk  flag word; MIN requests a line break after a '-' that follows a
 *      letter, PER requests a line break after every '.'.
 *
 * Backslash escapes are translated: a numeric escape of up to three
 * digits is looked up in the table below (ligatures, spaces, a few
 * symbols; unknown numbers print nothing), \t and \n give a tab and a
 * linefeed, \\ prints a double quote, and for any other escaped
 * character the backslash is dropped and the character is printed once.
 *
 * All output goes through put_ch().  The last character handled is kept
 * in a static variable so that the '-' rule also works across calls.
 *
 * Returns the number of linefeeds requested for \n escapes and for the
 * MIN/PER line breaks.
 */
int
parse_par (char *str, int brk)
{
  static int prev_ch;
  int ch, ch_num, digits, x = 0;
  int row_count = 0;

  /* Characters are read as unsigned char so that they are valid
     arguments for the <ctype.h> functions.  */
  while ((ch = (unsigned char) str[x]) != '\0')
    {
      switch (ch)
	{
	case '\\':
	  ch = (unsigned char) str[++x];
	  if (ch == '\0')
	    return row_count;

	  if (isdigit (ch))	/* this is a macro number */
	    {
	      /* Read at most three digits; the number is formed from the
	         digits as written, which is how the table is keyed.  */
	      ch_num = ch - '0';
	      x++;
	      for (digits = 1;
		   digits < 3 && isdigit ((unsigned char) str[x]);
		   digits++)
		ch_num = ch_num * 10 + (str[x++] - '0');

	      switch (ch_num)
		{
		case 13:
		  parse_par ("ff", brk);
		  break;

		case 16:
		  parse_par ("ffl", brk);
		  break;

		case 14:
		case 214:
		case 256:
		  parse_par ("fi", brk);
		  break;

		case 15:
		case 215:
		case 257:
		  parse_par ("fl", brk);
		  break;

		case 267:
		  parse_par (" * ", brk);
		  break;

		case 17:
		case 31:
		case 45:
		case 50:
		case 51:
		case 212:
		case 252:
		case 261:
		case 262:
		case 263:
		case 272:
		  parse_par ("  ", brk);
		  break;

		case 320:
		  put_ch ('-');
		  break;

		case 325:
		  put_ch ('\'');
		  break;

		case 343:
		  put_ch ('@');	/* it should really be the copyright sign */
		  break;

		case 245:
		  put_ch ('\n');
		  break;

		default:
		  break;
		}
	    }
	  else
	    {
	      switch (ch)
		{
		case 't':
		  put_ch ('\t');	/* write a tab */
		  break;

		case 'n':
		  put_ch ('\n');	/* write a linefeed & update row counter */
		  row_count++;
		  break;

		case '\\':
		  put_ch ('"');	/* open quotes */
		  break;

		case '(':
		case ')':
		default:
		  /* The backslash is dropped and the character itself is
		     printed exactly once.  */
		  put_ch ((char) ch);
		  break;
		}
	      x++;		/* step past the escaped character */
	    }
	  break;		/* for case '\\' */

	case '-':
	  put_ch ((char) ch);
	  x++;
	  /* Only the -b- rule applies to a hyphen; it must not run on
	     into the '.' rule below.  */
	  if ((brk & MIN) && isalpha ((unsigned char) prev_ch))
	    {
	      put_ch ('\n');
	      row_count++;
	    }
	  break;

	case '.':
	  put_ch ((char) ch);
	  x++;
	  if (brk & PER)
	    {
	      put_ch ('\n');
	      row_count++;
	    }
	  break;

	default:
	  put_ch ((char) ch);
	  x++;
	  break;
	}
      prev_ch = ch;
    }
  return row_count;
}

/*
 * Write one character to stdout, filtering linefeeds and wrapping lines.
 *
 * - A linefeed that directly follows another linefeed written through
 *   this function is dropped, to avoid a lot of empty lines.
 * - When max_len is non-zero a linefeed is forced once the current line
 *   holds max_len characters; a '-' is added first if the line ends in a
 *   letter.  Within the last 5 positions before max_len the line is
 *   broken early right after a whitespace character.
 *
 * The column counter and the last character written are kept in static
 * variables between calls.
 */
void
put_ch (char ch)
{
  static int ch_count;		/* characters written on the current line */
  static char last_ch;
  unsigned char uch = (unsigned char) ch;	/* safe <ctype.h> argument */

  if (ch == '\n')
    {
      /* Avoid a lot of linefeeds */
      if (last_ch != '\n')
	putchar ('\n');
      ch_count = 0;
      last_ch = '\n';
      return;
    }

  putchar (ch);
  ch_count++;
  last_ch = ch;

  if (max_len != 0 && ch_count >= max_len - 5
      && (isspace (uch) || ch_count >= max_len))
    {
      if (isalpha (uch))
	putchar ('-');
      putchar ('\n');
      ch_count = 0;
      /* Remember the forced break so that a linefeed requested right
         after it does not produce an empty line.  */
      last_ch = '\n';
    }
}


/*
 * Finish the current page: write one raw linefeed for every row between
 * `rows` and MAX_ROW, then send one more linefeed through put_ch().
 * Nothing is padded when `rows` is already MAX_ROW or more.
 */
void
newpage (int rows)
{
  int line = rows;

  while (line < MAX_ROW)
    {
      putchar ('\n');
      ++line;
    }

  put_ch ('\n');
}


/*
 * Read the PostScript text from ps_file line by line and write the text
 * found in it to stdout.
 *
 * ps_file  open input stream.
 * method   flag word (NUM, SPAR, STRICT, REV, SUP, ...), also handed on
 *          to parse_par().
 * sup_str  suppress string; only looked at when SUP is set.  Text in
 *          parentheses that begins with it is skipped.
 *
 * Everything up to and including the first line containing "EndProlog"
 * or "end of header" is discarded.  After that each line is scanned
 * character by character:
 *   - with NUM set, a pair of numbers is compared with the previous pair
 *     to decide between a linefeed and a space;
 *   - '%' ends the line, and a line containing "%Page:" starts a new
 *     page;
 *   - '{' prints every parenthesised string in the rest of the line;
 *   - '(' prints the string up to the last (or, with SPAR, the first)
 *     unescaped ')' on the line; a string without a closing parenthesis
 *     is continued on the next line; a string made up of a long run of
 *     repeated characters is replaced by a row of '-';
 *   - a few single letters outside strings are taken as operators and
 *     produce a linefeed or a space.
 */
void
parse (FILE * ps_file, int method, char *sup_str)
{
  int ch;			/* current character */
  int row_count = 0;		/* keep track on number of rows in a page */
  int i, index;
  int len;			/* length of the line as read by fgets() */
  int last_paren = 0;
  int tmp;
  int brk_line = FALSE;		/* a text string continues on the next line */

  char line_buf[BUF_SIZE];
  char *sub_buf;
  /* [0] is the latest number pair, [1] the pair before it.  */
  float tmp1[2] = { 0.0f, 0.0f };
  float tmp2[2] = { 0.0f, 0.0f };

  /* first strip off the prolog part of the file */
  while (fgets (line_buf, BUF_SIZE, ps_file) != NULL &&
	 strstr (line_buf, "EndProlog") == NULL &&
	 strstr (line_buf, "end of header") == NULL)
    ;

  while (fgets (line_buf, BUF_SIZE, ps_file) != NULL)
    {
      index = 0;
      /* The handlers below put extra '\0' bytes into the line and may
         step past its real end, so the scan is bounded by the length
         measured here and not only by the terminator.  */
      len = (int) strlen (line_buf);

      while (index < len && (ch = line_buf[index++]) != '\0')
	{
	  if (index == 1 && brk_line)
	    {
	      /* Continue the unfinished string from the previous line as
	         if this line began with '('.  */
	      ch = '(';
	      brk_line = FALSE;
	      index = 0;
	    }

	  if ((method & NUM) && isdigit ((unsigned char) ch))
	    {
	      sub_buf = line_buf + index - 1;
	      tmp1[1] = tmp1[0];
	      tmp2[1] = tmp2[0];
	      i = sscanf (sub_buf, "%f%f", &tmp1[0], &tmp2[0]);
	      if (i == 2)
		{
		  /* Step over the two numbers; each scan also consumes
		     the character that ends it and leaves it in ch.  */
		  do
		    ch = (index < len) ? line_buf[index++] : '\0';
		  while (isdigit ((unsigned char) ch) || ch == '.');
		  if (index < len)
		    index++;
		  do
		    ch = (index < len) ? line_buf[index++] : '\0';
		  while (isdigit ((unsigned char) ch) || ch == '.');

		  if (method & REV)
		    {
		      if (tmp1[0] < tmp1[1] || tmp2[0] < tmp2[1])
			put_ch ('\n');
		      else
			put_ch (' ');
		    }
		  else if (method & STRICT)
		    {
		      if (tmp1[0] > tmp1[1] && tmp2[0] > tmp2[1])
			put_ch ('\n');
		      else
			put_ch (' ');
		    }
		  else
		    {
		      if (tmp1[0] > tmp1[1] || tmp2[0] > tmp2[1])
			put_ch ('\n');
		      else
			put_ch (' ');
		    }
		}
	    }

	  switch (ch)
	    {
	    case ' ':
	    case '\n':		/* ignore newlines in ps_file */
	      break;

	    case '/':		/* ignore definitions on a line */
	      break;

	    case '%':	/* it seems as a new page is mentioned in comments */
	      if (strstr (line_buf, "%Page:") != NULL)
		{
		  newpage (row_count);
		  row_count = 0;	/* rows are counted per page */
		}
	      index = len;	/* skip the rest of the line */
	      break;

	    case '{':
	      /* NOTE(review): a '(' directly after the '{' is not seen by
	         this scan - confirm whether that is intended.  */
	      while (line_buf[index++] != '\0')
		{
		  if (line_buf[index] == '(')
		    {
		      i = index;
		      while (line_buf[index] != ')' &&
			     line_buf[index] != '\0')
			index++;
		      if (line_buf[index] == ')')
			{
			  line_buf[index++] = '\0';
			  sub_buf = line_buf + i + 1;
			  row_count += parse_par (sub_buf, method);
			}
		    }
		}
	      break;

	    case '(':
	      i = index;
	      if ((method & SUP)
		  && strncmp (line_buf + index, sup_str,
			      strlen (sup_str)) == 0)
		{
		  /* Suppressed text: skip to just past the next ')', but
		     never beyond the end of the line.  */
		  while (line_buf[index] != '\0' && line_buf[index++] != ')')
		    ;
		}
	      else
		{
		  tmp = 0;	/* number of equal neighbouring characters */

		  while (line_buf[index++] != '\0')
		    if (line_buf[index] == ')' && line_buf[index - 1] != '\\')
		      {
			last_paren = index;
			if (method & SPAR)
			  break;
		      }
		    else
		      tmp += (line_buf[index - 1] == line_buf[index]) ? 1 : 0;

		  if (last_paren == 0)
		    {
		      /* No closing parenthesis: drop the last character
		         of the line and go on with the next line.  */
		      brk_line = TRUE;
		      line_buf[index - 2] = '\0';
		    }
		  else
		    {
		      index = last_paren;
		      line_buf[index++] = '\0';
		    }
		  last_paren = 0;

		  if (tmp < 12)
		    {
		      sub_buf = line_buf + i;
		      row_count += parse_par (sub_buf, method);
		    }
		  else
		    {
		      /* A long run of one character is printed as a row
		         of '-' instead.  */
		      for (i = 0; i <= tmp; i++)
			put_ch ('-');
		      put_ch ('\n');
		    }
		}
	      break;

	    case 'y':
	    case 'Y':
	    case 'V':
	    case 'v':
	    case 'N':
	    case 'Q':
	      if (!(method & NUM))
		{
		  put_ch ('\n');
		  row_count++;
		}
	      break;

	    case 'x':
	    case 'X':
	    case 'S':
	    case 'g':
	    case 'p':
	    case 'i':
	    case 'e':
	    case 'd':
	    case 'b':
	    case 'f':
	    case 'h':
	      if (!(method & NUM) || (method & SPAR))
		put_ch (' ');
	      break;

	    default:
	      break;
	    }
	}
    }
}
