/* Output from p2c, the Pascal-to-C translator */
/* From input file "bigpas.p" */


/* CS 310 - Data Structures - Big Program #1 - By Matt Martini */

#include <p2c/p2c.h>


#define maxstrg         20   /*maximum word length, not counting the terminating NUL*/

#define minthreshold    0.5   /*score a word pair must exceed to be conflated; alternate 0.75*/


typedef Char word[maxstrg + 1];
   /*a word of text: up to maxstrg characters plus the terminating NUL*/

/*One record of a singly linked list of words.  Despite its name,*/
/*wordptr is the record type itself; the links are pointers to it.*/

typedef struct wordptr {
  word txt;   /*a word of text*/
  long count;   /*frequency of word*/
  /*link to next word; NULL at the end of the list*/
  struct wordptr *nextwrd;
} wordptr;

typedef wordptr *alpharray['z' + 1 - 'a'];   /*array of list heads:*/

/*one linked list of records per first letter 'a'..'z', indexed by*/
/*(letter - 'a')*/


Static FILE *infile;   /*input file currently being read*/
Static FILE *outfile;   /*output file that receives the word listings*/
Static Char filename[21];   /*name of the file being opened (20 chars + NUL)*/
Static alpharray stop_words;   /*common words; these are kept out of go_words*/
Static alpharray go_words;   /*interesting words: those not found in stop_words*/
Static word aword;   /*the word most recently read by READWORD*/
Static wordptr *thiswrd;   /*scratch pointer: record located by FINDCELL*/
Static wordptr *prev;   /*scratch pointer: record before thiswrd, or NULL*/
Static boolean found;   /*true if the last FINDCELL search was successful*/



/*Reset the global state: both word tables get empty lists in every*/
/*slot, the scratch word becomes the empty string, the scratch*/
/*pointers become NULL and the search flag becomes false.*/
Static Void INITALIZE()
{
  int slot;

  found = false;
  thiswrd = NULL;
  prev = NULL;
  aword[0] = '\0';
  /*one slot per first letter, 'a' through 'z'*/
  for (slot = 0; slot <= 'z' - 'a'; slot++) {
    stop_words[slot] = NULL;
    go_words[slot] = NULL;
  }
}  /*initalize*/


/*Return the first character of wrd, or '\0' if wrd is the empty*/
/*string.  wrd must be a NUL-terminated string.*/
Static Char FIRST_LET(wrd)
Char *wrd;
{
  /*Indexing gives exactly what formatting wrd with "%.1s" into a*/
  /*scratch buffer and reading the buffer's first byte gave, without*/
  /*the 256-byte buffer and the sprintf call.*/
  return (wrd[0]);
}  /*first_let*/


/*Fold the character that UPch points to into lowercase.*/
/*Only the capitals 'A'..'Z' are changed; any other character is left*/
/*exactly as it was.*/
Local Void TRIM_UPPER_CASE(UPch)
Char *UPch;
{
  Char letter;

  letter = *UPch;
  if (letter < 'A' || letter > 'Z')
    return;   /*not a capital - nothing to do*/
  *UPch = _tolower(letter);   /*capital - make it small*/
}  /*trim_upper_case*/


/*Read the next word from infile into token.*/
/*A word is a maximal run of letters ('a'..'z', 'A'..'Z'); every other*/
/*character, newlines included, is a delimiter and is skipped.*/
/*Letters are folded to lowercase.  At most maxstrg letters are kept;*/
/*the remainder of a longer word is read and discarded, so token must*/
/*have room for maxstrg + 1 characters.*/
/*If infile is already at end of file, token is left untouched.  If*/
/*only delimiters remain before the end of the file, token becomes*/
/*the empty string.*/
Static Void READWORD(infile, token)
FILE *infile;
Char *token;
{
  Char ch;
  long len;   /*number of letters stored in token so far*/

  if (P_eof(infile))   /*don't try to read past end of file*/
    return;
  len = 0;
  ch = getc(infile);   /*read the first character*/
  /*if ch is a delimiter read characters until a letter is found;*/
  /*getc is only ever called after P_eof reported more input*/
  while (!P_eof(infile) &&
	 !((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')))
    ch = getc(infile);
  /*if ch is a letter read the rest of the word*/
  while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
    TRIM_UPPER_CASE(&ch);
    /*Keep only what fits.  Appending without a limit to a fixed*/
    /*scratch buffer, as this routine used to do, overflowed that*/
    /*buffer on words of more than 100 letters.*/
    if (len < maxstrg)
      token[len++] = ch;
    if (P_eof(infile))   /*that was the last letter of the last word*/
      break;
    ch = getc(infile);
  }
  token[len] = '\0';
}  /*readword*/


/*Insert token into the table which.*/
/*Each slot of which heads a linked list, kept in strcmp order, of the*/
/*words that start with one particular letter.  If token is already in*/
/*its list the record's count is incremented; otherwise a new record*/
/*with a count of 1 is linked in at its alphabetic position.*/
/*A token that does not start with a lowercase letter has no slot in*/
/*the table and is ignored.  A token longer than maxstrg characters*/
/*is cut down to maxstrg characters so that it fits a record.*/
Static Void INSERT(which, token)
wordptr **which;
Char *token;
{
  word key;   /*token, limited to what a record can hold*/
  Char fl;
  int order;
  wordptr **link;   /*the pointer that leads to the record examined*/
  wordptr *temp;

  sprintf(key, "%.*s", maxstrg, token);
  fl = FIRST_LET(key);   /*get first letter of token*/
  if (fl < 'a' || fl > 'z')   /*would index outside the table*/
    return;
  /*Travel the list through the pointers themselves: when the loop*/
  /*ends, *link is the pointer a new record has to be hung on, be it*/
  /*the table slot (empty list, or new first word) or the nextwrd*/
  /*field of the record before the correct spot.*/
  for (link = &which[fl - 'a']; *link != NULL; link = &(*link)->nextwrd) {
    order = strcmp((*link)->txt, key);
    if (order == 0) {   /*is the word already in the list?*/
      (*link)->count++;   /*yes - update count*/
      return;
    }
    if (order > 0)   /*first record that sorts after the word*/
      break;
  }
  temp = (wordptr *)Malloc(sizeof(wordptr));   /*make a record for this word*/
  strcpy(temp->txt, key);   /*key is at most maxstrg characters long*/
  temp->count = 1;
  temp->nextwrd = *link;
  *link = temp;
}  /*insert*/



/*Look token up in the table which.*/
/*If it is found, *found is set to true, *thiswrd points to the record*/
/*that holds it and *prev points to the record before that one (NULL*/
/*if the word is the first in its list).  If it is not found, *found*/
/*is false and both *prev and *thiswrd are NULL.  A token that does*/
/*not start with a lowercase letter is never found.*/
Static Void FINDCELL(which, token, prev, thiswrd, found)
wordptr **which;
Char *token;
wordptr **prev, **thiswrd;
boolean *found;
{
  Char fl;
  wordptr *pre, *here;

  /*Start from "not found".  token may point into a record that was*/
  /*reached through *prev or *thiswrd; that is safe because only the*/
  /*pointers are cleared here, never the records.*/
  *prev = NULL;
  *thiswrd = NULL;
  *found = false;
  fl = FIRST_LET(token);   /*find the first letter of token*/
  if (fl < 'a' || fl > 'z')   /*would index outside the table*/
    return;
  pre = NULL;
  /*Travel the list for this first letter.  The end of the list is*/
  /*tested before a record is looked at, so a word that is not in a*/
  /*non-empty list ends the search instead of dereferencing NULL.*/
  for (here = which[fl - 'a']; here != NULL; here = here->nextwrd) {
    if (!strcmp(here->txt, token)) {   /*is this the token?*/
      *prev = pre;   /*set pointers to word and previous word*/
      *thiswrd = here;
      *found = true;
      return;
    }
    pre = here;
  }
}  /*findcell*/


/*Write every word in the table which to outfile, one per line, as its*/
/*frequency followed by the word.  The lists are visited from the 'a'*/
/*slot to the 'z' slot and each list from its first record to its*/
/*last.*/
Static Void WRITEWORDS(which)
wordptr **which;
{
  int slot;
  wordptr *node;

  for (slot = 0; slot <= 'z' - 'a'; slot++) {
    /*an empty slot simply contributes no lines*/
    for (node = which[slot]; node != NULL; node = node->nextwrd)
      fprintf(outfile, "%3ld  --> %s\n", node->count, node->txt);
  }
}  /*writewords*/


/* Local variables for CONFLATE: */
/* CONFLATE passes a pointer to this structure to its helper routines */
/* so that they can read and update its working state. */
struct LOC_CONFLATE {
  alpharray which;   /*CONFLATE's own copy of the list heads it was given*/
  Char c;   /*first letter of the list being conflated*/
  wordptr *here;   /*record CONFLATE is currently positioned on*/
} ;

/*Return the threshold value for a pair of words: twice the number of*/
/*leading characters the two words have in common, divided by the sum*/
/*of their lengths.  The result is 0.0 when the words share no leading*/
/*characters and 1.0 when they are equal.  Two empty words give 0.0.*/
/*LINK is not used.*/
Local double con_thres(token, pred, LINK)
Char *token, *pred;
struct LOC_CONFLATE *LINK;
{
  long a, b, m;

  a = strlen(token);
  b = strlen(pred);
  if (a + b == 0)   /*nothing to compare; avoid dividing zero by zero*/
    return (0.0);
  /*Find the number of matching letters by comparing the words in*/
  /*place.  This gives the same count as formatting ever longer*/
  /*prefixes into scratch buffers and comparing those, but it is*/
  /*linear and cannot overrun a buffer.*/
  m = 0;
  while (m < a && m < b && token[m] == pred[m])
    m++;
  return (m * 2.0 / (a + b));
}  /*con_thres*/

/* Local variables for combine_words: */
/* combine_words passes a pointer to this structure to del_thiswrd and */
/* del_prev, which reach CONFLATE's variables through it. */
struct LOC_combine_words {
  struct LOC_CONFLATE *LINK;   /*the variables of the calling CONFLATE*/
} ;

/*Dispose of the record pointed to by thiswrd while maintaining the*/
/*list: its count is added to prev, prev is linked to the record that*/
/*followed it, and CONFLATE's position is moved back to prev.*/
Local Void del_thiswrd(LINK)
struct LOC_combine_words *LINK;
{
  struct LOC_CONFLATE *outer;

  outer = LINK->LINK;
  prev->count += thiswrd->count;   /*the surviving word keeps the total*/
  prev->nextwrd = thiswrd->nextwrd;   /*bypass the record being removed*/
  outer->here = prev;
  Free(thiswrd);
}

/*Dispose of the record before the one pointed to by thiswrd while*/
/*maintaining the list.  The count of the removed record is added to*/
/*thiswrd first.  On return the globals prev, thiswrd and found hold*/
/*whatever the internal FINDCELL call and Free left in them.*/
Local Void del_prev(LINK)
struct LOC_combine_words *LINK;
{
  struct LOC_CONFLATE *outer;

  outer = LINK->LINK;
  thiswrd->count += prev->count;
  /*get a pointer to the record before prev*/
  FINDCELL(outer->which, prev->txt, &prev, &thiswrd, &found);
  /*get out your scorecard - the names of the players have changed*/
  /*thiswrd now points to the old prev, and prev points to the word*/
  /*before that, here is maintained at the old thiswrd*/
  if (found && prev != NULL)
    prev->nextwrd = outer->here;
  else {
    /*The removed record was the first of its list.  CONFLATE works*/
    /*on its own copy of the list heads, so that copy has to be moved*/
    /*on as well; otherwise the next search through it would start at*/
    /*the record that is freed below.*/
    outer->which[outer->c - 'a'] = outer->here;
    /*NOTE(review): the caller's table is assumed to be go_words, as*/
    /*it is for the only call of CONFLATE in this file.*/
    go_words[outer->c - 'a'] = outer->here;
  }
  Free(thiswrd);
}

/*Combine the words thiswrd and prev into one record.  The word that*/
/*is kept is chosen by, in this order:*/
/*    i. minimum length*/
/*   ii. maximum frequency*/
/*  iii. alphabetical order (the word that sorts first is kept).*/
/*The other record is removed by del_thiswrd or del_prev.*/
Local Void combine_words(LINK)
struct LOC_CONFLATE *LINK;
{
  struct LOC_combine_words V;
  long thislen, prevlen;
  boolean drop_this;   /*true: remove thiswrd, false: remove prev*/

  V.LINK = LINK;
  thislen = strlen(thiswrd->txt);
  prevlen = strlen(prev->txt);
  if (thislen != prevlen)   /* i. the longer word goes*/
    drop_this = (thislen > prevlen);
  else if (thiswrd->count != prev->count)   /* ii. the rarer word goes*/
    drop_this = (thiswrd->count < prev->count);
  else   /* iii. the word that sorts later goes*/
    drop_this = (strcmp(thiswrd->txt, prev->txt) > 0);
  if (drop_this)
    del_thiswrd(&V);
  else
    del_prev(&V);
}  /*combine_words*/


/*Conflate words with the same stem.*/
/*Every list of which_ is travelled from its first record on.  Each*/
/*record other than the first of its list is scored against the*/
/*record before it with con_thres, and the two are combined by*/
/*combine_words when the score is greater than minthres.  The list*/
/*heads are copied into a local table before the work starts.*/
Static Void CONFLATE(which_, minthres)
wordptr **which_;
double minthres;
{
  struct LOC_CONFLATE V;
  double score;

  memcpy(V.which, which_, sizeof(alpharray));
  for (V.c = 'a'; V.c <= 'z'; V.c++) {   /*travel thru array of lists*/
    /*combine_words may move V.here; the step to the next record is*/
    /*always taken from wherever it was left*/
    for (V.here = V.which[V.c - 'a']; V.here != NULL;
	 V.here = V.here->nextwrd) {
      thiswrd = NULL;
      prev = NULL;
      FINDCELL(V.which, V.here->txt, &prev, &thiswrd, &found);
	  /*get pointers*/
      if (!found || prev == NULL)   /*don't work on root word*/
	continue;
      /*if words meet criteria combine them*/
      score = con_thres(thiswrd->txt, prev->txt, &V);
      if (score > minthres)
	combine_words(&V);
    }
  }  /*for*/
}  /*conflate*/



/*Program entry point.*/
/*Reads the stop-words from STOPWORD.TXT, then reads GOWORD.TXT and*/
/*keeps every word that is not a stop-word.  The kept words are written*/
/*with their frequency counts, conflated with a threshold of*/
/*minthreshold, and written again.  All output goes to standard*/
/*output.  If an input file cannot be opened the program escapes*/
/*through _EscIO(FileNotFound).*/
int main(argc, argv)
int argc;
Char *argv[];
{
  PASCAL_MAIN(argc, argv);
  infile = NULL;
  INITALIZE();   /*initalize variables and arrays*/
  /*The Pascal program wrote to the console ("CON:").  The translator*/
  /*could not express that ASSIGN and the listing ended up in a*/
  /*temporary file that nobody could read; standard output is the*/
  /*console in C.*/
  outfile = stdout;

  strcpy(filename, "STOPWORD.TXT");   /*open file stopword.txt*/
  infile = fopen(filename, "r");
  if (infile == NULL)
    _EscIO(FileNotFound);
  /*READWORD leaves its argument alone at end of file, so aword is*/
  /*cleared before every call and an empty aword means that no word*/
  /*is left.  Testing for end of file after the read instead would*/
  /*throw away the last word of the file, because reading that word*/
  /*is what brings the file to its end.*/
  *aword = '\0';
  READWORD(infile, aword);   /*read stop-words into array stop_words*/
  while (*aword != '\0') {
    INSERT(stop_words, aword);
    *aword = '\0';
    READWORD(infile, aword);
  }
  fclose(infile);
  infile = NULL;   /*close file stopwords.txt*/

  strcpy(filename, "GOWORD.TXT");   /*open file goword.txt*/
  infile = fopen(filename, "r");
  if (infile == NULL)
    _EscIO(FileNotFound);
  *aword = '\0';
  READWORD(infile, aword);   /*read all words - if not in stop_words*/
  while (*aword != '\0') {   /*put it into array go_words*/
    found = false;
    FINDCELL(stop_words, aword, &prev, &thiswrd, &found);
    if (!found)
      INSERT(go_words, aword);
    *aword = '\0';
    READWORD(infile, aword);
  }
  fclose(infile);
  infile = NULL;   /*close file goword.txt*/

  fprintf(outfile, "CS 310-Data Structures-Big Program #1-By Matt Martini\n");
  fprintf(outfile, "The interesting words:\n\n");
  WRITEWORDS(go_words);   /*type out the interesting words and*/
  /*their frequency counts*/
  /*the threshold named in this heading is the value of minthreshold*/
  fprintf(outfile, "\nThe conflated list: (with a threshold of 0.5)\n\n");
  CONFLATE(go_words, minthreshold);   /*conflate list*/
  WRITEWORDS(go_words);   /*output conflated list*/
  fflush(outfile);   /*standard output is flushed, not closed*/
  outfile = NULL;
  exit(EXIT_SUCCESS);
}





/* End. */
