/*

Description

A query is compared with a database. 

Some tuples in the database are returned as possibly relevant records;
this set is here called 'the resultfile'.

A number of these tuples are selected as 'good' answers to that query;
the rest are considered 'bad'.

Input:  query, database
Output: adjusted query

==========================================

Usage: genetic [options] [-f set_of_examples] resultfile 

Every tuple of course is a document vector.

The set_of_examples is a list of integers that point to 
those documents in the database that act as 'good' examples.

Alternatively, the last value in each document vector may act as an
indicator of whether that document is a good or a bad example.

*/

#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
#include "utils.h"

/* Size in bytes of the line buffer that read_vector() declares. */
#define BUF_SIZE 64000
/* Number of weight slots in every word_vector. */
#define VECTOR_SIZE 32000

/*
 * One record of the result file.
 *
 * read_vector() stores the first token of an input line in `concept`
 * and the numeric value of every following token in `weight`.  When
 * judgements are supplied separately, main() also writes a relevance
 * flag into weight[vector_length].
 */
typedef struct
{
    char  concept[WORD_LENGTH];   /* label: first token of the line */
    float weight[VECTOR_SIZE];    /* values parsed from the remaining tokens */
} word_vector;

/* Line buffer; not referenced in the visible part of this file. */
char  regel[1024];
/* Name of the data file; filled by get_arguments(), opened by main(). */
char  dnaam[WORD_LENGTH];

/* Number of weights parsed from the record most recently read. */
int   vector_length;
/* Set to 1 by get_arguments(); only printed in debug mode here. */
int   last_score;

/* Count of entries in examples[]. */
int   num_examples;
/* Document numbers read from the -f file; main() uses them as indices. */
int   examples[100];

/* Not referenced in the visible part of this file. */
float sum_rel;
float sum_irrel;

/* All three are set to 1 by get_arguments().
   NOTE(review): `gamma` shares its name with a legacy <math.h> function
   on some C libraries -- TODO confirm this still builds on the target. */
float alpha;
float beta;
float gamma;


/* ---------------------------------------------------------------- */
/*
 * help: write the usage text to standard output and terminate the
 * program with exit status 1.  Never returns.
 */
void help(void)
{
static const char *const usage_text[] =
   {
   "Usage: genetic [options] [-f set_of_examples] resultfile\n",
   "   -A : print command-line, preceeded by '#'\n",
   "   -f file  : numbers of relevant records\n",
   "Copyright Hans Paijmans 1995,1996\n",
   "version 1.0,  sept. 1996\n"
   };
const size_t line_count = sizeof usage_text / sizeof usage_text[0];
size_t i;

/* none of the lines contains a conversion, so fputs emits them verbatim */
for (i = 0; i < line_count; i++)
   fputs(usage_text[i], stdout);

exit(1);
}


/* ---------------------------------------------------------------- */

/* ---------------------------------------------------------------- */
void get_arguments(int argc,char **argv)
{
char *s,o;
int x,n;
FILE *fuit;
extern int optind;
extern char *optarg;

num_examples=0;
alpha=beta=gamma=1;

if ((argc<2) || (!strcmp(argv[1],"-h"))) help();

while ((o=getopt(argc,argv,"Ahdv"))!=-1)
     switch (o)
       {
       case 'A': printf("# ");
                 for (n=0;n<argc;n++) printf("%s ",argv[n]);
                 printf("\n");
                 break;
       case 'd': debug=1; break;

       case 'f': if ((fuit=fopen(optarg,"r"))==NULL) 
                          error_exit("no judgements-file: ",optarg);
                 while (!feof(fuit)) 
                          fscanf(fuit,"%d ",&examples[num_examples++]);
                 fclose(fuit);
                 if (debug)
                      for (x=0;x<num_examples;x++) printf("%d ",examples[x]);
                 break;
       case 'h': help();break;
       case 'v': verbose=1; break;

       }
/* --- */

dnaam[0]=0;

if (optind<argc) strcpy(dnaam,argv[optind++]);

last_score=1;

if (debug) 
    {
    check_all();
    printf("last_score         %d\n",last_score);
    printf("dname              %s\n",dnaam);
    }
}
/* ---------------------------------------------------------------- */
/*
 * read_vector: read the next record from f.
 *
 * A record is one text line: the first whitespace-separated token is
 * the concept label, every following token is converted with atof()
 * and stored as a weight.  Lines that contain no token are skipped.
 *
 * Returns a newly allocated, zero-initialised word_vector, or NULL when
 * no further record can be read.  The caller owns the returned memory.
 * Side effect: the global vector_length is set to the number of weights
 * stored for this record.
 *
 * A label that does not fit in `concept` is truncated.  A line longer
 * than the buffer is still read in pieces by fgets().
 */
word_vector *read_vector(FILE *f)
{
static const char delims[]=" \t\r\n";  /* newline included, so it never
                                          ends up inside a token */
char *token;
char buffer[BUF_SIZE];
int x,max_weights;
word_vector *vec;

/* use the whole buffer, and skip lines that hold no token at all */
do
   {
   if (fgets(buffer,sizeof buffer,f)==NULL) return(NULL);
   token=strtok(buffer,delims);
   }
while (token==NULL);

/* calloc: weights that are never assigned read as 0 instead of garbage */
vec=calloc(1,sizeof *vec);
if (vec==NULL)
   {
   error_exit("Memory problem","");
   return(NULL);
   }

snprintf(vec->concept,sizeof vec->concept,"%s",token);

/* main() may store a relevance flag in weight[vector_length], so the
   last slot of the array is kept free for it */
max_weights=(int)(sizeof vec->weight/sizeof vec->weight[0])-1;

x=0;
while ((token=strtok(NULL,delims))!=NULL)
  {
  if (x>=max_weights)
     {
     error_exit("Too many weights for: ",vec->concept);
     break;
     }
  vec->weight[x++]=atof(token);
  }
vector_length=x;
return (vec);
}
/* ---------------------------------------------------------------- */
/*
 * main: read all document vectors from the data file named on the
 * command line, attach a relevance judgement to each of them and, in
 * verbose mode, print every vector followed by '+' or '-'.
 *
 * Documents are numbered from 1 in reading order; doc_vec[0] is unused.
 * The numbers in examples[] are used directly as indices into doc_vec.
 *
 * error_exit() comes from utils.h; presumably it terminates the program
 * (TODO confirm).  The new guards call exit() themselves so that they
 * hold either way.
 */
int main(int argc,char **argv)
{
enum { MAX_DOCS=10000 };
int x,y;
int doc_aantal;     /* index of the next free slot in doc_vec */
int weight_slots;
char number[32];
FILE *dfile;
word_vector *vec,*doc_vec[MAX_DOCS];

initialize();
get_arguments(argc,argv);

if ((dfile=fopen(dnaam,"r"))==NULL)
   {
   error_exit ("Datafile not found: ",dnaam);
   exit(1);
   }

/* -------------------- read the documentvectors ---------------------- */
doc_aantal=1;
while ((vec=read_vector(dfile))!=NULL)
   {
   if (doc_aantal>=MAX_DOCS)
      {
      error_exit("Too many documents in: ",dnaam);
      exit(1);
      }
   doc_vec[doc_aantal++]=vec;
   }
fclose(dfile);

if (debug) printf("num_examp  %d\ndoc_aantal %d\n",num_examples,doc_aantal);

/* ------------------- add the relevance judgements ------------------- */

/* sizeof does not evaluate its operand, so vec being NULL is harmless */
weight_slots=(int)(sizeof vec->weight/sizeof vec->weight[0]);

if (num_examples)  /* a separate list of judgements was given: */
   {               /* append a judgement after the weights of each record */
   if (vector_length>=weight_slots)
      {
      error_exit("No room left for the judgement in: ",dnaam);
      exit(1);
      }
   for (y=1;y<doc_aantal;y++) doc_vec[y]->weight[vector_length]=0;
   for (y=0;y<num_examples;y++)
      {
      /* only documents that were actually read may be marked */
      if (examples[y]<1 || examples[y]>=doc_aantal)
         {
         snprintf(number,sizeof number,"%d",examples[y]);
         error_exit("Judgement for unknown document: ",number);
         exit(1);
         }
      doc_vec[examples[y]]->weight[vector_length]=1;
      }
   }
else if (vector_length>0) vector_length--;  /* last value is the judgement;
                                               never step below index 0 */

/* ------------------------------------------------------------------- */
/*    weight[vector_length] now always holds the relevance judgement    */

/* ------------------- debug information -------------------- */
if (verbose)
  {

  for (y=1;y<doc_aantal;y++)
     {
     printf("%10s ",doc_vec[y]->concept);
     for (x=0;x<vector_length;x++) printf("%5.2f ",doc_vec[y]->weight[x]);
     if (doc_vec[y]->weight[vector_length]) printf("+"); else printf("-");
     printf("\n");
     }
  }
/* ------------------------------------------------------------ */



printf("\n");
return 0;
}
























