/* 
 * field_index.c -- 
 * ITIID           : $ITI$ $Header $__Header$
 * Author          : Huynh Quoc T. Tung
 * Created On      : Fri Mar  4 11:15:41 1994
 * Last Modified By: Huynh Quoc T. Tung
 * Last Modified On: Mon Jul 11 16:37:33 1994
 * Update Count    : 99
 * Status          : Unknown, Use with caution!
 */

/*
#include <stdio.h>
#include <string.h>
*/
#include <ctype.h>
#include "cdialect.h"
#include "futil.h"
#include "field_index.h"

#define MAX_FIELDS_NAMES 30
#define MAX_WORD_LENGTH 20
#define MAX_LINE_LENGTH 1000

#ifdef STEM_WORDS
boolean index_stemming = false; /* used in irtfiles.c */
#endif

static boolean not_field = false;   /* set by index_line_section: true while
                                       the current index kind is I_GLOBAL,
                                       i.e. the line is not added to a field */
static boolean is_field = false;    /* set by index_line_section: true while
                                       the line is added to a field
                                       (always the opposite of not_field) */
static boolean local_not_indexed = false; /* line not indexed */

/* Per-field description tables; released by clear_fields_Ftable. */
fields_struct** index_fields = NULL;

nidx_table* Ntable = NULL;     /* for lines which are not indexed.
                                  The original note reads "defined in
                                  field_y.c"; presumably the table is
                                  filled in there -- TODO confirm */
ltable* Ltable = NULL; /* for layout */

#ifdef SYSVREGEXP
/* --------------------- sgrep function --------------------------- */

#include "../regexp/regexp.h"

static int number_of_expbuf = 0;
regexp **expbuf_set;

/* ------------------------------------------------------------- */
/* Compile every regular expression of regexp_set exactly once.
   The compiled expressions are stored in expbuf_set, in the same
   order as in regexp_set; number_of_expbuf records the number of
   slots of expbuf_set.
   number_of_regexp: number of entries in regexp_set.
   regexp_set:       the regular expression source strings.
   return 0 if success (also when number_of_regexp is 0),
   return -1 if the table cannot be allocated or if one of the
   expressions does not compile. */

long compile_regexp (number_of_regexp, regexp_set)
     long number_of_regexp;
     char** regexp_set;
{
  long i;

  number_of_expbuf = 0;

  if(number_of_regexp == 0)
    return(0);
  expbuf_set = (regexp **)
    malloc(number_of_regexp*sizeof(regexp *));
  if(expbuf_set == NULL)        /* out of memory: nothing to clean up */
    return(-1);
  number_of_expbuf = number_of_regexp;

  /* Clear all slots first: if regcomp fails part-way through, the
     remaining slots must not be left holding uninitialized pointers,
     because the cleanup code walks number_of_expbuf entries. */
  for(i=0; i< number_of_regexp; i++)
    expbuf_set[i] = NULL;

  for(i=0; i< number_of_regexp; i++) {
    expbuf_set[i] = regcomp(regexp_set[i]);
    if (expbuf_set[i] == NULL) {
      return(-1);
    }
  }
  return(0);
}
/* ------------------------------------------------------------- */

static char* sgrep _AP((char* s, regexp* expbuf,  long* begin_pos, long* end_pos));

/* Search s for the compiled expression expbuf.
   On a match, *begin_pos (if begin_pos is not NULL) receives the
   offset of the first matched character and *end_pos (if end_pos is
   not NULL) the offset taken from expbuf->endp[0]; both offsets are
   relative to s.
   return a pointer to the start of the match, or NULL if regexec
   reports no match. */
static char* sgrep(s, expbuf, begin_pos, end_pos)
     char* s;
     regexp* expbuf;
     long* begin_pos;
     long* end_pos;
{
  if (!regexec(expbuf, s))
    return(NULL);               /* no match */

  if(begin_pos != NULL)
    *begin_pos = expbuf->startp[0] - s;
  if(end_pos != NULL)
    *end_pos = expbuf->endp[0] - s;
  return(expbuf->startp[0]);
}
/* ------------------------------------------------------------- */
/* matching line with regexp.
   regexp_pos is the position of compiled regexp.
 */
char* match(line, begin_pos, end_pos, regexp_pos)
     char* line;
     long* begin_pos;
     long* end_pos;
     long regexp_pos;
{
  return(sgrep(line, expbuf_set[regexp_pos], 
               begin_pos, end_pos));
}
#else
/* --------------------- sgrep function --------------------------- */

/* These macros are defined before <regexp.h> is included and are
   presumably the hooks that header's compile() routine expects
   (TODO confirm against the platform's regexp.h): INIT/GETC/PEEKC/
   UNGETC walk the pattern text through the local pointer `regexp',
   RETURN and ERROR define what compile() hands back. */
#define INIT register char *regexp=instring;
#define GETC() (*regexp++)
#define PEEKC() (*regexp)
#define UNGETC(c) (regexp--)
#define RETURN(pointer) return(0);          /* return 0 on success */
#define ERROR(val) return((char *) val);    /* return error code on failure */
/* declared ahead of the include; no return type is given, so it is
   implicitly int */
static getrnge _AP((char *string));
#include <regexp.h>

char** expbuf_set = NULL;

/* ------------------------------------------------------------- */
/* compile regexp only one,
   regexp's compiled are in expbuf_set.
   return 0 if success,
   return -1 if error. */

static int* circf_set = NULL;
static int number_of_expbuf = 0;

long compile_regexp(number_of_regexp, regexp_set)
     long number_of_regexp;
     char** regexp_set;
{
  long i;
  int  status;
  char eof;
  char *c;
  char *endbuf;

  number_of_expbuf = number_of_regexp;

  if(number_of_regexp == 0)
    return(0);
  
  if(circf_set == NULL)
    circf_set = (int*)s_malloc((size_t)(sizeof(int) * number_of_regexp));
  expbuf_set = 
    (char**)s_malloc((size_t)(sizeof(char*) * number_of_regexp));
  for(i=0; i< number_of_regexp; i++) {
    expbuf_set[i] = (char*)s_malloc((size_t)(sizeof(char) * 1024));
    endbuf = (expbuf_set[i])+1024; /* end of buffer */
    status = (int) compile(regexp_set[i], expbuf_set[i], endbuf, eof='\0');
    circf_set[i] = circf;
    if(status)
      return(-1);
  }
  return(0);
}
/* ------------------------------------------------------------- */

static char* sgrep _AP((char* s, char* expbuf, int lcircf, long* begin_pos, long* end_pos));

/* Search s for the compiled expression expbuf.
   lcircf is copied into the global circf before step() is called.
   On a match, *begin_pos (if begin_pos is not NULL) receives
   loc1 - s and *end_pos (if end_pos is not NULL) receives loc2 - s.
   return loc1 on a match, NULL otherwise. */
static char* sgrep(s, expbuf, lcircf, begin_pos, end_pos)
     char* s;
     char* expbuf;
     int lcircf;
     long* begin_pos;
     long* end_pos;
{
  circf = lcircf;

  if (!step(s, expbuf))
    return(NULL);               /* no match */

  if(begin_pos != NULL)
    *begin_pos = loc1 - s;
  if(end_pos != NULL)
    *end_pos = loc2 - s;
  return(loc1);
}
/* ------------------------------------------------------------- */
/* Match line against one of the regular expressions compiled by
   compile_regexp.
   regexp_pos is the index of the compiled regexp in expbuf_set and
   of its circf value in circf_set.
   begin_pos and end_pos are handed to sgrep unchanged; the result
   of sgrep is returned.
 */
char* match(line, begin_pos, end_pos, regexp_pos)
     char* line;
     long* begin_pos;
     long* end_pos;
     long regexp_pos;
{
  char* compiled = expbuf_set[regexp_pos];
  int anchored = circf_set[regexp_pos];

  return(sgrep(line, compiled, anchored, begin_pos, end_pos));
}
#endif
/* ------------------------------------------------------------- */

static void clear_expbuf _AP((void));

/* Release all compiled regexps and the expbuf_set table itself.
   The function may be called when nothing was compiled and may be
   called repeatedly: the table pointer is checked before it is
   indexed, and number_of_expbuf is reset so that a later call does
   not walk a table that is already gone. */
static void clear_expbuf()
{
  long i;

  if(expbuf_set != NULL) {
    for(i=0; i< number_of_expbuf; i++) {
      if(expbuf_set[i] != NULL) {
        s_free(/* (char *) */(expbuf_set[i]));
      }
    }
    s_free(expbuf_set);
    expbuf_set = NULL;
  }
  number_of_expbuf = 0;
}

/* ------------------------------------------------------------- */
static void clear_ntable _AP((void));

/* Free Ntable together with its begin_tag_pos and end_tag_pos
   members.  Does nothing when Ntable is NULL. */
static void clear_ntable()
{
  if(Ntable == NULL)
    return;

  s_free(Ntable->begin_tag_pos);
  s_free(Ntable->end_tag_pos);
  s_free(Ntable);
}
/* ------------------------------------------------------------- */

static void clear_fields_Ftable _AP((database* db));

/* Free the global index_fields table: for each of the
   db->number_of_fields fields, every Ftable entry (its index_kind
   array, the strings in indextypes and the indextypes array itself),
   then the Ftable, the field name and the field record.
   db->index_fields is set to NULL in every case, also when
   index_fields was already NULL. */
static void clear_fields_Ftable(db)
     database* db;
{
  long fld, ent, t;

  if(index_fields != NULL) {
    for(fld = 0; fld < db->number_of_fields; fld++) {
      /* walk the Ftable entries from the last one down to entry 0 */
      ent = index_fields[fld]->number_of_Ftable;
      while(--ent >= 0) {
        s_free(index_fields[fld]->Ftable[ent].index_kind);
        /* the list of index types ends at the first NULL entry or
           after NUMBER_OF_INDEXTYPES entries */
        for(t = 0;
            t < NUMBER_OF_INDEXTYPES
              && index_fields[fld]->Ftable[ent].indextypes[t] != NULL;
            t++) {
          s_free(index_fields[fld]->Ftable[ent].indextypes[t]);
        }
        s_free(index_fields[fld]->Ftable[ent].indextypes);
      }
      s_free(index_fields[fld]->Ftable);
      if (index_fields[fld]->field_name) {
        s_free(index_fields[fld]->field_name);
      }
      s_free(index_fields[fld]);
    }
    s_free(index_fields);
  }
  db->index_fields = NULL;
}
/* ------------------------------------------------------------- */

static void clear_Ltable _AP((void));

/* Free the layout table Ltable: its layout member, the optional
   date_desc and sscanf_arg members, and the table itself.
   Does nothing when Ltable is NULL. */
static void clear_Ltable()
{
  if(Ltable == NULL)
    return;

  s_free(Ltable->layout);
  if(Ltable->date_desc != NULL) {
    s_free(Ltable->date_desc);
  }
  if(Ltable->sscanf_arg != NULL) {
    s_free(Ltable->sscanf_arg);
  }
  s_free(Ltable);
}

/* ------------------------------------------------------------- */

/* Release everything this file keeps for field indexing, in this
   order: the field tables of db, the table of pending sections
   (Ftable_pos), the not-indexed table, the layout table and the
   compiled regular expressions. */
void clear_fields(db)
     database* db;
{
  clear_fields_Ftable(db);      /* index_fields and db->index_fields */
  clear_Ftable_pos();           /* pending (not yet ended) sections */
  clear_ntable();               /* Ntable */
  clear_Ltable();               /* Ltable */
  clear_expbuf();               /* compiled regexps */
}

/* ------------------------------------------------------------- */
static index_fields_struct* index_fields_array;
long Maxfield_names = 0;
long Nfield_names = 0;

static void clear_index_fields_array _AP((long number_of_elements));

/* Free index_fields_array: the first number_of_elements field name
   strings, the numeric flag array, the field_names array and the
   record itself.  The counters Maxfield_names and Nfield_names are
   reset to 0 in every case, also when index_fields_array is NULL. */
static void clear_index_fields_array(number_of_elements)
     long number_of_elements;
{
  long n = 0;

  if(index_fields_array != NULL) {
    while(n < number_of_elements) {
      s_free(index_fields_array->field_names[n]);
      ++n;
    }
    s_free(index_fields_array->numeric);
    s_free(index_fields_array->field_names);
    s_free(index_fields_array);
  }
  Nfield_names = 0;
  Maxfield_names = 0;
}
/* ------------------------------------------------------------- */

  
/* Store the name of a field to generate and report its field_id.
   field_name:         name of the field; NULL is allowed and is
                       stored (and looked up) as a NULL entry.
   number_of_elements: in/out, number of names stored so far; it is
                       incremented when a new entry is appended.
   field_id:           out, index of the entry for field_name.
   return 0 if the name was already stored,
   return 1 if a new entry was appended. */

long save_index_fields(field_name, number_of_elements, field_id)
     char* field_name;
     long* number_of_elements;
     long *field_id;
{
  long slot = 0;                /* index of the entry found or appended */
  long name_len;
  
  if(index_fields_array == NULL) {
    /* first call: create the table; the new entry goes to slot 0 */
    index_fields_array = (index_fields_struct*)
      s_malloc((size_t)(sizeof(index_fields_struct)));
    Maxfield_names = Nfield_names + MAX_FIELDS_NAMES;
    index_fields_array->field_names =
      (char**)s_malloc((size_t)(sizeof(char*) * Maxfield_names));
    index_fields_array->numeric =
      (boolean*)s_malloc((size_t)(sizeof(boolean) * Maxfield_names));
  }
  else {
    /* look for an existing entry with the same name */
    while(slot < *number_of_elements) {
      char* known = index_fields_array->field_names[slot];

      if(field_name == NULL
         ? (known == NULL)
         : (known != NULL && !strcmp(field_name, known))) {
        *field_id = slot;
        return(0);
      }
      ++slot;
    }
    /* not found: make room for MAX_FIELDS_NAMES more entries if the
       table is full */
    if(Nfield_names >= Maxfield_names) {
      Maxfield_names += MAX_FIELDS_NAMES;
      index_fields_array->field_names = 
        (char**)s_realloc(index_fields_array->field_names, 
                          (size_t)(sizeof(char*) * Maxfield_names));
      index_fields_array->numeric =
        (boolean*)s_realloc(index_fields_array->numeric,
                            (size_t)(sizeof(boolean) * Maxfield_names));
    }
  }

  /* append the new entry at slot */
  ++Nfield_names;
  *field_id = slot;
  *number_of_elements += 1;
  index_fields_array->numeric[slot] = false;
  if(field_name == NULL)
    index_fields_array->field_names[slot] = NULL;
  else {
    name_len = strlen(field_name);
    index_fields_array->field_names[slot] = 
      (char*)s_malloc((size_t)(sizeof(char) * (name_len + 2)));
    s_strncpy(index_fields_array->field_names[slot], field_name, name_len + 1);
  }
  return(1);
}
/* ------------------------------------------------------------- */

void save_numeric_index_fields(field_id)
     long field_id;
{
  index_fields_array->numeric[field_id] = true;
}
/* ------------------------------------------------------------- */

/* Ftable indices of the sections whose end tag has not been seen
   yet; filled by how_index_line, -1 marks an entry that has ended. */
static long* Ftable_pos = NULL;

/* Free Ftable_pos.  Does nothing when it is NULL. */
void clear_Ftable_pos()
{
  if(Ftable_pos == NULL)
    return;

  s_free(Ftable_pos);
}


/* ------------------------------------------------------------- */
/* Check how a line should be indexed for the field field_id and
 * pass the relevant part of it to index_line_section.
 *
 * The line is copied into two local buffers of MAX_LINE_LENGTH
 * bytes: tmpline (the text that is matched) and indexline (the text
 * that is indexed).  The function then works in two steps:
 *  1. if sections of earlier lines are still open
 *     (*number_of_not_ended_section > 0), it looks for their end
 *     tags in this line and indexes the line for them;
 *  2. for every Ftable entry of the field whose begin tag matches,
 *     it cuts indexline down to the part to index, records the
 *     entry in Ftable_pos when its end tag is not on this line, and
 *     indexes the result.
 *
 * number_of_not_ended_section is in/out: the number of entries used
 * in Ftable_pos.  The remaining parameters are handed to
 * index_line_section unchanged.
 *
 * Always returns 0; a failure of index_line_section is only logged.
 */

long how_index_line(field_id, line, 
                    number_of_not_ended_section,
                    document_id,
                    weight,
                    file_position_before_line,
                    line_length,
                    newline_terminated,
                    db,
                    wordfunction1,
                    wordfunction2,
                    word_position, word_pairs,
                    minwordlen, type)	
     long field_id;
     char* line;
     long* number_of_not_ended_section;
     long document_id;
     long weight;
     long file_position_before_line;
     long *line_length;
     boolean *newline_terminated;
     database* db;
     wordfunc* wordfunction1;
     wordfunc* wordfunction2;
     boolean word_position, word_pairs;
     int minwordlen;
     char* type;
{
  long i, fpos, tmplen;
  long char_count;
  long lnumber_of_not_ended_section;
  long begin_pos; /* Offset (not a pointer) of the first character
                   * that matched the regular expression, as
                   * written by match().
                   */
  long end_pos;   /* Offset written by match().  Depending on the
                   * argument slot it is passed in, this is either
                   * the offset of the end of a match or the offset
                   * of the start of a match (see the calls below).
                   */
  long index_pos, n_index_pos;
  long rest;
  char* matchline = NULL;
  char tmpline[MAX_LINE_LENGTH];
  char indexline[MAX_LINE_LENGTH];

  s_strncpy(tmpline, line, MAX_LINE_LENGTH);
  tmplen = strlen(tmpline);
 /* if(tmpline[tmplen - 1] == '\n')
    tmpline[tmplen - 1] = '\0'; */
  s_strncpy(indexline, line, MAX_LINE_LENGTH);

  /* If the end marker of a previous line was not found, continue
     to search for the end marker.
   */
  if(*number_of_not_ended_section > 0) {
    /* counts the entries that are still open after this line */
    lnumber_of_not_ended_section = *number_of_not_ended_section;
    for(i=0; i<*number_of_not_ended_section; i++) {
      if(Ftable_pos[i] > -1) { 
        fpos = Ftable_pos[i];
        /* &end_pos is passed in match()'s begin_pos slot, so end_pos
           receives the offset at which the end tag starts */
        if(match(tmpline,&end_pos,NULL,
                 db->index_fields[field_id]->Ftable[fpos].end_tag_pos)) {
          if(end_pos > 0) /* copy n-character until end_pos */
            s_strncpy(indexline, tmpline, end_pos + 1);
          else indexline[0] = '\0'; /* end marker at the beginning of the line: do not index this line */
          Ftable_pos[i] = -1;
          --lnumber_of_not_ended_section;
        }
        /* end marker not yet found */
        else s_strncpy(indexline, tmpline, MAX_LINE_LENGTH);
      }
      else --lnumber_of_not_ended_section;
      /* NOTE(review): this call is also reached for entries that
         already ended (Ftable_pos[i] == -1).  fpos then still holds
         the value of an earlier iteration, or is uninitialized when
         the first entry is -1 -- confirm that this is intended. */
      if(indexline[0] != '\0') {
        if(-1 == index_line_section(field_id, fpos,
                                    indexline,
                                    document_id,
                                    weight,
                                    file_position_before_line,
                                    line_length,
                                    newline_terminated,
                                    db,
                                    wordfunction1,
                                    wordfunction2,
                                    word_position, word_pairs,
                                    minwordlen, type))
          waislog(WLOG_HIGH, WLOG_ERROR, "map_over_words failed");
      }
    }
    /* the pending list is emptied only when all entries have ended */
    if(lnumber_of_not_ended_section == 0) 
      *number_of_not_ended_section = 0;
  }
  
  for(i=0; i < db->index_fields[field_id]->number_of_Ftable; i++) { /* check for field_name */
    /* match() returns a pointer to the start of the begin-tag match;
       matchline is a copy made by s_strdup (presumably NULL when
       there is no match -- TODO confirm s_strdup(NULL)) */
    if((matchline = s_strdup(match(tmpline, &begin_pos, &end_pos, 
                                   db->index_fields[field_id]->Ftable[i].begin_tag_pos)))) { 
      /* match field: blank out everything before the begin tag */
      for(char_count=0; char_count<begin_pos; char_count++) 
        indexline[char_count] = ' ';
      s_strncpy(&indexline[begin_pos], matchline, MAX_LINE_LENGTH - begin_pos);

      /* The index position is given.
         The line should be indexed exactly at index_pos.
       */
      if(db->index_fields[field_id]->Ftable[i].index_pos > -1) {
        if(matchline != NULL) s_free(matchline);
        /* &index_pos is passed in match()'s end_pos slot, so
           index_pos receives the offset of the end of the match */
        if(match(tmpline, NULL, &index_pos,
                 db->index_fields[field_id]->Ftable[i].index_pos)) {
          for(char_count=0; char_count<index_pos - 1; char_count++) 
            indexline[char_count] = ' ';
          /* NOTE(review): index_pos - 1 is used as an array index
             below; an index_pos of 0 would address element -1. */
          if(match(&tmpline[index_pos-1], &end_pos, NULL, db->index_fields[field_id]->Ftable[i].end_tag_pos)) 
            s_strncpy(&indexline[index_pos-1], &tmpline[index_pos-1],end_pos + 1);
          else {
            /* end tag not on this line: remember entry i as open */
            s_strncpy(&indexline[index_pos-1], &tmpline[index_pos-1], MAX_LINE_LENGTH - index_pos - 1);
            if(Ftable_pos == NULL)
              Ftable_pos = (long*)s_malloc((size_t)sizeof(long) * 10);
            else {
              /* room for 10 entries at first; from 10 pending
                 entries on, grow to the current count plus 10 */
              if(*number_of_not_ended_section >= 10) {
                Ftable_pos = (long*) s_realloc(Ftable_pos, 
                                               (size_t)sizeof(long) * (*number_of_not_ended_section + 10));
              }
            }
            Ftable_pos[*number_of_not_ended_section] = i;
            ++(*number_of_not_ended_section);
          }
        }
      }
      else {
        /* no index position given: index from the begin tag on */
        for(char_count=0; char_count<begin_pos; char_count++) 
          indexline[char_count] = ' ';
	rest = end_pos - begin_pos;   /* length of the begin-tag match */
        /* search the end tag behind the begin tag; end_pos is
           overwritten with the offset of the end tag relative to
           the end of the begin tag */
        if(match(&tmpline[end_pos], &end_pos, NULL, db->index_fields[field_id]->Ftable[i].end_tag_pos))
          s_strncpy(&indexline[begin_pos], matchline, end_pos + rest + 1);
        else {
          /* end tag not on this line: remember entry i as open */
          if(Ftable_pos == NULL)
            Ftable_pos = (long*)s_malloc((size_t)sizeof(long) * 10);
          else {
            if(*number_of_not_ended_section >= 10) {
              Ftable_pos = (long*) s_realloc(Ftable_pos, 
                                             (size_t)sizeof(long) * (*number_of_not_ended_section + 10));
            }
          }
          Ftable_pos[*number_of_not_ended_section] = i;
          ++(*number_of_not_ended_section);
        }
      }

      /* It is a numeric field. */

      if(db->index_fields[field_id]->numeric) {
        
        if(db->index_fields[field_id]->n_index_pos > -1) {

          /* Indexing line at n_index_pos, 
             or at index_pos or at beginning of line 
           */
          if(match(tmpline, NULL, &n_index_pos,
                   db->index_fields[field_id]->n_index_pos)) {
            for(char_count=0; char_count<n_index_pos - 1; char_count++) 
              indexline[char_count] = ' ';
	    if(db->index_fields[field_id]->numeric_len > 0) {
	      s_strncpy(&indexline[n_index_pos-1], &tmpline[n_index_pos-1], 
			db->index_fields[field_id]->numeric_len + 1);
	    } else {
	      /* NOTE(review): end_pos still holds whatever the
	         matches above left in it -- confirm it is the
	         intended limit here. */
	      if(end_pos > 0) {
		s_strncpy(&indexline[n_index_pos-1], &tmpline[n_index_pos-1],end_pos + 1);
	      } else {
		s_strncpy(&indexline[n_index_pos-1], &tmpline[n_index_pos-1],
			  MAX_LINE_LENGTH - n_index_pos);
	      }
	    }
          }
        }
	else {
	  /* no numeric index position: cut the line to numeric_len */
	  if(db->index_fields[field_id]->numeric_len > 0) {
	    indexline[db->index_fields[field_id]->numeric_len] = '\0';
	  }
	}
      }
      
      if(-1 == index_line_section(field_id, i,
                                  indexline,
                                  document_id,
                                  weight,
                                  file_position_before_line,
                                  line_length,
                                  newline_terminated,
                                  db,
                                  wordfunction1,
                                  wordfunction2,
                                  word_position, word_pairs,
                                  minwordlen, type))
        waislog(WLOG_HIGH, WLOG_ERROR, "map_over_words failed");
    
      /* NOTE(review): matchline may already have been released in
         the index_pos branch above; the NULL tests here and after
         the loop only protect against a second release if s_free
         resets its argument to NULL -- TODO confirm. */
      if(matchline != NULL) s_free(matchline);
    }
  }
  if(matchline != NULL) s_free(matchline);
  return(0);
}
/* ------------------------------------------------------------- */

/* Count the words of a line that is not to be indexed.
   A word is a run of at least two alphanumeric characters that is
   followed by a non-alphanumeric character.
   NOTE(review): a run that reaches the end of the string is not
   counted; this is the behaviour of the original loop and is kept.

   line:               the line, a NUL terminated string.
   line_length:        if not NULL, receives the number of characters
                       of line.
   newline_terminated: if not NULL, receives true if the line ends
                       with a newline, else false.  An empty line is
                       reported as not newline terminated.
   return the number of words.
*/

long count_words(line, line_length, newline_terminated)
     char* line;
     long* line_length;
     boolean* newline_terminated;
{
  long length = 0;              /* characters seen so far */
  long word_len = 0;            /* length of the current run */
  long word_count = 0;
  int ch;
  
  for(ch = (unsigned char)line[length]; ch != '\0';
      ch = (unsigned char)line[++length]) {
    /* test the int result directly; storing it in a boolean could
       lose a non-zero value if that type is narrower than int */
    if(isalnum(ch) != 0)
      word_len++;
    else {
      if(word_len >= 2)
        word_count++;
      word_len = 0;
    }
  }
  if(newline_terminated != NULL){
    /* length > 0 keeps an empty line from reading line[-1] */
    if(length > 0 && '\n' == line[length - 1])
      *newline_terminated = true;
    else
      *newline_terminated = false;
  }
  if(line_length != NULL)
    *line_length = length;
  return(word_count);
}
/* ------------------------------------------------------------- */

/* Index line as described by Ftable entry fpos of field field_id
   (the entry comes from the field description file).

   The entry carries a NULL terminated list of index types and, in
   parallel, a list of index kinds.  For every index type the line
   is passed to map_over_words once:
     - kind I_GLOBAL: with wordfunction1 (not_field is set), the
       stemming flag is taken from db->stemming;
     - any other kind: with wordfunction2 (is_field is set), the
       stemming flag is taken from db->fields[field_id].stemming.
   The type argument is overwritten for each index type: NULL when
   the type starts with TEXT, the type string otherwise.

   If the last processed index type was a field kind, the word count
   of the last map_over_words call is added to the total word count
   of the field.
   return the result of the last map_over_words call (0 if there is
   no index type). */

long index_line_section(field_id, fpos,
                        line,
                        document_id,
                        weight,
                        file_position_before_line,
                        line_length,
                        newline_terminated,
                        db,
                        wordfunction1,
                        wordfunction2,
                        word_position, word_pairs,
                        minwordlen, type)
     long field_id;
     long fpos;
     char* line;
     long document_id;
     long weight;
     long file_position_before_line;
     long *line_length;
     boolean *newline_terminated;
     database* db;
     wordfunc* wordfunction1;
     wordfunc* wordfunction2;
     boolean word_position, word_pairs;
     int minwordlen;
     char* type;
{
  long n;
  long number_of_words = 0;
  wordfunc* handler;            /* word function for the current kind */
  char** indextypes = db->index_fields[field_id]->Ftable[fpos].indextypes;
  int*  index_kind = db->index_fields[field_id]->Ftable[fpos].index_kind;

  for(n = 0; indextypes[n] != NULL; ++n) {
    /* plain text is indexed without a type */
    type = strncmp(indextypes[n],TEXT,strlen(TEXT)) ? indextypes[n] : NULL;

    if(index_kind[n] == I_GLOBAL) {
      not_field = true;
      is_field = false;
      handler = wordfunction1;
    }
    else {
      not_field = false;
      is_field = true;
      handler = wordfunction2;
    }

#ifdef STEM_WORDS
    /* index_stemming is read by map_over_words of irtfiles.c */
    if(not_field)
      index_stemming = db->stemming ? true : false;
    else
      index_stemming = db->fields[field_id].stemming ? true : false;
#endif
    number_of_words = map_over_words(line, document_id,
                                     weight,
                                     file_position_before_line,
                                     line_length,
                                     newline_terminated,
                                     db,
                                     handler,
                                     word_position, word_pairs,
#ifdef SOUND
                                     minwordlen, type);
#else
                                     minwordlen);
#endif
#ifdef STEM_WORDS
    index_stemming = false;
#endif
  }
  if(is_field)
    db->fields[field_id].total_word_count += number_of_words;
  return(number_of_words);
}
/* ------------------------------------------------------------- */

/* insert all fields to create */

boolean init_index_fields(adding_to_existing_index, create_new_fields, 
                          field_adding_to_existing_index, number_of_fields, db)
     boolean* adding_to_existing_index;
     boolean* create_new_fields;
     boolean field_adding_to_existing_index;
     long number_of_fields;
     database* db;
{
  long i, j;
  long fno_len, fna_len, em_len, idx_len, nidx_flen, nidx_elen;
  long number_of_fields_not_exists = 0;
  char file[1001];
  char* system_call;
  char* field_name;
  
  if(number_of_fields == 0)
    return(0);

  db->fields = 
    (field_db*)s_malloc((size_t)(sizeof(field_db) * number_of_fields));
  if(db->field_index_streams == NULL)
    db->field_index_streams = (FILE**)s_malloc((size_t)sizeof(FILE*));
  if(db->field_dictionary_streams == NULL)
    db->field_dictionary_streams = (FILE**)s_malloc((size_t)sizeof(FILE*));

  if(*create_new_fields && !field_adding_to_existing_index) {
    field_name = (char*)s_malloc((size_t)(sizeof(char) * (MAX_FILENAME_LEN + 1)));
    db->number_of_fields = 0;
  }
  
  for(i= 0; i< number_of_fields; i++) {
    /* only fields which not exist will be created */
    if(*create_new_fields && !field_adding_to_existing_index) {
      if(index_fields_array->field_names[i] != NULL) {
        s_strncpy(field_name, db->database_file, MAX_FILENAME_LEN);
        s_strncat(field_name,field_ext,MAX_FILENAME_LEN,MAX_FILENAME_LEN);
        s_strncat(field_name,index_fields_array->field_names[i],
                  MAX_FILENAME_LEN,MAX_FILENAME_LEN);
        s_strncat(field_name,dictionary_ext,MAX_FILENAME_LEN,MAX_FILENAME_LEN);
        if(!probe_file(field_name)) {
          db->fields[number_of_fields_not_exists].field_id = i;
          db->fields[number_of_fields_not_exists].index_file_number = 0;
          db->fields[number_of_fields_not_exists].total_word_count = 0;
          db->fields[number_of_fields_not_exists].numeric = index_fields_array->numeric[i];
	  db->fields[number_of_fields_not_exists].stemming = index_fields[i]->stemming;
          fna_len = strlen(index_fields_array->field_names[i]);
          db->fields[number_of_fields_not_exists].field_name = 
            (char*)s_malloc((size_t)(sizeof(char) * (fna_len + 2)));
          s_strncpy(db->fields[number_of_fields_not_exists].field_name, 
                    index_fields_array->field_names[i], fna_len + 1);
          ++number_of_fields_not_exists;
        }
      }
      db->number_of_fields += number_of_fields_not_exists;
    }
    else { /* insert all field names in database */
      db->fields[i].field_id = i;
      db->fields[i].index_file_number = 0;
      db->fields[i].total_word_count = 0;
      if(index_fields_array->field_names[i] != NULL) {
        fna_len = strlen(index_fields_array->field_names[i]);
        db->fields[i].field_name = 
          (char*)s_malloc((size_t)(sizeof(char) * (fna_len + 2)));
        s_strncpy(db->fields[i].field_name,index_fields_array->field_names[i], 
                  fna_len + 1);
      }
      else db->fields[i].field_name = NULL;
      db->fields[i].numeric = index_fields_array->numeric[i];
      db->fields[i].stemming = index_fields[i]->stemming;
    }
  }
  clear_index_fields_array(number_of_fields);
  
  /* delete all fields exist */
  
  if(!*adding_to_existing_index) {
    if(!*create_new_fields) {
      system_call = (char*)s_malloc((size_t)(sizeof(char) * (1000 + 3)));
      strncpy(system_call, "rm ", MAX_FILENAME_LEN + 3);
      s_strncat(system_call, db->database_file,
                MAX_FILENAME_LEN, MAX_FILENAME_LEN);
      s_strncat(system_call, field_ext, MAX_FILENAME_LEN, MAX_FILENAME_LEN);
      s_strncat(system_call, "*", MAX_FILENAME_LEN, MAX_FILENAME_LEN);
      s_strncat(system_call, dictionary_ext, 
                MAX_FILENAME_LEN, MAX_FILENAME_LEN);
      system(system_call);
      s_free(system_call);
    }
  }
  /* insert only new fields, old fields not deleted and
   * adding new words in global dictionary
   */
  if(*create_new_fields && field_adding_to_existing_index) {
    *create_new_fields = false;
  }
  /* insert only new fields, old fields not deleted and 
   * not updates global fields.
   */
  else if(*create_new_fields && !field_adding_to_existing_index) {
    db->doc_table_allocated_entries = 1;
    s_free(field_name);
  }
  return(0);
}

/* ------------------------------------------------------------- */
/* Open the streams of the field field_id.
 * The stream pointers are stored in the first element of
 * db->field_index_streams and db->field_dictionary_streams, which
 * are allocated here if they are still NULL.
 *
 * initialize true:  the inverted index file is created ("w+b").
 * initialize false: the dictionary file and the inverted index file
 *   are opened for update ("r+b").  If field_adding_to_existing_index
 *   is true, a missing dictionary file is tolerated and a missing
 *   index file is created instead.
 *
 * On error a message is logged and db is disposed.
 * return 0, success.
 * return 1, error
 */

boolean open_field_streams_init(initialize, field_adding_to_existing_index, field_id, db)
     boolean initialize;
     boolean field_adding_to_existing_index;
     long field_id;
     database* db;
{
  char file[1001];
  char* fname;                  /* name of the field */
  
  if(db->field_index_streams == NULL)
    db->field_index_streams = (FILE**)s_malloc((size_t)sizeof(FILE*));
  if(db->field_dictionary_streams == NULL)
    db->field_dictionary_streams = (FILE**)s_malloc((size_t)sizeof(FILE*));

  fname = db->fields[field_id].field_name;

  if(!initialize) {
    *(db->field_dictionary_streams) = 
      s_fopen(field_dictionary_filename(file, fname, db), "r+b");
    if(*(db->field_dictionary_streams) == NULL
       && !field_adding_to_existing_index) {
      waislog(WLOG_HIGH, WLOG_ERROR,
              "can't open the word hash file %s\n",file); 
      disposeDatabase(db);
      return(1);
    }
    *(db->field_index_streams) = 
      s_fopen(field_index_filename(file, fname, db), "r+b");
    /* no index file yet: create it when adding to an existing index */
    if(*(db->field_index_streams) == NULL
       && field_adding_to_existing_index)
      *(db->field_index_streams) = 
        s_fopen(field_index_filename(file, fname, db), "w+b");
  }
  else {
    /* ext_open_database for fields */
    *(db->field_index_streams) = NULL;
    *(db->field_index_streams) = 
      s_fopen(field_index_filename(file, fname, db), "w+b");
  }

  /* common error exit for the inverted index file */
  if(*(db->field_index_streams) == NULL) {
    waislog(WLOG_HIGH, WLOG_ERROR,
            "2can't open the inverted index file of field %s\n", 
            fname);
    disposeDatabase(db);
    return(1);
  }
  return(0);
}

/* Open the global database db.
 * If initialize is false, the dictionary file is opened for update
 * ("r+b") first; if that fails, the error is logged, db is disposed
 * and false is returned.
 * Otherwise the result of ext_open_database is returned.
 */
boolean open_global_Database(initialize, db)
     boolean initialize;
     database* db;
{
  char file[MAX_FILE_NAME_LEN + 1 ];
  
  if(initialize != false)       /* nothing to open beforehand */
    return(ext_open_database(db, initialize, false));

  db->dictionary_stream = s_fopen(dictionary_filename(file, db), "r+b");
  if (db->dictionary_stream != NULL)
    return(ext_open_database(db, initialize, false));

  waislog(WLOG_HIGH,WLOG_ERROR,"can't open the word hash file %s\n",file); 
  disposeDatabase(db);
  return(false);
}
