/* 
 * boolean_op.c -- 
 * SCCS Status     : %W%	%G%
 * Author          : Huynh Quoc T. Tung
 * Created On      : Sun Oct 17 16:40:46 1993
 * Last Modified By: Huynh Quoc T. Tung
 * Last Modified On: Mon May 30 14:28:41 1994
 * Update Count    : 27
 * Status          : Unknown, Use with caution!
 */

/*********************   PROCEDURE DESCRIPTION   ************************
 * Data structure used is the pushdown stack.
 * Basic operations:
 * push(v)
 * long v;
 *
 * push an item onto the stack (insert it at the beginning)
 *
 * pop() : pop an item (remove it from the beginning)
 * 
 * void Union(result, set1, set2)
 * search_result_struct *result;
 * search_result_struct *set1;
 * search_result_struct *set2;
 *
 * union two sets and write it into result.
 *
 * void Intersection(result, set1, set2)
 * search_result_struct * result;
 * search_result_struct * set1;
 * search_result_struct * set2;
 * 
 * intersection two sets and write it into result.
 *
 * void Difference(result, set1, set2)
 * search_result_struct *result;
 * search_result_struct *set1;
 * search_result_struct *set2;
 *
 * elements of set1 which are common to elements of set2 will be ignored.
 *
 */

/*
#include <stdio.h>
#include <string.h>
*/
#include "cdialect.h"
#include "irfiles.h"
#include "boolean_op.h"
#ifdef NEW_WEIGHT /* tung , 5/94 */
#include "irtfiles.h"
#endif

struct node
{ 
  long key; 
  struct node *next;
};
struct node *head, *z, *t;

search_result_struct *end_result = NULL;

boolean IsOperator(op)
     char *op;
{
  if(!strcmp(op,"and") ||
     !strcmp(op,"or") ||
     !strcmp(op,"not"))
    return(true);
  else return(false);
}

void stackinit()
{
  head = (struct node *) malloc(sizeof (* head));
  z = (struct node *) malloc(sizeof(* z));
  head->next = z; 
  head->key = 0;
  z->next = z;
}

void push(v)
     long v;
{
  t = (struct node *) malloc(sizeof(* t));
  t->key = v;
  t->next = head->next;
  head->next = t;
}

long pop()
{
  long x;
  
  t = head->next;
  head->next = t->next;
  x = t->key;
  free(t);
  return x;
}

int stackempty()
{
  return head->next == z;
}

void Union(result, set1, set2)
     search_result_struct *result;
     search_result_struct *set1;
     search_result_struct *set2;
{
  while((set1->number_of_hits != 0) &&
        (set2->number_of_hits != 0)) {
    if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
      result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
      result->doc_ids_array->score = set1->doc_ids_array->score;
      ++result->number_of_hits;
      ++result->doc_ids_array ;
      --set1->number_of_hits;
      ++set1->doc_ids_array; 
    }
    else if(set1->doc_ids_array->doc_id > set2->doc_ids_array->doc_id) {
      result->doc_ids_array->doc_id = set2->doc_ids_array->doc_id;
      result->doc_ids_array->score = set2->doc_ids_array->score;
      ++result->number_of_hits;
      ++result->doc_ids_array ;
      --set2->number_of_hits;
      ++set2->doc_ids_array;
    }
    else { /* doc_id1 == doc_id2  */
      result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
      result->doc_ids_array->score = set1->doc_ids_array->score + set2->doc_ids_array->score;
      ++result->number_of_hits;
      ++result->doc_ids_array ;
      --set1->number_of_hits; --set2->number_of_hits;
      ++set1->doc_ids_array; ++set2->doc_ids_array;
    }
  }
  if((set1->number_of_hits == 0) &&
     (set2->number_of_hits != 0)) {
    memcpy((char *)result->doc_ids_array, 
           (char *)set2->doc_ids_array, 
           set2->number_of_hits * sizeof(doc_descr_struct));
    set2->doc_ids_array += set2->number_of_hits;
    result->doc_ids_array += set2->number_of_hits;
    result->number_of_hits += set2->number_of_hits;
    set2->number_of_hits = 0;
  }
  else if((set1->number_of_hits != 0) &&
          (set2->number_of_hits == 0)) {
    memcpy((char *)result->doc_ids_array, 
           (char *)set1->doc_ids_array, 
           set1->number_of_hits * sizeof(doc_descr_struct));
    set1->doc_ids_array += set1->number_of_hits;
    result->doc_ids_array += set1->number_of_hits;
    result->number_of_hits += set1->number_of_hits;
    set1->number_of_hits = 0;
  }
}

void Or_Operator(op1, op2)
     search_result_struct* op1;
     search_result_struct* op2;
{
  long doc_ids_array_size = sizeof(doc_descr_struct);
  long op1_number_of_hits, op2_number_of_hits;
  
  op1_number_of_hits = op1->number_of_hits;
  op2_number_of_hits = op2->number_of_hits;
  
  if(op1->number_of_hits == 0) {
    if(op1->doc_ids_array != NULL)
      s_free(op1->doc_ids_array);
    if(op2->number_of_hits > 0) {
      memcpy((char *)end_result->doc_ids_array, 
             (char *)op2->doc_ids_array, 
             doc_ids_array_size * op2->number_of_hits);
    }
    end_result->number_of_hits = op2->number_of_hits;
    push(op2->operand_id);
  }
  else if(op2->number_of_hits == 0) {
    if(op2->doc_ids_array != NULL)
      s_free(op2->doc_ids_array);
    if(op1->number_of_hits > 0) {
      memcpy((char *)end_result->doc_ids_array, 
             (char *)op1->doc_ids_array, 
             doc_ids_array_size * op1->number_of_hits);
    }
    end_result->number_of_hits = op1->number_of_hits;
    push(op1->operand_id);
  } 
  else if((op1->number_of_hits != 0) &&
          (op2->number_of_hits != 0)) { 
    end_result->number_of_hits = 0;
    
    Union(end_result, op1, op2);
    
    op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
    op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
    end_result->doc_ids_array -= end_result->number_of_hits;
    op2->number_of_hits = end_result->number_of_hits;
    s_free(op1->doc_ids_array);
    if(end_result->number_of_hits > 0) {
      memcpy((char *)op2->doc_ids_array, 
             (char *)end_result->doc_ids_array, 
             end_result->number_of_hits * doc_ids_array_size);
    }
    else {
      op2->number_of_hits = end_result->number_of_hits;
      s_free(op2->doc_ids_array);
    }
    push(op2->operand_id);
  }
}

void Intersection(result, set1, set2)
     search_result_struct * result;
     search_result_struct * set1;
     search_result_struct * set2;
{
  while((set1->number_of_hits != 0) &&
        (set2->number_of_hits != 0)) {
    if(set1->doc_ids_array->doc_id == set2->doc_ids_array->doc_id) {
      result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
#ifdef NEW_WEIGHT /* tung , 5/94 */
      result->doc_ids_array->score = MINIMUM(set1->doc_ids_array->score, set2->doc_ids_array->score);
#else
      result->doc_ids_array->score = set1->doc_ids_array->score + set2->doc_ids_array->score;
#endif
      ++result->number_of_hits;
      ++result->doc_ids_array ;
      --set1->number_of_hits; --set2->number_of_hits;
      ++set1->doc_ids_array ; ++set2->doc_ids_array ;
    }
    else if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
      --set1->number_of_hits;
      ++set1->doc_ids_array ;
    }
    else /* doc_id1 > doc_id2 */
      {
        --set2->number_of_hits;
        ++set2->doc_ids_array ;
      }
  }
}

void And_Operator(op1, op2)
     search_result_struct* op1;
     search_result_struct* op2;
{
  long doc_ids_array_size = sizeof(doc_descr_struct);
  long op1_number_of_hits, op2_number_of_hits;
  
  op1_number_of_hits = op1->number_of_hits;
  op2_number_of_hits = op2->number_of_hits;
  
  if(op1->number_of_hits == 0) {
    end_result->number_of_hits = 0;
    if(op1->doc_ids_array != NULL)
      s_free(op1->doc_ids_array);
    if(op2->doc_ids_array != NULL)
      s_free(op2->doc_ids_array);
    push(op1->operand_id);
  }
  else if(op2->number_of_hits == 0) {
    end_result->number_of_hits = 0;
    if(op1->doc_ids_array != NULL)
      s_free(op1->doc_ids_array);
    if(op2->doc_ids_array != NULL)
      s_free(op2->doc_ids_array);
    push(op2->operand_id);
  }
  else if((op1->number_of_hits != 0) &&
          (op2->number_of_hits != 0)) {
    end_result->number_of_hits = 0;

    Intersection(end_result, op1, op2);

    op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
    op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
    end_result->doc_ids_array -= end_result->number_of_hits;
    op2->number_of_hits = end_result->number_of_hits;
    s_free(op1->doc_ids_array);

    if(end_result->number_of_hits > 0) {
      memcpy((char *)op2->doc_ids_array, 
             (char *)end_result->doc_ids_array, 
             end_result->number_of_hits * doc_ids_array_size);
    }
    else {
      op2->number_of_hits = end_result->number_of_hits;
      s_free(op2->doc_ids_array);
    }
    push(op2->operand_id);
  }
}

void Difference(result, set1, set2)
     search_result_struct *result;
     search_result_struct *set1;
     search_result_struct *set2;
{
  while((set1->number_of_hits != 0) &&
        (set2->number_of_hits != 0)) {
    if(set1->doc_ids_array->doc_id == set2->doc_ids_array->doc_id) {
#ifdef NEW_WEIGHT /* tung , 5/94 */
      result->doc_ids_array->score = MINIMUM(set1->doc_ids_array->score, 1.0 - set2->doc_ids_array->score);
      if(result->doc_ids_array->score < 0)
	result->doc_ids_array->score = 0;
#endif
      --set1->number_of_hits; --set2->number_of_hits;
      ++set1->doc_ids_array; ++set2->doc_ids_array;
    }
    else if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
      result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
      result->doc_ids_array->score = set1->doc_ids_array->score;
      ++result->number_of_hits;
      ++result->doc_ids_array;
      --set1->number_of_hits;
      ++set1->doc_ids_array;
    }
    else /* doc_id1 > doc_id2 */ {
      --set2->number_of_hits;
      ++set2->doc_ids_array;
    }
  }
  if((set1->number_of_hits != 0) &&
     (set2->number_of_hits == 0)) {
    memcpy((char *)result->doc_ids_array, 
           (char *)set1->doc_ids_array, 
           set1->number_of_hits * sizeof(doc_descr_struct));
    set1->doc_ids_array += set1->number_of_hits;
    result->doc_ids_array += set1->number_of_hits;
    result->number_of_hits += set1->number_of_hits;
    set1->number_of_hits = 0;
  }
}

void Not_Operator( op1, op2)
     search_result_struct* op1;
     search_result_struct* op2;
{
  long doc_ids_array_size = sizeof(doc_descr_struct);
  long op1_number_of_hits, op2_number_of_hits;
  
  op1_number_of_hits = op1->number_of_hits;
  op2_number_of_hits = op2->number_of_hits;
  
  if(op1->number_of_hits == 0) {
    end_result->number_of_hits = 0;
    if(op1->doc_ids_array != NULL)
      s_free(op1->doc_ids_array);
    if(op2->doc_ids_array != NULL)
      s_free(op2->doc_ids_array);
    push(op1->operand_id);
  }
  else if(op2->number_of_hits == 0) {
    if(op2->doc_ids_array != NULL)
      s_free(op2->doc_ids_array);
    if(op1->number_of_hits > 0) {
      memcpy((char *)end_result->doc_ids_array, 
             (char *)op1->doc_ids_array, 
             doc_ids_array_size * op1->number_of_hits + 1);
    }
    end_result->number_of_hits = op1->number_of_hits;
    push(op1->operand_id);
  }
  else if((op1->number_of_hits != 0) &&
          (op2->number_of_hits != 0)) {
    end_result->number_of_hits = 0;
    
    Difference(end_result, op1, op2);
    
    op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
    op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
    end_result->doc_ids_array -= end_result->number_of_hits;
    op1->number_of_hits = end_result->number_of_hits;
    s_free(op2->doc_ids_array);
    if(end_result->number_of_hits > 0) {
      memcpy((char *)op1->doc_ids_array, 
             (char *)end_result->doc_ids_array, 
             end_result->number_of_hits * doc_ids_array_size);
    }
    else {
      op1->number_of_hits = end_result->number_of_hits;
      s_free(op2->doc_ids_array);
    }
    push(op1->operand_id);
  }
}

boolean stackinitialized = false;

boolean boolean_operations(operator, result_array)
     char *operator;
     search_result_struct* result_array;
{
  long operand_id1, operand_id2;
  int i;
  
  operand_id1 = operand_id2 = -1L;
  
  if (!stackinitialized) {
    stackinit();
    stackinitialized = true;
  }
  if (!strcmp(operator, "or")) {
    if(stackempty())
      operand_id1 = -1L;
    else operand_id1 = pop();
    if(stackempty())
      operand_id2 = -1L;
    else operand_id2 = pop();
    if((operand_id1 == -1) || (operand_id2 == -1)) {
      waislog(WLOG_HIGH, WLOG_ERROR,
              "boolean search failed, too few operands.\n");
      return(false);
    }
    else Or_Operator(&result_array[operand_id1], &result_array[operand_id2]);
  }
  else if (!strcmp(operator, "and")) {
    if(stackempty())
      operand_id1 = -1L;
    else operand_id1 = pop();
    if(stackempty())
      operand_id2 = -1L;
    else operand_id2 = pop();
    if((operand_id1 == -1) || (operand_id2 == -1)) {
      waislog(WLOG_HIGH, WLOG_ERROR,
              "boolean search failed, too few operands.\n");
      return(false);
    }
    else And_Operator(&result_array[operand_id1], &result_array[operand_id2]);
  }
  else if (!strcmp(operator, "not")) {
    if(stackempty())
      operand_id1 = -1L;
    else operand_id1 = pop();
    if(stackempty())
      operand_id2 = -1L;
    else operand_id2 = pop();
    if((operand_id1 == -1) || (operand_id2 == -1)) {
      waislog(WLOG_HIGH, WLOG_ERROR,
              "boolean search failed, too few operands.\n");
      return(false);
    }
    else Not_Operator(&result_array[operand_id2], &result_array[operand_id1]);
  }
}

boolean save_operand_id(operand_id, search_result_array, entries)
     long operand_id;
     search_result_struct* search_result_array;
     long entries;
{
  if (!stackinitialized) {
    stackinit();
    stackinitialized = true;
  }
  if(end_result == NULL) {
    end_result = 
      (search_result_struct *)s_malloc(sizeof(search_result_struct));
    end_result->doc_ids_array = 
      (doc_descr_struct *)s_malloc(sizeof(doc_descr_struct) * entries);
    if(end_result->doc_ids_array == NULL) {
      waislog(WLOG_HIGH, WLOG_ERROR, "Out of memory");
      return(false);
    }
  }
  end_result->number_of_hits = search_result_array[operand_id].number_of_hits;
  if(search_result_array[operand_id].doc_ids_array != NULL) 
    memcpy((char*)end_result->doc_ids_array,
           (char*)search_result_array[operand_id].doc_ids_array,
           search_result_array[operand_id].number_of_hits * 
           sizeof(doc_descr_struct));
  push(operand_id);
  return(true);
}

long retriev_result(entries, result)
     long entries;
     double* result;
{
  int doc_id, count;
  long number_of_hits = 0;
  
  if((end_result != NULL) && (result != NULL))
    for(count=0; count < end_result->number_of_hits; count++) {
      doc_id = end_result->doc_ids_array[count].doc_id;
      result[doc_id] = end_result->doc_ids_array[count].score;
      ++number_of_hits;
    }

  if(end_result != NULL) {
    if(end_result->doc_ids_array != NULL)
      s_free(end_result->doc_ids_array);
    s_free(end_result);
  }

  stackinitialized = false;
  
  return(number_of_hits);
} 
