/* This is systool $Revision: 1.15 $ */

/* This file contains a set of utility routines which come in handy
 * often, but are not necessarily tied to one single routine in the
 * optimizer.
 */

#include "optimqr.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

/* The apply_givens() routine applies a Givens Rotation to some
 * matrix.  It does not perform an actual numerical calculation, but
 * it sets the zeros/non-zeros according to what a Givens Rotation
 * would do to the system given.
 *
 * The routine takes five arguments.
 * 1) The matrix holding the zero/non-zero values
 * 2) The system description that specifies how rows/columns are ordered
 * 3) The row "a" in which the nonzero to eliminate with, resides
 * 4) The row "b" in which the nonzero to eliminate resides
 * 5) The column "c" in which the two elements reside
 */
void apply_givens(matrix_t* matrix, Tsolution* sys, 
		  unsigned a, unsigned b, unsigned c) 
{
  /* Symbolic Givens rotation: element (b,c) is eliminated using
   * element (a,c).  The logical positions are first translated
   * through the current row/column ordering of the system.
   */
  const int pivot_row  = sys->row_ordering[a];
  const int target_row = sys->row_ordering[b];
  const int elim_col   = sys->column_ordering[c];
  int k;

  /* Preconditions: two distinct rows, the pivot sits on the diagonal,
   * the target lies on or below it, and both elements are non-zero.
   */
  assert(pivot_row != target_row);
  assert(a == c);
  assert(b >= c);
  assert(matrix->elements[pivot_row][elim_col]);
  assert(matrix->elements[target_row][elim_col]);

  /* Fill-in: afterwards both rows carry the union of their non-zero
   * patterns.  Every column is visited exactly once, so the column
   * ordering is irrelevant here.
   */
  for(k = 0; k < dim2; k++) {
    if(matrix->elements[pivot_row][k])
      matrix->elements[target_row][k] = 1;
    if(matrix->elements[target_row][k])
      matrix->elements[pivot_row][k] = 1;
  }

  /* Any cached weighted sparsity distance that involves one of the
   * two rows is stale now; -1 marks an entry as "not computed".
   */
  for(k = 0; k < dim1; k++) {
    matrix->wspdist[pivot_row][k]  = -1;
    matrix->wspdist[target_row][k] = -1;
    matrix->wspdist[k][pivot_row]  = -1;
    matrix->wspdist[k][target_row] = -1;
  }

  /* Once the fill-in is done the two rows differ only in the
   * eliminated element, which carries the Givens weight.
   */
  matrix->wspdist[pivot_row][target_row] = GIVENS_COST;
  matrix->wspdist[target_row][pivot_row] = GIVENS_COST;

  /* Finally clear the eliminated element itself */
  matrix->elements[target_row][elim_col] = 0;
}

/* The apply_fake_givens() routine works almost like the apply_givens()
 * routine.
 * Except, we do not rely on a non-zero as the eliminating element.
 * This is not numerically correct, but the routine comes in handy when
 * we estimate costs of factorization on systems that are not necessarily
 * legal (eg. systems that can have zeros in the diagonal).
 *
 * The routine takes the same arguments as apply_givens().
 *
 */
void apply_fake_givens(matrix_t* matrix, Tsolution* sys, 
		  unsigned a, unsigned b, unsigned c) 
{
  /* Same bookkeeping as apply_givens(), except that the eliminating
   * element (a,c) is allowed to be zero.
   */
  const int pivot_row  = sys->row_ordering[a];
  const int target_row = sys->row_ordering[b];
  const int elim_col   = sys->column_ordering[c];
  int k;

  /* Dump the system before the assertion below fires, to help debugging */
  if(pivot_row == target_row) writesys("assertion", sys);

  /* Preconditions: distinct rows, pivot position on the diagonal,
   * target on or below it, and a non-zero element to eliminate.
   */
  assert(pivot_row != target_row);
  assert(a == c);
  assert(b >= c);
  assert(matrix->elements[target_row][elim_col]);

  /* Fill-in: afterwards both rows carry the union of their non-zero
   * patterns.  Every column is visited exactly once, so the column
   * ordering is irrelevant here.
   */
  for(k = 0; k < dim2; k++) {
    if(matrix->elements[pivot_row][k])
      matrix->elements[target_row][k] = 1;
    if(matrix->elements[target_row][k])
      matrix->elements[pivot_row][k] = 1;
  }

  /* Any cached weighted sparsity distance that involves one of the
   * two rows is stale now; -1 marks an entry as "not computed".
   */
  for(k = 0; k < dim1; k++) {
    matrix->wspdist[pivot_row][k]  = -1;
    matrix->wspdist[target_row][k] = -1;
    matrix->wspdist[k][pivot_row]  = -1;
    matrix->wspdist[k][target_row] = -1;
  }

  /* The distance between the two rows themselves is set to the
   * Givens weight, exactly as apply_givens() does.
   */
  matrix->wspdist[pivot_row][target_row] = GIVENS_COST;
  matrix->wspdist[target_row][pivot_row] = GIVENS_COST;

  /* Finally clear the eliminated element itself */
  matrix->elements[target_row][elim_col] = 0;
}



/* The spdist() routine returns the sparsity pattern distance between 
 * two rows in a matrix.
 *
 * The sparsity distance is the difference in zeros / non-zeros between
 * two rows.
 * Examples:
 *  a:   0  0  1  0
 *  b:   0  0  1  1
 *   spdist(a,b) = 1
 *  a:   0  0  1  0
 *  b:   1  1  0  1
 *   spdist(a,b) = 4
 *
 * This is a true metric, since
 *          a=b <=> spdist(a,b)=0
 *    and   spdist(a,b) >= 0   for all a,b
 *    and   spdist(a,b)+spdist(b,c) >= spdist(a,c)
 *
 * The spdist() routine takes three arguments.  The matrix holding
 * the data, and the two rows we wish to compare.
 */
unsigned spdist(char** matrix, unsigned a, unsigned b) 
{
  /* Count the columns in which rows a and b hold different values.
   * Note that the raw stored values are compared, not merely their
   * zero/non-zero status.
   */
  unsigned mismatches = 0;
  unsigned col = 0;

  while(col < dim2) {
    mismatches += (matrix[a][col] != matrix[b][col]) ? 1u : 0u;
    col++;
  }
  return mismatches;
}

/* The wspdist() routine returns the weighted sparsity pattern 
 * distance between two rows in a matrix
 *
 * The weighted sparsity distance is similar to the normal sparsity
 * distance, except we put a weight on the sparsity differences, depending
 * on whether the difference is located in the upper or the lower triangle
 * of the matrix.
 *
 * This is handy because a difference in sparsity between two rows, will mean
 * that we either get a fillin (in the upper triangle) or an extra element to
 * eliminate (in the lower triangle). Since a fillin is cheaper than an extra
 * element in the lower-triangle, it makes sense to weight the sparsity distance.
 *
 */
unsigned wspdist(matrix_t* matrix, unsigned a, unsigned b) 
{
  /* Weighted sparsity distance between rows a and b of the matrix.
   *
   * A valid entry in the lookup table (anything but -1) is returned
   * as is.  Otherwise the distance is computed column by column.
   * Note that the computed value is NOT written back to the table.
   */
  const int cached = matrix->wspdist[a][b];
  unsigned total = 0;
  unsigned col;

  if(cached != -1)
    return cached;

  for(col = 0; col < dim2; col++) {
    unsigned filled_row;   /* the row that would receive the fill-in */

    /* Identical entries contribute nothing */
    if(matrix->elements[a][col] == matrix->elements[b][col])
      continue;

    if(matrix->elements[a][col]) {
      /* Row a holds the non-zero, so row b must hold the zero */
      assert(!matrix->elements[b][col]);
      filled_row = b;
    } else {
      filled_row = a;
    }

    /* A fill-in left of the row's own index costs a full Givens
     * weight; anywhere else it costs one unit.
     */
    if(col < filled_row)
      total += GIVENS_COST;
    else
      total += 1;
  }
  return total;
}

/* The init_bounds() routine initializes the cost and lower-bound 
 * (heuristic) on the cost, of eliminating some system.
 *
 * The routine also has a number of sanity-checks to catch possible
 * errors.
 *
 * The routine takes one argument, the system for which to initialize
 * the cost variables.
 * The routine calls the costs() routine (in optimqr.c) and the heuristic()
 * routine (in heuristic.c).
 */
void init_bounds(Tsolution* sys) 
{
  /* costs() fills in both the objective f and the bound g ... */
  costs(sys, &sys->f, &sys->g);
  /* ... and heuristic() supplies the estimate h for completing the branch */
  sys->h = heuristic(sys);

  /* Sanity checks on the freshly computed values follow. */

  /* Neither cost may be zero */
  assert(sys->g != 0 && sys->f != 0);

  /* The bound never exceeds the objective:  g(s) <= f(s) */
  assert(sys->g <= sys->f);

  /* A monotonicity check on g_history, g(s_{i-1}) <= g(s_i) for
   * ordered_pairs >= 2, used to live here but is currently disabled.
   */

  /* The bound is strictly below the objective exactly when the system
   * is not yet fully ordered (ordered_pairs differs from dim1 or dim2) ...
   */
  assert((sys->g < sys->f)
	 != ( dim1 == sys->ordered_pairs
	      && dim2 == sys->ordered_pairs ) );

  /* ... which in turn is exactly when the heuristic is non-zero */
  assert((sys->g < sys->f) == (sys->h != 0));
}

/* The alloc_matrix() routine allocates a matrix data structure,
 * and returns a pointer to the newly allocated structure.
 *
 * This routine is rather inefficient, and should thus only be used
 * to set up initial data etc.
 *
 */
matrix_t * alloc_matrix() 
{
  /* Allocate a matrix structure together with all of its buffers.
   *
   * Returns a pointer to the new structure.  Any allocation failure is
   * reported through fatal().  The contents of the buffers are left
   * uninitialized; the caller must fill them in (copy_matrix() does so
   * for the destination matrix, for instance).
   *
   * Every size is derived from the pointer being assigned, so that each
   * buffer is sized for its element type rather than for a pointer to it.
   */
  matrix_t *matrix;
  unsigned index;

  matrix = malloc(sizeof *matrix);
  if(!matrix) fatal("Out of memory allocating matrix");

  /* Element flags: a table of dim1 row pointers ... */
  matrix->elements = malloc((size_t)dim1 * sizeof *matrix->elements);
  if(!matrix->elements) 
    fatal("Out of memory allocating matrix elements");

  /* ... into one contiguous dim1 x dim2 slab */
  matrix->elements[0] = malloc((size_t)dim1 * dim2 * sizeof **matrix->elements);
  if(!matrix->elements[0]) fatal("Out of memory allocating matrix");

  for(index = 1; index < dim1; index++)
    matrix->elements[index] = matrix->elements[index-1] + dim2;

  /* Weighted sparsity distance table: same layout as the elements */
  matrix->wspdist = malloc((size_t)dim1 * sizeof *matrix->wspdist);
  if(!matrix->wspdist) 
    fatal("Out of memory allocating matrix wspdist buffer");

  matrix->wspdist[0] = malloc((size_t)dim1 * dim2 * sizeof **matrix->wspdist);
  if(!matrix->wspdist[0]) fatal("Out of memory allocating matrix");

  for(index = 1; index < dim1; index++)
    matrix->wspdist[index] = matrix->wspdist[index-1] + dim2;

  /* Allocate the row_nz and column_nz buffers.  Both hold dim1 entries,
   * which is what copy_matrix() copies for each of them.
   */
  matrix->row_nz = malloc((size_t)dim1 * sizeof *matrix->row_nz);
  matrix->column_nz = malloc((size_t)dim1 * sizeof *matrix->column_nz);
  if(!matrix->row_nz || !matrix->column_nz)
    fatal("Out of memory allocating matrix nz buffers");

  return matrix;
}

/* The copy_matrix() routine takes two arguments:
 * The destination matrix, and the source matrix.
 *
 * It copies all elements from the source matrix into
 * the destination matrix.
 */
void copy_matrix(matrix_t* A, matrix_t* B) 
{
  /* Element-wise copy of B (source) into A (destination).  Both
   * matrices must already own their buffers; nothing is allocated here.
   */
  unsigned row = 0;

  while(row < dim1) {
    unsigned col;

    /* Element flags and cached distances of this row */
    for(col = 0; col < dim2; ++col) {
      A->elements[row][col] = B->elements[row][col];
      A->wspdist[row][col]  = B->wspdist[row][col];
    }

    /* The nz entries are copied using the row counter as index */
    A->row_nz[row]    = B->row_nz[row];
    A->column_nz[row] = B->column_nz[row];

    ++row;
  }
}

/* The initialize_solution() routine initializes a solution
 * buffer with sane values.
 *
 * The routine takes one argument, a pointer to the solution
 * buffer to set up.
 *
 * The solution buffer is initialized so that it holds a non-ordered
 * system.
 */
void initialize_solution(Tsolution* sys) {
  /* Reset the solution to the identity ordering with empty cost
   * histories and no ordered pairs.
   */
  int pos;

  assert(sys);
  assert(sys->row_ordering && sys->column_ordering);
  /* One loop serves rows and columns alike, so the system must be square */
  assert(dim1 == dim2);

  for(pos = 0; pos < dim1; pos++) {
    /* Identity permutation for rows and columns */
    sys->column_ordering[pos] = pos;
    sys->row_ordering[pos]    = pos;
    /* No cost recorded yet at this position */
    sys->f_history[pos] = 0;
    sys->g_history[pos] = 0;
    sys->h_history[pos] = 0;
  }

  /* Nothing has been ordered yet */
  sys->ordered_pairs = 0;
}

/* The copy_solution() routine takes two arguments: The
 * destination and the source solution buffer.
 *
 * It copies all data from the source buffer into the destination
 * buffer.
 */
void copy_solution(Tsolution* sys, Tsolution* template) {
  /* Duplicate the orderings, the cost histories and the ordered-pair
   * count of `template` into `sys`.  The buffers of `sys` must already
   * exist; nothing is allocated here.
   */
  int pos = 0;

  /* One loop serves rows and columns alike, so the system must be square */
  assert(dim1 == dim2);

  while(pos < dim1) {
    sys->row_ordering[pos]    = template->row_ordering[pos];
    sys->column_ordering[pos] = template->column_ordering[pos];
    sys->f_history[pos]       = template->f_history[pos];
    sys->g_history[pos]       = template->g_history[pos];
    sys->h_history[pos]       = template->h_history[pos];
    pos++;
  }

  sys->ordered_pairs = template->ordered_pairs;
}

/* Scratch matrix for find_sequence().  It starts out as NULL, is
 * allocated on the first call to find_sequence(), and is overwritten
 * with a copy of matrixA at the start of every call.
 */
matrix_t *fsmatrix = NULL;

/* The find_sequence() routine finds a suitable sequence of Givens
 * transforms that will factorize the system given.
 * 
 * The routine takes three arguments:
 * The system to order, the method to use, and a pointer to a file
 * handle to which to print the sequence.
 *
 * The method can be either ``1'' or ``0''. If method==1, then
 * we will produce a Givens rotation sequence for the entire system,
 * also including any not-yet ordered rows/columns. This is used by 
 * the heuristic() routine.
 * If method==0, then we will only produce a sequence for the part of
 * the system which is ordered.
 *
 * The file handle pointer is usually the NULL pointer. It only makes
 * sense to supply a file handle, when we print the sequence for use
 * by the code generation utility, at the end of the optimization when 
 * we have our near-optimal ordering.
 *
 * The routine uses the global matrixA variable which holds the system
 * structure.  It also makes use of the global variable ``fsmatrix'' which
 * is not used by any other routines. It uses the fsmatrix variable as a
 * temporary buffer to hold the system structure. The reason the fsmatrix is
 * global is, that we only want to allocate it _once_. Matrix allocation is
 * very expensive, and we ensure that this is only done the first time this
 * routine is called.
 *
 * The routine returns the cost of the factorization. This is calculated as:
 *  cost = G * N_GT + N_unz
 * Where G is the relative cost of a Givens Rotation compared to a fillin elimination,
 * N_GT is the number of Givens Rotations applied, and N_unz is the number of
 * upper-triangle non-zeros.
 *
 * In order for the rest of the optimizer to make certain assumptions, the sequence
 * we produce here, must be relatively independent on the actual ordering of the rows.
 */
int find_sequence(Tsolution* sys, int method, FILE* outfile)
{
  int n_gt = 0;    /* number of Givens rotations accounted for */
  int n_unz = 0;   /* number of upper triangle non-zeros counted */
  int i,j;

  assert(method == 0 || method == 1);

  /* Allocate the working matrix on the first call only */
  if(!fsmatrix) fsmatrix = alloc_matrix();

  /* First, get a working copy of the system matrix to mess with */
  copy_matrix(fsmatrix, matrixA);
  
  /* Step through all lower triangle non-zeros.
   * Traversal is done one column at a time.
   * Traversal must be column-wise, since eliminations introduce
   * fill-ins.
   * With method==1 every column but the last is processed; with
   * method==0 only the first ordered_pairs columns are.
   */
  for(j = 0; j < (method ? dim2-1 : sys->ordered_pairs); j++) {  
    /* Number of non-zeros found below the diagonal in column j
     * during the most recent scan.
     */
    int nonzeros;
    /* An elimination loop. Each iteration scans the column and
     * eliminates (at most) one non-zero.
     */
    do {
      int bestelimcost = OVERMUCH;
      int bestelimrow = -1;
      nonzeros = 0;
      for(i = j+1; i < dim1; i++) {
	/* Only rows with a non-zero in column j are candidates */
	if(SMATRIX(fsmatrix,sys,i,j)) {

	  /* The row i needs elimination. 
	   */
	  /* Compute the weighted sparsity distance for this candidate
	   * and remember the cheapest one seen so far.  The comparison
	   * is strict, so on a tie the lowest row index wins.
	   */

	  /* We consider elimination even if our eliminating element
	   * is zero; that case is handled below by calling
	   * apply_fake_givens() instead of apply_givens().
	   */
	  /* NOTE(review): wspdist() is handed i and j directly, whereas
	   * the non-zero test above goes through SMATRIX() with sys --
	   * confirm which index space wspdist() expects.
	   */
	  int ecost = wspdist(fsmatrix,i,j);
	  if(ecost < bestelimcost) {
	    bestelimcost = ecost;
	    bestelimrow = i;
	  }
	  nonzeros++;
	}
      }
      /* The row number contained in bestelimrow is the
       * row which has the minimal weighted sparsity distance
       * to the row with the diagonal element, of all rows
       * with a nonzero in the j'th column.
       * This row should be eliminated now, since the cost
       * of doing so is minimal.
       */
      if(nonzeros > 0) {
	/* Fails if no candidate had a cost below OVERMUCH */
	assert(bestelimrow > -1);

	/* If we're doing the real thing here, we should always have
	 * non-zero diagonal elements!
	 * The "<=" on two 0/1 values encodes an implication: a given
	 * outfile implies a non-zero MATRIX(sys,j,j).
	 */
	assert((outfile != NULL) <= (MATRIX(sys,j,j) != 0));
	/* Apply the Givens transform; the fake variant does not
	 * require a non-zero eliminating element.
	 */
	if(MATRIX(sys,j,j)) {
	  apply_givens(fsmatrix, sys, j, bestelimrow, j);
	} else {
	  apply_fake_givens(fsmatrix, sys, j, bestelimrow, j);
	}
	/* Emit "pivot-row eliminated-row column" when a file is given */
	if(outfile) fprintf(outfile, "%i %i %i\n", j, bestelimrow, j);

	/* With method==0 every Givens transform is accounted for.
	 * With method==1 only the transforms in columns at or beyond
	 * ordered_pairs (the not-yet-ordered part) are accounted for.
	 */
	if(method == 0) n_gt++;
	else if(j >= sys->ordered_pairs) n_gt++;
      }
      /* The scan counted the non-zeros before one of them was
       * eliminated, so more work remains only if it found at least two.
       */
    } while (nonzeros > 1);
  }

  /* Now count upper triangle non-zeros (diagonal included).
   */
  if(method == 0) {
    /* Only count in the well defined part of the
     * upper triangle: rows 0 .. ordered_pairs-1 */
    for(i = 0; i < sys->ordered_pairs; i++) 
      for(j = i; j < dim2; j++) 
	if(SMATRIX(fsmatrix,sys,i,j)) n_unz++; 
  } else {
    /* Count non-zeros in the entire undefined upper triangle:
     * rows ordered_pairs .. dim1-1 */
    for(i = sys->ordered_pairs; i < dim1; i++) 
      for(j = i; j < dim2; j++) 
	if(SMATRIX(fsmatrix,sys,i,j)) n_unz++; 
  }
  
   /* Return cost: weighted rotation count plus counted non-zeros
    */
   return GIVENS_COST * n_gt + n_unz;
}


/* The write_status() routine is used for status reporting. It takes
 * two or more arguments, namely:
 * A descriptive name of the calling routine, a format string (similar
 * to that of printf() and similar routines), and some number of arguments
 * required by the format string. The usage is similar to that of printf()
 * with the exception that we supply a name of the calling routine too.
 *
 * The WRITE_INTERVAL define defines the number of seconds that should elapse
 * between each status write. We use this as a way to control the number of
 * status messages produced.
 *
 * This routine could be called _fairly_ often by the other routines in the
 * optimizer. Yet, it should be noted, that this routine makes at least one
 * system call (the gettimeofday() call), whenever it is called, so even though
 * frequent calling of this routine will not produce massive amounts of
 * output from the optimizer, it will still take up processing time.
 *
 * If the specified number of seconds have elapsed when this routine is called,
 * it will produce output almost similar to printf(), except we have a timestamp
 * printed first, and the name of the calling function in parentheses.
 * Output could look like:
 *   22:25:55 (BB) live nodes: 0, (min,mean)g()+h():  (0,0)
 */
/* Minimum number of seconds between two status lines */
#define WRITE_INTERVAL 30
/* Time (in seconds) at which the last status line was printed */
static time_t last_write_status = 0;
void write_status(char* caller, char* format,...)
{
  /* Print a status line to stdout, at most once per WRITE_INTERVAL
   * seconds:
   *
   *   HH:MM:SS (caller) <formatted message>
   *
   * caller  - name of the reporting routine, printed in parentheses
   * format  - printf()-style format string, followed by its arguments
   *
   * The message is cut off after sizeof(outstr)-1 characters.  Calls
   * made before the interval has elapsed return without printing.
   */
  char outstr[1024];
  struct timeval now;
  time_t now_sec;
  struct tm *lnow;
  va_list ap;

  /* Without a valid clock reading we cannot throttle; stay silent */
  if(gettimeofday(&now, NULL) != 0) return;

  /* localtime() wants a time_t, which tv_sec is not guaranteed to be
   * on every platform, so work on a proper time_t copy.
   */
  now_sec = now.tv_sec;

  if(now_sec < last_write_status+WRITE_INTERVAL) return;

  /* Format the caller's message; start from an empty string so that
   * a failing vsnprintf() cannot leave the buffer undefined.
   */
  outstr[0] = '\0';
  va_start(ap, format);
  vsnprintf(outstr, sizeof outstr, format, ap);
  va_end(ap);

  /* The formatted message is data, not a format string: it is printed
   * through "%s" so that a '%' in it is never interpreted again.
   */
  lnow = localtime(&now_sec);
  if(lnow)
    printf("%02i:%02i:%02i (%s) %s\n", lnow->tm_hour, 
	   lnow->tm_min, lnow->tm_sec, caller, outstr);
  else
    printf("--:--:-- (%s) %s\n", caller, outstr);

  last_write_status = now_sec;
}
