/* This is bb $Revision: 1.12 $ */

/* This will perform a branch-and-bound search for an
 * optimal QR factorization
 */

#include "optimqr.h"
#include <stdio.h>
#include <assert.h>

/* We use a hybrid between Depth-First and Best-First search in the
 * B&B search routine.
 * All live nodes are inserted into the bb_live_nodes list, in a
 * pseudo-sorted way: If a new live node has a lower estimated cost of
 * completion than the current first live node, we insert the new
 * live node at the front of the list.  If not, we insert the new live node
 * right after the first live node.  This way, we would maintain best-first
 * search, if the estimated cost of completion was always decreasing. This
 * is of course not the case, and sometimes the search becomes sort-of depth-first.
 *
 * In order to maintain the best-first search strategy to some extent, we
 * occasionally sort the list of live nodes, and prune any live nodes which have
 * an estimated cost-of-completion higher than the currently best solution found.
 *
 * We specifically sort the list of live nodes _every_ time we find a new best
 * solution.
 *
 * We also sort after some number of iterations.  This number is calculated below.
 */
/* Number of bb_search() iterations between forced cleanup+sort passes of
 * the live-node list.  The expression is evaluated each time the countdown
 * in bb_search() is reloaded.
 */
#define SORT_EVERY (dim1*dim2*10)

/* Print a status line after every n iterations of bb_search() */
#define PRINT_EVERY (dim1*10)


/* Our list of live nodes, and the actual number of live nodes
 * in the list, is declared here.
 * The list is singly linked through the aux_next field; bb_no_ln is
 * incremented on every insertion and decremented on every removal.
 */
dmh_list* bb_live_nodes = NULL;
unsigned  bb_no_ln = 0;

/* Whenever we look at a live node, we generate a new set of possible
 * branches.  This set of branches is evaluated, and some of the
 * branches _may_ be inserted into the list of live nodes, if they
 * qualify as possible new optimal solutions.
 * bb_build_branch_list() fills the list (counted in bb_no_pb) and
 * bb_discard_bad_branches() drains it again.
 * bb_iter counts the iterations of the main loop in bb_search().
 */
dmh_list* bb_possible_branches = NULL;
unsigned bb_no_pb = 0;
unsigned bb_iter = 0;

/* Both values are reset at the start of every bb_build_branch_list() call
 * and lowered as branches are generated.
 */
unsigned bb_pb_best_gh;  /* holds the g()+h() of the best new p.b. */
unsigned bb_pb_best_f;   /* holds the f() of the best new p.b. */

/* Some live-node statistics, recomputed by bb_cleanup_live_nodes() and
 * reported by the periodic status line in bb_search().
 */
unsigned bb_best_live_gh = 0;
unsigned bb_mean_live_gh = 0;

/* bb_min_improvement is subtracted from the cost of the best solution when
 * the acceptance/pruning thresholds are formed; it is not assigned in this
 * file.  bb_max_live_nodes is the largest value bb_no_ln has had when
 * sampled once per iteration in bb_search().
 */
unsigned bb_min_improvement = 0;
unsigned bb_max_live_nodes = 0;

/* Forward declaration of the cost helper defined at the end of this file */
inline int costis(dmh_list*);

/* Set by bb_search() when the periodic countdown expires and by
 * bb_discard_bad_branches() when a better solution is found; cleared by
 * bb_cleanup_live_nodes() once the live list has been pruned and sorted.
 */
int do_cleanup = 0;  /* true if we found a better solution, or
                      * if we should cleanup & sort at next live list maint. */

/* Forward declarations of the routines defined below */
dmh_list* bb_get_live_node(void);
void bb_build_branch_list(Tsolution*);
void bb_discard_bad_branches(void);
void bb_cleanup_live_nodes(void);

/* The bb_search() routine is the heart of the optimizer.  In this routine,
 * we build new branch lists, discard the bad ones, and have the remaining
 * good branches inserted into our list of live nodes.
 * We continue until there are no more live nodes to process.
 * When that happens, we have a near-optimal solution.
 *
 * This routine requires an initial ordering, which is taken from the global
 * variable "orderedsolution".
 * It leaves the best solution found in the global variable "bestsolution".
 *
 * While we optimize, the "bestsolution" variable _always_ holds the currently
 * best found solution.
 * Initially this variable holds a ``hypothetical'' solution with a very high
 * cost. This is set up by the main() routine.
 *
 * Side effects: updates bb_iter and bb_max_live_nodes, raises do_cleanup
 * every SORT_EVERY iterations, writes a status line every PRINT_EVERY
 * iterations, and prints two summary lines on stdout when the search ends.
 */
void bb_search(void) {
  Tsolution* node = orderedsolution;  /* node currently being expanded */
  dmh_list* dmhnode = NULL;           /* list cell that owns "node", if any */
  unsigned sort = 0;                  /* iterations left until forced sort */
  unsigned print = 0;                 /* iterations left until status line */

  while (node) {
    /* Both countdowns start at zero, so the very first iteration requests
     * a cleanup and prints a status line.
     */
    if(!sort) {
      do_cleanup = 1;
      sort = SORT_EVERY;
    }
    sort--;

    if(!print) {
      print = PRINT_EVERY;
      write_status("BB",
                   "live nodes: %u, (min,mean)g()+h():  (%u,%u)",
                   bb_no_ln, bb_best_live_gh, bb_mean_live_gh);
    }
    print--;

    /* Create list of possible branches (new GTs) */
    bb_build_branch_list(node);
    /* Discard branches with g(s)+h(s) > f(best) */
    /* and add the rest to the list of live nodes */
    bb_discard_bad_branches();

    /* Maintain statistics */
    bb_max_live_nodes = MAX(bb_max_live_nodes,bb_no_ln);

    /* The node just expanded is no longer needed: release its list cell
     * (the initial orderedsolution has none) and fetch the next live node.
     */
    if(dmhnode) dmh_free(dmhnode);
    dmhnode = bb_get_live_node();
    node = dmhnode ? dmhnode->solution : NULL;

    bb_iter++;
  }

  printf("BB: max. no. of live nodes was: %u\n", bb_max_live_nodes);
  printf("BB: stopped after %u iterations.\n", bb_iter);
}

/* This routine returns the next live-node with an estimated cost-of-completion
 * strictly lower than the current best solution found.
 * We take live nodes from the front of the live-node list. If the element we
 * look at has an estimated cost higher than the real cost of the currently best 
 * solution, we prune the node from the list of live nodes, and carry on until
 * we find a usable live-node, or until there are no live-nodes left.
 */
dmh_list* bb_get_live_node(void) {
  dmh_list* node = NULL;
  do {
    if(node) dmh_free(node);
    if(bb_live_nodes) {
      node = bb_live_nodes;
      bb_live_nodes = bb_live_nodes->aux_next;
      bb_no_ln--;
    }
  } while(bb_no_ln && node 
	  && (node->solution->g + node->solution->h) >= bestsolution->f);
  return node;
}

/* bb_build_branch_list() expands one live node: every branch that descends
 * from "node" is pushed onto the global bb_possible_branches list and
 * counted in bb_no_pb.
 *
 * The unfixed part of the system (slots ordered_pairs .. dim-1 in both
 * directions) is scanned row by row.  Every non-zero found there can be
 * pivoted into the next diagonal slot, and each such pivot gives one branch.
 * With s the side length of the unfixed part, at most s^2 branches can be
 * generated (the dense case); for sparse systems the number should be far
 * lower.
 *
 * Only the row/column ordering is searched here.  The Givens transform
 * sequence itself is found with a greedy algorithm, which is fast but not
 * necessarily optimal wrt. the cost of factorization and back-substitution.
 *
 * As a side effect bb_pb_best_gh and bb_pb_best_f are reset and then lowered
 * to the smallest g()+h() and f() among the generated branches.
 */
void bb_build_branch_list(Tsolution* node) {
  int r;
  int unfixed = dim1 - (node ? node->ordered_pairs : 0);

  assert(node);
  assert(bestsolution);
  assert(dim1 == dim2);

  /* Start both running minima at the current acceptance ceiling */
  bb_pb_best_gh = bestsolution->f - bb_min_improvement;
  bb_pb_best_f = bestsolution->f - bb_min_improvement;

  for(r = node->ordered_pairs; r < dim1; r++) {
    int c;

    for(c = node->ordered_pairs; c < dim2; c++) {
      dmh_list* branch;
      Tsolution* sol;
      int displaced;

      /* Only a non-zero element can become the next pivot */
      if(!(MATRIX(node,r,c))) continue;

      /* The branch starts out as a copy of the node being expanded */
      branch = dmh_alloc();
      copy_solution(branch->solution, node);
      sol = branch->solution;

      /* Swap the row in slot r with the row in the first unfixed slot */
      displaced = sol->row_ordering[sol->ordered_pairs];
      sol->row_ordering[sol->ordered_pairs] = sol->row_ordering[r];
      sol->row_ordering[r] = displaced;

      /* ... and do the same for the column in slot c */
      displaced = sol->column_ordering[sol->ordered_pairs];
      sol->column_ordering[sol->ordered_pairs] = sol->column_ordering[c];
      sol->column_ordering[c] = displaced;

      /* The chosen non-zero must now sit on the diagonal */
      assert(MATRIX(sol, sol->ordered_pairs, sol->ordered_pairs));

      /* Fix the new pair and recompute the bounds of the branch */
      sol->ordered_pairs++;
      init_bounds(sol);

      /* Track the best estimates seen during this expansion */
      bb_pb_best_gh = MIN(sol->g + sol->h, bb_pb_best_gh);
      bb_pb_best_f = MIN(sol->f, bb_pb_best_f);

      /* Push the branch onto the list of possible branches */
      branch->aux_next = bb_possible_branches;
      bb_possible_branches = branch;
      bb_no_pb++;
    }
  }

  /* No more than s^2 branches may have been generated */
  assert(bb_no_pb <= unfixed*unfixed);
}

/* The bb_discard_bad_branches() routine looks through the list of proposed
 * new branches in bb_possible_branches and empties it.  Each branch ends
 * up in exactly one of three places:
 *
 *  - A complete solution (g() == f()) that beats the current best by more
 *    than bb_min_improvement becomes the new "bestsolution".  It is written
 *    to "<outputdir>/last-best" and do_cleanup is raised.
 *  - A branch with g() + accept_factor*h() >= the threshold derived from
 *    the best solution is freed.
 *  - Any other branch is moved onto the bb_live_nodes list: at the front,
 *    or right behind the first live node (see below).
 *
 * When do_cleanup is set on exit from the loop, the live list is pruned
 * and sorted with bb_cleanup_live_nodes().
 */
void bb_discard_bad_branches(void) {
  dmh_list* node = bb_possible_branches;
  unsigned ghbest = bestsolution->g + bestsolution->h - bb_min_improvement;
  unsigned fbest = bestsolution->f - bb_min_improvement;

  while(node) {
    /* Remember the successor first: the node is either freed or relinked
     * into another list below, so its aux_next cannot be read afterwards.
     */
    dmh_list* next = node->aux_next;

    if(node->solution->g == node->solution->f
       && node->solution->f < fbest) {
      char outname[512];
      int len;

      assert(node->solution->h == 0);
      printf("BB: Better solution found! f(s) = %u\n",node->solution->f);

      /* NOTE(review): neither the previous bestsolution nor the list cell
       * of this node is released here; confirm the ownership rules of
       * dmh_free() before adding a release.
       */
      bestsolution = node->solution;

      /* Later branches in this pass must beat the new best */
      ghbest = bestsolution->g + bestsolution->h - bb_min_improvement;
      fbest = bestsolution->f - bb_min_improvement;

      /* Print last-best system; never write through a truncated path */
      len = snprintf(outname, sizeof outname, "%s/last-best", outputdir);
      if(len < 0 || (size_t)len >= sizeof outname)
        fprintf(stderr, "BB: output path too long, last-best not written\n");
      else
        writesys(outname, bestsolution);

      do_cleanup = 1;
    } else if(((double)node->solution->g
               + (double)accept_factor*(double)node->solution->h)
              >= (double)ghbest) {
      /* Cannot improve on the best solution: dealloc node */
      dmh_free(node);
    } else {
      /* insert in live-node list */
      assert(bb_pb_best_gh <= node->solution->g + node->solution->h);
      assert(bb_pb_best_f <= node->solution->f);

      /* Only act best-first alike, when we have one real solution.
       * NOTE(review): the new node's weighted cost is compared against
       * the g() of the first live node only, not its full cost - confirm
       * that this is intended.
       */
      if((bestsolution->f != OVERMUCH) && bb_live_nodes
         && costis(node) > (double)bb_live_nodes->solution->g) {
        /* Insert right behind the first live node */
        node->aux_next = bb_live_nodes->aux_next;
        bb_live_nodes->aux_next = node;
      } else {
        /* Plain insertion in the front */
        node->aux_next = bb_live_nodes;
        bb_live_nodes = node;
      }
      bb_no_ln++;
    }

    bb_no_pb--;
    node = next;
  }
  assert(!bb_no_pb);
  bb_possible_branches = NULL;

  /* Clean up if we found a better solution, or if bb_search() asked for it */
  if(do_cleanup) {
    unsigned prev = bb_no_ln;
    bb_cleanup_live_nodes();
    if(prev > bb_no_ln)
      printf("BB: Cleaned away %u obsolete live nodes\n", prev - bb_no_ln);
  }
}

/* The bb_cleanup_live_nodes() routine prunes and then sorts the list of
 * live nodes.
 *
 * Pass 1 frees every live node whose weighted cost (see costis()) is
 * greater than or equal to bestsolution->f - bb_min_improvement.
 *
 * Pass 2 rebuilds the list in ascending costis() order, to maintain the
 * best-first nature of the search strategy, and recomputes the statistics
 * bb_best_live_gh and bb_mean_live_gh (minimum and mean of g()+h() over
 * the surviving nodes).
 *
 * The routine assumes that the live nodes are found in the bb_live_nodes
 * list, counted in bb_no_ln, and that a solution (which may well be the
 * initial hypothetical solution) is found in the "bestsolution" global
 * variable.  It prints progress markers on stdout and clears do_cleanup.
 */
void bb_cleanup_live_nodes(void) {
  dmh_list* prev = NULL;
  dmh_list* node = bb_live_nodes;
  dmh_list* hint = NULL;           /* node inserted most recently in pass 2 */
  unsigned long long gh_sum = 0;   /* wide, so the sum cannot wrap */
  unsigned ghbest = (bestsolution->f - bb_min_improvement);

  /* We cannot assume the list of live nodes is sorted in any
   * way.
   */

  printf("(Delete");
  fflush(stdout);

  while(node) {
    dmh_list* next = node->aux_next;

    assert(bb_no_ln);

    if(costis(node) >= ghbest) {
      /* Unchain node and delete it */
      if(prev) prev->aux_next = next;
      else bb_live_nodes = next;
      dmh_free(node);
      bb_no_ln--;
    } else {
      prev = node;
    }
    node = next;
  }

  /* Now to the sort thing: an insertion sort into a fresh list.
   * Yes, this is O(n^2) in the worst case, but we do this rarely.
   * An O(n*log(n)) sorting routine would allow sorting the live-node
   * list more frequently.
   */

  node = bb_live_nodes;
  bb_live_nodes = NULL;

  printf(")(Sort");
  fflush(stdout);

  bb_best_live_gh = bestsolution->f;

  while(node) {
    dmh_list* next = node->aux_next;
    dmh_list* before = NULL;          /* predecessor of insertion point */
    dmh_list* at = bb_live_nodes;     /* first node not cheaper than us */
    int cost = costis(node);

    bb_best_live_gh = MIN(bb_best_live_gh, node->solution->g + node->solution->h);
    gh_sum += node->solution->g + node->solution->h;

    /* Everything up to and including the previously inserted node costs
     * no more than that node.  If we are strictly more expensive, the
     * scan may start right behind it; on a tie or a lower cost we must
     * start from the head, or the node could land in front of cheaper
     * ones.
     */
    if(hint && cost > costis(hint)) {
      before = hint;
      at = hint->aux_next;
    }

    /* go to insertion point */
    while(at && cost > costis(at)) {
      before = at;
      at = at->aux_next;
    }

    /* chain in between "before" and "at" */
    node->aux_next = at;
    if(before) before->aux_next = node;
    else bb_live_nodes = node;

    hint = node;
    node = next;
  }

  if(bb_no_ln)
    bb_mean_live_gh = (unsigned)(gh_sum / bb_no_ln);
  else
    bb_mean_live_gh = 0;

  printf(")(ln:%u)\n",bb_no_ln);

  do_cleanup = 0;
}

/* The costis() routine returns the weighted estimated cost-of-completion of
 * some partial solution (a node):  g() + accept_factor * h(), computed in
 * double precision and truncated to int.
 * This is simply a helper routine implemented to avoid programming
 * errors when calculating this cost.
 *
 * The routine assumes that it is passed a valid node as argument.
 * It does not depend on any global variables, except for
 * the accept_factor, which should be constant throughout the
 * optimization run.
 *
 * The definition deliberately carries no "inline" specifier: with C99 and
 * later rules, a function whose file-scope declarations are all "inline"
 * without "extern" gets no external definition, and a call that is not
 * inlined then fails to link.
 */
int costis(dmh_list* node)
{
  return (int)((double)accept_factor * (double)node->solution->h
               + (double)node->solution->g);
}
