/*  solve.c  */

#include "../DFrontMtx.h"
#include "../../timings.h"

#define MYDEBUG 0

/*--------------------------------------------------------------------*/
/*
   forward declarations of the file-local computational kernels.
   the DFrontMtx_* solve-and-update methods below call these after 
   extracting the front's diagonal, off-diagonal and sparsity data.
*/
/* dense forward solve on the internal rows, then update of the
   boundary rows */
static void denseForwardSolve ( int nDJ, int nbndJ, int nrhs,
   int npivot, int pivotsizes[], double diagent[], double upperent[],
   double rhsJ[], double rhsBJ[] ) ;
/* sparse forward solve, driven by per-row sizes and index lists */
static void sparseForwardSolve ( int nDJ, int nbndJ, int nrhs,
   int sizesU[], int upperind[], double diagent[], double upperent[],
   double rhsJ[], double rhsBJ[] ) ;
/* scaling of the internal right hand side by the diagonal factor */
static void scaleByD ( int nDJ, int nrhs, int npivot, int pivotsizes[],
   double diagent[], double rhsJ[] ) ;
/* dense backward solve; operates in place on solJ */
static void denseBackwardSolve ( int nDJ, int nbndJ, int nrhs,
   int npivot, int pivotsizes[], double diagent[], double upperent[],
   double solJ[] ) ;
/* sparse backward solve; operates in place on solJ */
static void sparseBackwardSolve ( int nDJ, int nbndJ, int nrhs,
   int sizesU[], int upperind[], double diagent[], double upperent[],
   double solJ[] ) ;
/*--------------------------------------------------------------------*/
/*
   ----------------------------------------------------------------
   this method is used in the multithreaded and MPI parallel solves

   frontmtx -- front matrix object
   rhsDA2   -- right hand side matrix object,
      note, for multithreaded factorization, rhsDA2 is global
      note, for MPI factorization, rhsDA2 is local
   J       -- present front to solve
   manager -- manager of working dense matrices
   mtxList -- list object for storing descendent matrices
   p_mtxJ  -- array of pointers to internal matrix objects
   p_mtxBJ -- array of pointers to boundary matrix objects
   status  -- status vector for the fronts
      status[J] = 'W' --> initialize the data structures
      status[J] = 'R' --> load update, solve and update
      status[J] = 'F' --> front is finished
   cpus -- vector of CPU breakdowns
      cpus[0] --- initialize the fronts
      cpus[1] --- load rhs and solution
      cpus[2] --- assemble children and parents
      cpus[3] --- solve and update
      cpus[4] --- store rhs and solution
      cpus[5] --- store updates
   msglvl  -- message level
   msgFile -- message file

   created -- 97nov15, cca
   ----------------------------------------------------------------
*/
void
DFrontMtx_parallelForwardSolve (
   DFrontMtx          *frontmtx,
   DA2                *rhsDA2,
   int                J,
   DDenseMtxManager   *manager,
   DDenseMtxList      *mtxList,
   DDenseMtx          *p_mtxJ[],
   DDenseMtx          *p_mtxBJ[],
   char               status[],
   double             cpus[],
   int                msglvl,
   FILE               *msgFile
) {
/*
   visit front J once during the forward solve.

   status[J] == 'W' : create the internal (mtxJ) and boundary (mtxBJ)
                      objects, load the right hand side, and move the
                      front to 'R'.
   status[J] == 'R' : if the child count for J in mtxList is zero,
                      assemble any queued child updates, solve, store 
                      the result in rhsDA2, hand mtxBJ to the parent 
                      and move the front to 'F'.  otherwise return 
                      with the front still 'R' so the caller can 
                      visit it again.

   timings are accumulated into cpus[] using the breakdown 
      cpus[0] initialize, cpus[1] load rhs, cpus[2] assemble,
      cpus[3] solve and update, cpus[4] store solution,
      cpus[5] store updates (link to parent)
   which is the same indexing used by DFrontMtx_parallelBackwardSolve.
   NOTE: MARKTIME may expand to more than one statement, so it is 
   only ever used inside a braced block.
*/
DDenseMtx   *firstI, *mtxBJ, *mtxJ ;
double      t1, t2 ;
int         K ;
int         *par ;
   
if ( status[J] == 'W' ) {
/*
   -------------------------
   initialize mtxJ and mtxBJ
   -------------------------
*/
   MARKTIME(t1) ;
   DFrontMtx_forwInit(frontmtx, J, rhsDA2->n2, manager, 
                      &mtxJ, &mtxBJ) ;
   MARKTIME(t2) ;
   cpus[0] += t2 - t1 ;
   if ( mtxJ != NULL ) {
      MARKTIME(t1) ;
      DFrontMtx_forwLoadRHS(mtxJ, rhsDA2) ;
      MARKTIME(t2) ;
      cpus[1] += t2 - t1 ;
   }
   if ( msglvl > 2 ) {
      if ( mtxJ != NULL ) {
         fprintf(msgFile, "\n after initialization, mtxJ") ;
         DDenseMtx_writeForHumanEye(mtxJ, msgFile) ;
      }
      if ( mtxBJ != NULL ) {
         fprintf(msgFile, "\n after initialization, mtxBJ") ;
         DDenseMtx_writeForHumanEye(mtxBJ, msgFile) ;
      }
      fflush(msgFile) ;
   }
   p_mtxJ[J]  = mtxJ  ;
   p_mtxBJ[J] = mtxBJ ;
   status[J]  = 'R' ;
}
if ( status[J] == 'R' ) {
/*
   ------------------------------
   front is ready to be worked on
   ------------------------------
*/
   mtxJ  = p_mtxJ[J]  ;
   mtxBJ = p_mtxBJ[J] ;
   if ( DDenseMtxList_isCountZero(mtxList, J) == 1 ) {
/*
      ---------------------------------------------------------------
      no more updates from any children remain to be put on the list.
      ---------------------------------------------------------------
*/
      if ( msglvl > 1 ) {
         fprintf(msgFile,
                 "\n waiting for no more updates from children") ;
         fflush(msgFile) ;
      }
      if ( DDenseMtxList_isListNonempty(mtxList, J) == 1 ) {
         if ( msglvl > 2 ) {
            fprintf(msgFile, "\n updates from children to assemble");
            fflush(msgFile) ;
         }
/*
         --------------------------------------
         assemble all updates from the children
         and then give their objects back 
         --------------------------------------
*/
         MARKTIME(t1) ;
         firstI = DDenseMtxList_getList(mtxList, J) ;
         DFrontMtx_forwLoadFromChildren(mtxJ, mtxBJ, firstI) ;
         MARKTIME(t2) ;
         cpus[2] += t2 - t1 ;
         if ( msglvl > 2 ) {
            if ( mtxJ != NULL ) {
               fprintf(msgFile, "\n after assembly, mtxJ") ;
               DDenseMtx_writeForHumanEye(mtxJ, msgFile) ;
            }
            if ( mtxBJ != NULL ) {
               fprintf(msgFile, "\n after assembly, mtxBJ") ;
               DDenseMtx_writeForHumanEye(mtxBJ, msgFile) ;
            }
            fflush(msgFile) ;
         }
         DDenseMtxManager_releaseListOfObjects(manager, firstI) ;
      }
      if ( mtxJ != NULL ) {
/*
         -------------------------------
         do the forward solve and update
         -------------------------------
*/
         if ( msglvl > 1 ) {
            fprintf(msgFile, "\n doing forward solve and update") ;
            fflush(msgFile) ;
         }
         MARKTIME(t1) ;
         DFrontMtx_forwSolveAndUpdate(frontmtx, mtxJ, mtxBJ) ;
         MARKTIME(t2) ;
         cpus[3] += t2 - t1 ;
         if ( msglvl > 2 ) {
            fprintf(msgFile, "\n after solve and update, mtxJ") ;
            DDenseMtx_writeForHumanEye(mtxJ, msgFile) ;
            if ( mtxBJ != NULL ) {
               fprintf(msgFile, "\n after solve and update, mtxBJ") ;
               DDenseMtx_writeForHumanEye(mtxBJ, msgFile) ;
            }
            fflush(msgFile) ;
         }
/*
         --------------
         store solution
         --------------
*/
         if ( msglvl > 1 ) {
            fprintf(msgFile, "\n storing solution") ;
            fflush(msgFile) ;
         }
         MARKTIME(t1) ;
         DFrontMtx_forwStore(rhsDA2, mtxJ) ;
         MARKTIME(t2) ;
         cpus[4] += t2 - t1 ;
/*
         -----------------------
         release the matrix mtxJ
         -----------------------
*/
         if ( msglvl > 1 ) {
            fprintf(msgFile, "\n releasing object") ;
            fflush(msgFile) ;
         }
         DDenseMtxManager_releaseObject(manager, mtxJ) ;
      }
      par = ETree_par(frontmtx->frontETree) ;
      K   = par[J] ;
      if ( mtxBJ != NULL && K != -1 ) {
/*
         -----------------------------
         link the update to the parent
         -----------------------------
*/
         MARKTIME(t1) ;
         DDenseMtxList_addObjectToList(mtxList, mtxBJ, K) ;
         MARKTIME(t2) ;
         cpus[5] += t2 - t1 ;
      }
/*
      NOTE(review): if mtxBJ != NULL while J has no parent, the 
      boundary object is neither linked nor released here -- 
      presumably a root never has boundary rows; confirm.
*/
/*
      -----------------------
      set pointers and status
      -----------------------
*/
      p_mtxJ[J] = p_mtxBJ[J] = NULL ;
      status[J] = 'F' ;
   }
}
return ; }
 
/*--------------------------------------------------------------------*/
/*
   ----------------------------------------------------------------
   this method is used in the multithreaded and MPI parallel solves

   frontmtx -- front matrix object
   rhsDA2   -- right hand side matrix object,
      note, for multithreaded factorization, rhsDA2 is global
      note, for MPI factorization, rhsDA2 is local
   solDA2   -- solution matrix object,
      note, for multithreaded factorization, solDA2 is global
      note, for MPI factorization, solDA2 is local
   J       -- present front to solve
   manager -- manager of working dense matrices
   mtxList -- list object for storing descendent matrices
   p_mtxJ  -- array of pointers to internal matrix objects
   status  -- status vector for the fronts
      status[J] = 'W' --> initialize the data structures
      status[J] = 'R' --> load update, solve and update
      status[J] = 'F' --> front is finished
   cpus -- vector of CPU breakdowns
      cpus[0] --- initialize the fronts
      cpus[1] --- load rhs and solution
      cpus[2] --- assemble children and parents
      cpus[3] --- solve and update
      cpus[4] --- store rhs and solution
      cpus[5] --- store updates
   msglvl  -- message level
   msgFile -- message file

   created -- 97nov15, cca
   ----------------------------------------------------------------
*/
void
DFrontMtx_parallelBackwardSolve (
   DFrontMtx          *frontmtx,
   DA2                *rhsDA2,
   DA2                *solDA2,
   int                J,
   DDenseMtxManager   *manager,
   DDenseMtxList      *mtxList,
   DDenseMtx          *p_mtxJ[],
   char               status[],
   double             cpus[],
   int                msglvl,
   FILE               *msgFile
) {
   /*
      visit front J once during the backward solve.

      'W' : create the working object for J, gather its right hand 
            side rows and mark the front 'R'.
      'R' : proceed only when J is a root or its list in mtxList is 
            nonempty (the parent has delivered its entries); then 
            load from the parent, solve, scatter into solDA2 and 
            pass the working matrix on to every child.  the first 
            child receives mtxJ itself, each further child a copy.
            the front is then marked 'F'.
      MARKTIME is only used inside braced blocks since it may expand
      to more than one statement.
   */
   DDenseMtx   *handoff, *mtxJ, *parentMtx ;
   double      tstart, tstop ;
   int         child, isFirstChild, nbytes, ncolJ, nDJ, nrhs, nrowJ,
               parent ;
   int         *fch, *par, *rowindJ, *sib ;

   nrhs = rhsDA2->n2 ;
   if ( status[J] == 'W' ) {
      /* create the working object */
      MARKTIME(tstart) ;
      mtxJ = DFrontMtx_backInit(frontmtx, J, nrhs, manager) ;
      MARKTIME(tstop) ;
      cpus[0] += tstop - tstart ;
      p_mtxJ[J] = mtxJ ;
      nDJ = DFrontMtx_frontSize(frontmtx, J) ;
      if ( nDJ > 0 ) {
         /* gather the right hand side rows owned by J */
         MARKTIME(tstart) ;
         DFrontMtx_rowIndices(frontmtx, J, &nrowJ, &rowindJ) ;
         DFrontMtx_backLoadSolution(mtxJ, nDJ, rowindJ, rhsDA2) ;
         MARKTIME(tstop) ;
         cpus[1] += tstop - tstart ;
      }
      status[J] = 'R' ;
      if ( msglvl > 1 ) {
         fprintf(msgFile, "\n mtxJ initialized") ;
         if ( msglvl > 2 ) {
            DDenseMtx_writeForHumanEye(mtxJ, msgFile) ;
         }
      }
      fflush(msgFile) ;
   }
   if ( status[J] != 'R' ) {
      return ;
   }
   par = ETree_par(frontmtx->frontETree) ;
   fch = ETree_fch(frontmtx->frontETree) ;
   sib = ETree_sib(frontmtx->frontETree) ;
   parent = par[J] ;
   if (  parent != -1
      && DDenseMtxList_isListNonempty(mtxList, J) != 1 ) {
      /* not a root and nothing from the parent yet, try later */
      return ;
   }
   mtxJ = p_mtxJ[J] ;
   nDJ  = DFrontMtx_frontSize(frontmtx, J) ;
   if ( parent != -1 ) {
      /* pull the boundary solution entries out of the parent */
      if ( msglvl > 1 ) {
         fprintf(msgFile,
                "\n loading solution entries from parent %d", parent) ;
         fflush(msgFile) ;
      }
      MARKTIME(tstart) ;
      parentMtx = DDenseMtxList_getList(mtxList, J) ;
      DFrontMtx_backLoadFromParent(mtxJ, nDJ, parentMtx) ;
      MARKTIME(tstop) ;
      cpus[2] += tstop - tstart ;
      if ( msglvl > 2 ) {
         fprintf(msgFile, "\n after load, mtxJ ") ;
         DDenseMtx_writeForHumanEye(mtxJ, msgFile) ;
         fflush(msgFile) ;
      }
      /* the object received from the parent is no longer needed */
      MARKTIME(tstart) ;
      DDenseMtxManager_releaseObject(manager, parentMtx) ;
      MARKTIME(tstop) ;
      cpus[5] += tstop - tstart ;
   }
   if ( nDJ > 0 ) {
      /* back solve and update */
      if ( msglvl > 1 ) {
         fprintf(msgFile,
                "\n performing the backward solve and update") ;
         fflush(msgFile) ;
      }
      MARKTIME(tstart) ;
      DFrontMtx_backSolveAndUpdate(frontmtx, mtxJ) ;
      MARKTIME(tstop) ;
      cpus[3] += tstop - tstart ;
      if ( msglvl > 2 ) {
         fprintf(msgFile, "\n after solve and update, mtxJ ") ;
         DDenseMtx_writeForHumanEye(mtxJ, msgFile) ;
         fflush(msgFile) ;
      }
      /* scatter the internal entries into the solution */
      if ( msglvl > 1 ) {
         fprintf(msgFile, "\n storing the entries") ;
         fflush(msgFile) ;
      }
      MARKTIME(tstart) ;
      DFrontMtx_backStore(mtxJ, nDJ, solDA2) ;
      MARKTIME(tstop) ;
      cpus[4] += tstop - tstart ;
   }
   child = fch[J] ;
   if ( child == -1 ) {
      /* leaf front: nobody else needs the working matrix */
      DDenseMtxManager_releaseObject(manager, mtxJ) ;
   } else {
      /* every child gets the solution matrix of J on its list */
      ncolJ        = mtxJ->nrow ;
      isFirstChild = 1 ;
      while ( child != -1 ) {
         MARKTIME(tstart) ;
         if ( isFirstChild == 1 ) {
            if ( msglvl > 1 ) {
               fprintf(msgFile,
                "\n keeping same mtx object for child %d", child) ;
               fflush(msgFile) ;
            }
            handoff      = mtxJ ;
            isFirstChild = 0 ;
         } else {
            if ( msglvl > 1 ) {
               fprintf(msgFile,
                "\n creating new mtx object for child %d", child) ;
               fflush(msgFile) ;
            }
            nbytes  = DDenseMtx_nbytesNeeded(ncolJ, nrhs) ;
            handoff = DDenseMtxManager_newObjectOfSizeNbytes(
                         manager, nbytes) ;
            DDenseMtx_setFields(handoff, J, -1,
                                ncolJ, nrhs, 1, ncolJ) ;
            DVcopy(ncolJ*nrhs, handoff->entries, mtxJ->entries) ;
         }
         DDenseMtxList_addObjectToList(mtxList, handoff, child) ;
         MARKTIME(tstop) ;
         cpus[5] += tstop - tstart ;
         child = sib[child] ;
      }
   }
   /* front J is done */
   status[J] = 'F' ;
   return ;
}

/*--------------------------------------------------------------------*/
/*
   ----------------------------------------------------------
   initialize the two DDenseMtx objects for the forward solve

   created -- 97jun26, cca
   ----------------------------------------------------------
*/
/*
   helper for DFrontMtx_forwInit: obtain an nrow x nrhs object from 
   the manager, initialize it for front J with column stride nrow, 
   zero its entries, copy rowind[] into its row indices and fill its
   column indices with 0, 1, 2, ...
*/
static DDenseMtx *
forwInitBlock (
   DDenseMtxManager   *manager,
   int                J,
   int                nrow,
   int                nrhs,
   int                rowind[]
) {
   DDenseMtx   *mtx ;
   int         count, nbytes ;
   int         *indices ;

   nbytes = DDenseMtx_nbytesNeeded(nrow, nrhs) ;
   mtx    = DDenseMtxManager_newObjectOfSizeNbytes(manager, nbytes) ;
   DDenseMtx_init(mtx, J, -1, nrow, nrhs, 1, nrow) ;
   DVzero(nrow*nrhs, mtx->entries) ;
   DDenseMtx_rowIndices(mtx, &count, &indices) ;
   IVcopy(count, indices, rowind) ;
   DDenseMtx_columnIndices(mtx, &count, &indices) ;
   IVramp(count, indices, 0, 1) ;
   return mtx ;
}

void
DFrontMtx_forwInit (
   DFrontMtx          *frontmtx,
   int                J,
   int                nrhs,
   DDenseMtxManager   *manager,
   DDenseMtx          **pmtxJ,
   DDenseMtx          **pmtxBJ
) {
   /*
      the first nDJ row indices of front J go to the internal object,
      the remaining nrowJ - nDJ go to the boundary object.  an empty
      part is returned as NULL.
   */
   DDenseMtx   *boundary, *internal ;
   int         nbndJ, nDJ, nrowJ ;
   int         *rowindJ ;

   nDJ = DFrontMtx_frontSize(frontmtx, J) ;
   DFrontMtx_rowIndices(frontmtx, J, &nrowJ, &rowindJ) ;
   nbndJ = nrowJ - nDJ ;

   internal = NULL ;
   if ( nDJ > 0 ) {
      internal = forwInitBlock(manager, J, nDJ, nrhs, rowindJ) ;
   }
   boundary = NULL ;
   if ( nbndJ > 0 ) {
      boundary = forwInitBlock(manager, J, nbndJ, nrhs, rowindJ + nDJ) ;
   }
   *pmtxBJ = boundary ;
   *pmtxJ  = internal ;
   return ;
}

/*--------------------------------------------------------------------*/
/*
   --------------------------------
   load the right hand side entries

   created -- 97jun26, cca
   --------------------------------
*/
void
DFrontMtx_forwLoadRHS (
   DDenseMtx          *mtxJ,
   DA2                *rhsDA2
) {
   /*
      gather: for each column of mtxJ, entry k is taken from row 
      mtxJ->rowind[k] of the matching column of rhsDA2.  columns of 
      mtxJ are mtxJ->nrow apart, columns of rhsDA2 are rhsDA2->n1 
      apart.  nothing is done when mtxJ is NULL.
   */
   double   *dst, *src ;
   int      col, k, ncol, nrow, srcStride ;
   int      *map ;

   if ( mtxJ == NULL ) {
      return ;
   }
   nrow      = mtxJ->nrow ;
   ncol      = mtxJ->ncol ;
   map       = mtxJ->rowind ;
   srcStride = rhsDA2->n1 ;
   dst       = mtxJ->entries ;
   src       = rhsDA2->entries ;
   for ( col = 0 ; col < ncol ; col++, dst += nrow, src += srcStride ) {
      for ( k = 0 ; k < nrow ; k++ ) {
         dst[k] = src[map[k]] ;
      }
   }
   return ;
}

/*--------------------------------------------------------------------*/
/*
   -------------------------------------------------------------------
   assemble updates from the children into mtxJ and mtxBJ.
   the child matrix objects are released by the caller.

   created -- 97jun26, cca
   -------------------------------------------------------------------
*/
void
DFrontMtx_forwLoadFromChildren (
   DDenseMtx          *mtxJ,
   DDenseMtx          *mtxBJ,
   DDenseMtx          *firstI
) {
/*
   add the entries of every matrix on the list headed by firstI
   (linked through ->next) into the internal block mtxJ and/or the
   boundary block mtxBJ of the front.

   mtxJ   -- internal block, may be NULL
   mtxBJ  -- boundary block, may be NULL
   firstI -- head of the list of child updates, may be NULL 
             (then nothing is done)

   the row indices of each child matrix are used as positions in 
   the front: a position jj < nDJ lands in mtxJ, otherwise in mtxBJ 
   at jj - nDJ.  when one of the two blocks is absent all entries go 
   to the other one, indexed by jj directly.

   if neither block supplies a nonzero number of right hand sides 
   (in particular when both are NULL) a diagnostic is written to 
   stderr and the program exits.  the list itself is not modified 
   or released here.
*/
DDenseMtx   *mtxI ;
double      *rhsBJ, *rhsI, *rhsJ ;
int         ii, irhs, J, jj, nbndJ, nDJ, nrowI, nrhs ;
int         *rowindI ;
/*
   -----------------------------
   check for updates to assemble
   -----------------------------
*/
if ( firstI == NULL ) {
   return ;
}
/*
   ---------------------------------------------------------
   collect dimensions without dereferencing a NULL block.
   J is only used in the diagnostic and stays -1 when
   neither block is present.
   ---------------------------------------------------------
*/
J     = -1 ;
nrhs  =  0 ;
nDJ   =  0 ;
nbndJ =  0 ;
if ( mtxJ != NULL ) {
   J    = mtxJ->rowid ;
   nDJ  = mtxJ->nrow  ;
   nrhs = mtxJ->ncol  ;
}
if ( mtxBJ != NULL ) {
   if ( mtxJ == NULL ) {
      J = mtxBJ->rowid ;
   }
   nbndJ = mtxBJ->nrow ;
   nrhs  = mtxBJ->ncol ;
}
if ( nrhs == 0 ) {
/*
   %p requires a pointer to void, hence the casts
*/
   fprintf(stderr, "\n fatal error in DFrontMtx_forwLoad"
           "\n J = %d, mtxJ = %p, mtxBj = %p, nrhs = %d",
           J, (void *) mtxJ, (void *) mtxBJ, nrhs) ;
   exit(-1) ;
}
/*
   ----------------------
   loop over the children
   ----------------------
*/
for ( mtxI = firstI ; mtxI != NULL ; mtxI = mtxI->next ) {
   DDenseMtx_rowIndices(mtxI, &nrowI, &rowindI) ;
#if MYDEBUG > 0
   fprintf(stdout, "\n\n mtxI") ;
   DDenseMtx_writeForHumanEye(mtxI, stdout) ;
   fflush(stdout) ;
#endif
   rhsI = DDenseMtx_entries(mtxI) ;
   if ( mtxBJ == NULL ) {
/*
      ------------------------------------------------
      boundary block is empty, add into internal block
      ------------------------------------------------
*/
      rhsJ = mtxJ->entries ;
      for ( irhs = 0 ; irhs < nrhs ; irhs++ ) {
         for ( ii = 0 ; ii < nrowI ; ii++ ) {
            rhsJ[rowindI[ii]] += rhsI[ii] ;
         }
         rhsJ += nDJ ;
         rhsI += nrowI ;
      }
   } else if ( mtxJ == NULL ) {
/*
      ------------------------------------------------
      internal block is empty, add into boundary block
      ------------------------------------------------
*/
      rhsBJ = mtxBJ->entries ;
      for ( irhs = 0 ; irhs < nrhs ; irhs++ ) {
         for ( ii = 0 ; ii < nrowI ; ii++ ) {
            rhsBJ[rowindI[ii]] += rhsI[ii] ;
         }
         rhsBJ += nbndJ ;
         rhsI  += nrowI ;
      }
   } else {
/*
      ------------------------------------------------
      add entries to both internal and boundary blocks
      ------------------------------------------------
*/
      rhsJ  = mtxJ->entries  ;
      rhsBJ = mtxBJ->entries ;
      for ( irhs = 0 ; irhs < nrhs ; irhs++ ) {
         for ( ii = 0 ; ii < nrowI ; ii++ ) {
            jj = rowindI[ii] ;
            if ( jj < nDJ ) {
               rhsJ[jj] += rhsI[ii] ;
            } else {
               rhsBJ[jj-nDJ] += rhsI[ii] ;
            }
         }
         rhsJ  +=  nDJ  ;
         rhsBJ += nbndJ ;
         rhsI  += nrowI ;
      }
   }
}

return ; }

/*--------------------------------------------------------------------*/
/*
   ---------------------------------------------------------
   solve (L_{J,J} + I) sol_J = rhs_J
   update  rhs_{bnd{J}} := rhs_{bnd{J}} - L_{bnd{J},J} sol_J

   created -- 97jun27, cca
   ---------------------------------------------------------
*/
void
DFrontMtx_forwSolveAndUpdate (
   DFrontMtx          *frontmtx,
   DDenseMtx          *mtxJ,
   DDenseMtx          *mtxBJ
) {
   /*
      forward solve on the internal block mtxJ followed by the update
      of the boundary block mtxBJ (which may be NULL).  nothing is
      done when mtxJ is NULL.

      the off-diagonal factor entries come from the upper part when
      symmetryflag is 0 or 3 and from the lower part otherwise.
      pivot sizes are fetched only for pivotingflag == 1 together
      with symmetryflag == 0.  for symmetryflag == 3 the diagonal is
      handed to the solve kernel; for every other value the internal
      block is scaled by the diagonal after the solve.
   */
   double   *bndRhs, *diag, *intRhs, *offdiag ;
   int      J, nbndJ, nDJ, nent, npivot, nrhs, nrow, useUpper ;
   int      *offind, *pivotsizes, *sizes ;

   if ( mtxJ == NULL ) {
      return ;
   }
   J      = mtxJ->rowid ;
   nDJ    = mtxJ->nrow  ;
   nrhs   = mtxJ->ncol  ;
   intRhs = mtxJ->entries ;
   nbndJ  = 0 ;
   bndRhs = NULL ;
   if ( mtxBJ != NULL ) {
      nbndJ  = mtxBJ->nrow ;
      bndRhs = mtxBJ->entries ;
   }
   npivot     = 0 ;
   pivotsizes = NULL ;
   if ( frontmtx->pivotingflag == 1 && frontmtx->symmetryflag == 0 ) {
      DFrontMtx_pivotsizes(frontmtx, J, &npivot, &pivotsizes) ;
   }
   diag = NULL ;
   if ( frontmtx->symmetryflag == 3 ) {
      DFrontMtx_diagEntries(frontmtx, J, &nent, &diag) ;
   }
   useUpper = (  frontmtx->symmetryflag == 0 
              || frontmtx->symmetryflag == 3 ) ;
   /* off-diagonal entries of the factor used in the forward solve */
   if ( useUpper ) {
      DFrontMtx_upperEntries(frontmtx, J, &nent, &offdiag) ;
   } else {
      DFrontMtx_lowerEntries(frontmtx, J, &nent, &offdiag) ;
   }
   if ( frontmtx->sparsityflag == 0 ) {
      /* dense front */
#if MYDEBUG > 0
      fprintf(stdout, "\n\n inside DFrontMtx_forwSolveAndUpd") ;
      fprintf(stdout, "\n %d entries in lowerent", nent) ;
      DVfprintf(stdout, nent, offdiag) ;
#endif
      denseForwardSolve(nDJ, nbndJ, nrhs, npivot, pivotsizes,
                        diag, offdiag, intRhs, bndRhs) ;
   } else {
      /* sparse front, the matching index information is needed too */
      if ( useUpper ) {
         DFrontMtx_upperSparsityInfo(frontmtx, J, 
                                     &nrow, &sizes, &offind) ;
      } else {
         DFrontMtx_lowerSparsityInfo(frontmtx, J, 
                                     &nrow, &sizes, &offind) ;
      }
      sparseForwardSolve(nDJ, nbndJ, nrhs, sizes, offind,
                         diag, offdiag, intRhs, bndRhs) ;
   }
   if ( frontmtx->symmetryflag != 3 ) {
      /* scale by D */
      DFrontMtx_diagEntries(frontmtx, J, &nent, &diag) ;
      scaleByD(nDJ, nrhs, npivot, pivotsizes, diag, intRhs) ;
   }
   return ;
}

/*--------------------------------------------------------------------*/
/*
   --------------------------
   store the solution entries 

   created -- 97jun27, cca
   --------------------------
*/
void
DFrontMtx_forwStore (
   DA2                *rhsDA2,
   DDenseMtx          *mtxJ
) {
   /*
      scatter: entry k of each column of mtxJ is written to row 
      mtxJ->rowind[k] of the matching column of rhsDA2.  columns of 
      mtxJ are mtxJ->nrow apart, columns of rhsDA2 are rhsDA2->n1 
      apart.  nothing is done when mtxJ is NULL.
   */
   double   *dst, *src ;
   int      col, dstStride, k, ncol, nrow ;
   int      *map ;

   if ( mtxJ == NULL ) {
      return ;
   }
   nrow      = mtxJ->nrow ;
   ncol      = mtxJ->ncol ;
   src       = mtxJ->entries ;
   map       = mtxJ->rowind ;
   dstStride = rhsDA2->n1 ;
   dst       = rhsDA2->entries ;
   for ( col = 0 ; col < ncol ; col++, src += nrow, dst += dstStride ) {
      for ( k = 0 ; k < nrow ; k++ ) {
         dst[map[k]] = src[k] ;
      }
   }
   return ;
}

/*--------------------------------------------------------------------*/
/*
   ------------------------------------------
   initialize the solution object for front J

   created -- 97jun27, cca
   ------------------------------------------
*/
DDenseMtx *
DFrontMtx_backInit (
   DFrontMtx          *frontmtx,
   int                J,
   int                nrhs,
   DDenseMtxManager   *manager
) {
   /*
      obtain an ncolJ x nrhs object from the manager, where ncolJ is 
      the number of column indices of front J.  its entries are 
      zeroed, its row indices receive the column indices of J and its
      column indices are filled with 0, 1, 2, ...
      returns the new object.
   */
   DDenseMtx   *work ;
   int         count, nbytes, ncolJ ;
   int         *colindJ, *indices ;

   /* the front size is queried as in the original but not used here */
   (void) DFrontMtx_frontSize(frontmtx, J) ;
   DFrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ;

   nbytes = DDenseMtx_nbytesNeeded(ncolJ, nrhs) ;
   work   = DDenseMtxManager_newObjectOfSizeNbytes(manager, nbytes) ;
   DDenseMtx_init(work, J, -1, ncolJ, nrhs, 1, ncolJ) ;
   DVzero(ncolJ*nrhs, work->entries) ;

   /* column indices of J become the row indices of the object */
   DDenseMtx_rowIndices(work, &count, &indices) ;
   IVcopy(ncolJ, indices, colindJ) ;
   DDenseMtx_columnIndices(work, &count, &indices) ;
   IVramp(count, indices, 0, 1) ;

   return work ;
}

/*--------------------------------------------------------------------*/
/*
   -------------------------------------------------------------
   load the right hand side entries for front J into mtxJ:
   for each column of rhsDA2, gather rows rowindJ[0..nDJ-1]
   into the leading nDJ entries of the matching column of mtxJ.
   (the object itself is created by DFrontMtx_backInit)

   created -- 97jun27, cca
   -------------------------------------------------------------
*/
void
DFrontMtx_backLoadSolution (
   DDenseMtx   *mtxJ,
   int         nDJ,
   int         rowindJ[],
   DA2         *rhsDA2
) {
   /*
      gather the first nDJ entries of every column of mtxJ from rows 
      rowindJ[0..nDJ-1] of the matching column of rhsDA2.  the number
      of columns is rhsDA2->n2; columns of mtxJ are mtxJ->nrow apart,
      columns of rhsDA2 are rhsDA2->n1 apart.  nothing is touched 
      when nDJ <= 0.
   */
   double   *dst, *src ;
   int      col, dstStride, k, ncol, srcStride ;

   if ( nDJ <= 0 ) {
      return ;
   }
   dstStride = mtxJ->nrow      ;
   dst       = mtxJ->entries   ;
   ncol      = rhsDA2->n2      ;
   srcStride = rhsDA2->n1      ;
   src       = rhsDA2->entries ;
   for ( col = 0 ; col < ncol ; col++ ) {
      for ( k = 0 ; k < nDJ ; k++ ) {
         dst[k] = src[rowindJ[k]] ;
      }
      dst += dstStride ;
      src += srcStride ;
   }
   return ;
}

/*--------------------------------------------------------------------*/
/*
   -----------------------------------------------
   if the parent exists, load the solution entries 
   from the parent (the caller releases parent's object)

   created -- 97jun27, cca
   -----------------------------------------------
*/
void
DFrontMtx_backLoadFromParent (
   DDenseMtx          *mtxJ,
   int                nDJ,
   DDenseMtx          *mtxK
) {
   /*
      fill rows nDJ .. mtxJ->nrow-1 of every column of mtxJ from 
      mtxK: row k of mtxJ is read from position mtxJ->rowind[k] of 
      the matching column of mtxK.  columns advance by each object's 
      inc2.  nothing is done when mtxK is NULL.
   */
   double   *dst, *src ;
   int      col, dstStride, k, ncol, nrow, srcStride ;
   int      *map ;

   if ( mtxK == NULL ) {
      return ;
   }
   nrow      = mtxJ->nrow ;
   ncol      = mtxJ->ncol ;
   dstStride = mtxJ->inc2 ;
   dst       = mtxJ->entries ;
   map       = mtxJ->rowind ;
   srcStride = mtxK->inc2 ;
   src       = mtxK->entries ;
   for ( col = 0 ; col < ncol ; col++ ) {
      for ( k = nDJ ; k < nrow ; k++ ) {
         dst[k] = src[map[k]] ;
      }
      dst += dstStride ;
      src += srcStride ;
   }
   return ;
}

/*--------------------------------------------------------------------*/
/*
   --------------------------------------------------------------
   perform the update rhs_J := rhs_J - U_{J,bnd{J}} * sol_{bnd{J}}
   and solve (I + U_{J,J}) sol_J = rhs_J

   created -- 97jun27, cca
   --------------------------------------------------------------
*/
void
DFrontMtx_backSolveAndUpdate (
   DFrontMtx   *frontmtx,
   DDenseMtx   *mtxJ
) {
   /*
      backward solve for front J = mtxJ->rowid, done in place on the
      entries of mtxJ.  the rows of mtxJ beyond the front size are
      treated as the boundary part.  nothing is done when mtxJ is 
      NULL.

      pivot sizes are fetched only for pivotingflag == 1 together 
      with symmetryflag == 0, the diagonal entries only for 
      symmetryflag == 3; otherwise NULL is passed to the kernel.
   */
   double   *diag, *sol, *upper ;
   int      J, nbndJ, ncolJ, nDJ, nent, npivot, nrhs ;
   int      *pivotsizes, *sizes, *upperind ;

   if ( mtxJ == NULL ) {
      return ;
   }
   J   = mtxJ->rowid ;
   nDJ = DFrontMtx_frontSize(frontmtx, J) ;

   npivot     = 0 ;
   pivotsizes = NULL ;
   if ( frontmtx->pivotingflag == 1 && frontmtx->symmetryflag == 0 ) {
      DFrontMtx_pivotsizes(frontmtx, J, &npivot, &pivotsizes) ;
   }
   /* diagonal entries for a (U^T + D)(D + U) factorization */
   diag = NULL ;
   if ( frontmtx->symmetryflag == 3 ) {
      DFrontMtx_diagEntries(frontmtx, J, &nent, &diag) ;
   }
   sol   = mtxJ->entries ;
   nrhs  = mtxJ->ncol ;
   nbndJ = mtxJ->nrow - nDJ ;

   /* both the dense and the sparse kernel need the upper entries */
   DFrontMtx_upperEntries(frontmtx, J, &nent, &upper) ;
   if ( frontmtx->sparsityflag != 0 ) {
      /* sparse front */
      DFrontMtx_upperSparsityInfo(frontmtx, J, 
                                  &ncolJ, &sizes, &upperind) ;
      sparseBackwardSolve(nDJ, nbndJ, nrhs, sizes, upperind,
                          diag, upper, sol) ;
   } else {
      /* dense front */
      denseBackwardSolve(nDJ, nbndJ, nrhs, npivot, pivotsizes,
                         diag, upper, sol) ;
   }
   return ;
}

/*--------------------------------------------------------------------*/
/*
   ------------------------------------------------
   store the solution entries of front J into solDA2
   (copies for the children are made by the caller)

   created -- 97jun27, cca
   ------------------------------------------------
*/
void
DFrontMtx_backStore (
   DDenseMtx          *mtxJ,
   int                nDJ,
   DA2                *solDA2
) {
   /*
      scatter the first nDJ entries of every column of mtxJ into 
      solDA2: entry k goes to position mtxJ->rowind[k] of the 
      matching column.  columns of mtxJ are mtxJ->nrow apart, columns
      of solDA2 are solDA2->inc2 apart.
   */
   double   *dst, *src ;
   int      col, dstStride, k, ncol, srcStride ;
   int      *map ;

   srcStride = mtxJ->nrow ;
   ncol      = mtxJ->ncol ;
   map       = mtxJ->rowind ;
   src       = mtxJ->entries ;
   dst       = solDA2->entries ;
   dstStride = solDA2->inc2 ;
   for ( col = 0 ; col < ncol ; col++, dst += dstStride, src += srcStride ) {
      for ( k = 0 ; k < nDJ ; k++ ) {
         dst[map[k]] = src[k] ;
      }
   }
   return ;
}

/*--------------------------------------------------------------------*/
/*
   -----------------------------------------
   perform a dense forward solve for a front

   created -- 97may17, cca
   -----------------------------------------
*/
static void
denseForwardSolve (
   int      nDJ,
   int      nbndJ,
   int      nrhs,
   int      npivot,
   int      pivotsizes[],
   double   diagent[],
   double   upperent[],
   double   rhsJ[],
   double   rhsBJ[]
) {
   /*
      upperent[] is walked one internal index at a time.  without 
      pivot sizes, index i consumes i coefficients; with pivot sizes,
      every index of a pivot block consumes as many coefficients as 
      there are indices before the block.  whatever follows the 
      internal part is used as the nbndJ x nDJ coupling block for the
      boundary update.

      rhsJ holds nrhs columns of length nDJ and is overwritten; 
      rhsBJ holds nrhs columns of length nbndJ and is only touched 
      when nbndJ > 0.  when diagent is not NULL (and there are no 
      pivot sizes) each solved entry is divided by diagent[i].
   */
   DA2      bnd, coupling, internal ;
   double   *coef, *x ;
   double   dot ;
   int      blockEnd, blockStart, i, k, p, r ;
#if MYDEBUG > 0
   fprintf(stdout, 
           "\n inside denseForwardSolve"
           "\n nDJ = %d, nbndJ = %d, nrhs = %d, npivot = %d",
           nDJ, nbndJ, nrhs, npivot) ;
   fflush(stdout) ;
#endif
   /* solve for the internal entries */
   coef = upperent ;
   if ( pivotsizes != NULL ) {
      blockStart = 0 ;
      for ( p = 0 ; p < npivot ; p++ ) {
         blockEnd = blockStart + pivotsizes[p] ;
         for ( i = blockStart ; i < blockEnd ; i++ ) {
            for ( r = 0, x = rhsJ ; r < nrhs ; r++, x += nDJ ) {
               dot = 0.0 ;
               for ( k = 0 ; k < blockStart ; k++ ) {
                  dot += coef[k] * x[k] ;
               }
               x[i] -= dot ;
            }
            coef += blockStart ;
         }
         blockStart = blockEnd ;
      }
   } else {
      for ( i = 0 ; i < nDJ ; i++ ) {
         for ( r = 0, x = rhsJ ; r < nrhs ; r++, x += nDJ ) {
            dot = 0.0 ;
            for ( k = 0 ; k < i ; k++ ) {
               dot += coef[k] * x[k] ;
            }
            x[i] -= dot ;
            if ( diagent != NULL ) {
               x[i] /= diagent[i] ;
            }
         }
         coef += i ;
      }
   }
   if ( nbndJ <= 0 ) {
      return ;
   }
   /* update the external entries through the DA2 kernel */
   DA2_setDefaultFields(&bnd) ;
   DA2_setDefaultFields(&coupling) ;
   DA2_setDefaultFields(&internal) ;
   DA2_init(&bnd, nbndJ, nrhs, 1, nbndJ, rhsBJ) ;
   DA2_init(&coupling, nbndJ, nDJ, nDJ, 1, coef) ;
   DA2_init(&internal, nDJ, nrhs, 1, nDJ, rhsJ) ;
#if MYDEBUG > 0
   fprintf(stdout, "\n DA2 A") ;
   DA2_writeForHumanEye(&bnd, stdout) ;
   fprintf(stdout, "\n DA2 B") ;
   DA2_writeForHumanEye(&coupling, stdout) ;
   fprintf(stdout, "\n DA2 C") ;
   DA2_writeForHumanEye(&internal, stdout) ;
   fflush(stdout) ;
#endif
   DA2_ndUpd(&bnd, &coupling, &internal) ;
#if MYDEBUG > 0
   fprintf(stdout, "\n DA2 A after update") ;
   DA2_writeForHumanEye(&bnd, stdout) ;
   fflush(stdout) ;
#endif
   return ;
}
   
/*--------------------------------------------------------------------*/
/*
   ------------------------------------------------------------------
   perform a sparse forward solve for a front

   nDJ      -- number of internal rows; also the stride between
               consecutive right hand sides inside rhsJ
   nbndJ    -- number of boundary rows; also the stride between
               consecutive right hand sides inside rhsBJ
   nrhs     -- number of right hand sides
   sizesU   -- sizesU[ii] is the number of stored entries for row
               ii, 0 <= ii < nDJ + nbndJ
   upperind -- indices (into an rhsJ column) of the stored entries,
               all rows concatenated in row order
   upperent -- values of the stored entries, parallel to upperind
   diagent  -- if not NULL, internal row ii is divided by
               diagent[ii] after its update
   rhsJ     -- internal right hand side entries, overwritten
   rhsBJ    -- boundary right hand side entries, updated in place

   when diagent is not NULL every internal row is scaled, including
   rows with no stored entries, so that the result agrees with
   denseForwardSolve().

   created -- 97may17, cca
   ------------------------------------------------------------------
*/
static void
sparseForwardSolve (
   int      nDJ,
   int      nbndJ,
   int      nrhs,
   int      sizesU[],
   int      upperind[],
   double   diagent[],
   double   upperent[],
   double   rhsJ[],
   double   rhsBJ[]
) {
double   sum ;
double   *rowi, *rhsj, *rhsbj ;
int      ii, irhs, kk, usizei ;
int      *indi ;
/*
    ------------------------------
    solve for the internal entries
    ------------------------------
*/
rowi = upperent ;
indi = upperind ;
for ( ii = 0 ; ii < nDJ ; ii++ ) {
   usizei = sizesU[ii] ;
   if ( usizei <= 0 && diagent == NULL ) {
/*
      nothing to subtract and nothing to scale
*/
      continue ;
   }
   rhsj = rhsJ ;
   for ( irhs = 0 ; irhs < nrhs ; irhs++ ) {
      sum = 0.0 ;
      for ( kk = 0 ; kk < usizei ; kk++ ) {
         sum += rowi[kk] * rhsj[indi[kk]] ;
      }
      rhsj[ii] -= sum ;
      if ( diagent != NULL ) {
/*
         the scaling must not depend on whether the row is empty
*/
         rhsj[ii] /= diagent[ii] ;
      }
      rhsj += nDJ ;
   }
   if ( usizei > 0 ) {
      rowi += usizei ;
      indi += usizei ;
   }
}
/*
    ------------------------------------------------------------
    update the external entries. ii is the local boundary index,
    the matching entry of sizesU is sizesU[nDJ + ii]. indexing
    rhsBJ locally avoids forming a pointer that lies before the
    start of the rhsBJ array.
    ------------------------------------------------------------
*/
for ( ii = 0 ; ii < nbndJ ; ii++ ) {
   usizei = sizesU[nDJ + ii] ;
   if ( usizei > 0 ) {
      rhsj  = rhsJ  ;
      rhsbj = rhsBJ ;
      for ( irhs = 0 ; irhs < nrhs ; irhs++ ) {
         sum = 0.0 ;
         for ( kk = 0 ; kk < usizei ; kk++ ) {
            sum += rowi[kk] * rhsj[indi[kk]] ;
         }
         rhsbj[ii] -= sum   ;
         rhsj      += nDJ   ;
         rhsbj     += nbndJ ;
      }
      rowi += usizei ;
      indi += usizei ;
   }
}
return ; }
   
/*--------------------------------------------------------------------*/
/*
   ------------------------------------------------------------------
   scale the local entries by the diagonal matrix

   nDJ        -- number of internal rows; also the stride between
                 consecutive right hand sides inside rhsJ
   nrhs       -- number of right hand sides
   npivot     -- number of pivot blocks, read only when pivotsizes
                 is not NULL
   pivotsizes -- NULL, or the size (1 or 2) of each pivot block
   diagent    -- diagonal entries.
                 pivotsizes == NULL : one entry per row.
                 otherwise : one entry for a 1 x 1 pivot and three
                 consecutive entries (arr, ars, ass) for a 2 x 2
                 pivot, the off-diagonal ars being used for both
                 off-diagonal positions
   rhsJ       -- entries to be scaled, overwritten

   a pivot size other than 1 or 2 is a fatal error : a message is
   written to stderr and the program exits with status -1.

   created -- 97may17, cca
   ------------------------------------------------------------------
*/
static void
scaleByD ( 
   int      nDJ,
   int      nrhs,
   int      npivot,
   int      pivotsizes[],
   double   diagent[],
   double   rhsJ[]
) {
double   arr, ars, ass, recip, t1, t2 ;
double   *rhs ;
int      ii, ipivot, irhs, kk ; 

rhs = rhsJ ;
for ( irhs = 0 ; irhs < nrhs ; irhs++ ) {
   if ( pivotsizes == NULL ) {
/*
      plain diagonal, one divisor per row
*/
      for ( ii = 0 ; ii < nDJ ; ii++ ) {
#if MYDEBUG > 1
         fprintf(stdout, "\n rhs[%d] = %12.4e = %12.4e / %12.4e",
                 ii, rhs[ii]/diagent[ii], rhs[ii], diagent[ii]) ;
#endif
         rhs[ii] = rhs[ii] / diagent[ii] ;
      }
   } else {
/*
      block diagonal, ii indexes the rows, kk indexes diagent
*/
      for ( ipivot = ii = kk = 0 ; ipivot < npivot ; ipivot++ ) {
         switch ( pivotsizes[ipivot] ) {
         case 1 :
            rhs[ii] = rhs[ii] / diagent[kk] ;
            ii++ ;
            kk++ ;
            break ;
         case 2 :
/*
            apply the explicit inverse of [ arr ars ; ars ass ]
*/
            arr = diagent[kk++] ;
            ars = diagent[kk++] ;
            ass = diagent[kk++] ;
            recip = 1./(arr*ass - ars*ars) ;
            t1 = rhs[ii] ;
            t2 = rhs[ii+1] ;
            rhs[ii]   = recip*(ass*t1 - ars*t2) ;
            rhs[ii+1] = recip*(-ars*t1 + arr*t2) ;
            ii += 2 ;
            break ;
         default :
/*
            report the real pivot count next to the bad pivot
*/
            fprintf(stderr, "\n fatal error in DFrontMtx_solve"
                    "\n npivot = %d, pivotsizes[%d] = %d\n",
                    npivot, ipivot, pivotsizes[ipivot]) ;
            exit(-1) ;
         }
      }
   }
   rhs += nDJ ;
}
return ; }

/*--------------------------------------------------------------------*/
/*
   ------------------------------------------------------------------
   perform a dense backward solve for a front

   nDJ        -- number of internal rows
   nbndJ      -- number of boundary rows; when it is not positive
                 the boundary update is skipped
   nrhs       -- number of solution vectors
   npivot     -- number of pivot blocks, read only when pivotsizes
                 is not NULL
   pivotsizes -- sizes of the pivot blocks, or NULL
   diagent    -- when pivotsizes is NULL and diagent is not NULL,
                 solJ entry jj is divided by diagent[jj] before it
                 is propagated; not read when pivotsizes is not NULL
   upperent   -- off-diagonal entries; the internal entries come
                 first, the boundary entries follow
   solJ       -- solution entries, overwritten; consecutive vectors
                 are nDJ + nbndJ apart and the boundary entries of a
                 vector start nDJ past its internal entries

   created -- 97may17, cca
   ------------------------------------------------------------------
*/
static void
denseBackwardSolve (
   int      nDJ,
   int      nbndJ,
   int      nrhs,
   int      npivot,
   int      pivotsizes[],
   double   diagent[],
   double   upperent[],
   double   solJ[]
) {
DA2      bndCols, solBnd, solInt ;
double   scale ;
double   *col, *vec ;
int      bndOffset, blockStart, c, ip, k, last, ldsol, r ;
/*
    ------------------------------------------------------------
    bndOffset = number of internal entries stored in upperent,
    i.e., where the boundary entries begin
    ------------------------------------------------------------
*/
if ( pivotsizes != NULL ) {
   bndOffset  = 0 ;
   blockStart = 0 ;
   for ( ip = 0 ; ip < npivot ; ip++ ) {
      bndOffset  += blockStart * pivotsizes[ip] ;
      blockStart += pivotsizes[ip] ;
   }
} else {
   bndOffset = (nDJ*(nDJ-1))/2 ;
}
ldsol = nDJ + nbndJ ;
#if MYDEBUG > 0
fprintf(stdout, "\n inside denseBackwardSolve"
     "\n nDJ = %d, nbndJ = %d, nrhs = %d, npivot = %d, pivotsizes = %p"
     "\n solJ = %p",
     nDJ, nbndJ, nrhs, npivot, pivotsizes, solJ) ;
fflush(stdout) ;
#endif
if ( nbndJ > 0 ) {
/*
    ------------------------------------------------------------
    fold the boundary part of the solution into the internal
    part.
    NOTE(review): DA2_init arguments are presumably
    (object, n1, n2, inc1, inc2, entries) and DA2_ndUpd
    presumably performs solInt -= bndCols * solBnd -- confirm
    against the DA2 documentation.
    ------------------------------------------------------------
*/
   DA2_setDefaultFields(&solInt) ;
   DA2_setDefaultFields(&bndCols) ;
   DA2_setDefaultFields(&solBnd) ;
   DA2_init(&solInt, nDJ, nrhs, 1, ldsol, solJ) ;
   DA2_init(&bndCols, nDJ, nbndJ, 1, nDJ, upperent + bndOffset) ;
   DA2_init(&solBnd, nbndJ, nrhs, 1, ldsol, solJ + nDJ) ;
   DA2_ndUpd(&solInt, &bndCols, &solBnd) ;
}
/*
   ------------------------------------------------------------
   internal part : walk the columns from last to first, `col`
   moves backwards through the internal entries of upperent
   ------------------------------------------------------------
*/
if ( pivotsizes == NULL ) {
/*
   column c holds c entries, one for each row above it
*/
   col = upperent + bndOffset ;
   for ( c = nDJ - 1 ; c >= 0 ; c-- ) {
      col -= c ;
      for ( k = 0, vec = solJ ; k < nrhs ; k++, vec += ldsol ) {
         if ( diagent != NULL ) {
            vec[c] = vec[c] / diagent[c] ;
         }
         scale = vec[c] ;
         for ( r = 0 ; r < c ; r++ ) {
            vec[r] -= scale * col[r] ;
         }
      }
   }
} else {
/*
   every column of a pivot block holds blockStart entries;
   bndOffset is rewound to the first entry of the current block
*/
   last = nDJ - 1 ;
   for ( ip = npivot - 1 ; ip >= 0 ; ip-- ) {
      blockStart =  last - pivotsizes[ip] + 1 ;
      bndOffset  -= blockStart * pivotsizes[ip] ;
      for ( k = 0, vec = solJ ; k < nrhs ; k++, vec += ldsol ) {
         col = upperent + bndOffset ;
         for ( c = blockStart ; c <= last ; c++, col += blockStart ) {
            scale = vec[c] ;
            for ( r = 0 ; r < blockStart ; r++ ) {
               vec[r] -= scale * col[r] ;
            }
         }
      }
      last -= pivotsizes[ip] ;
   }
}
return ; }
   
/*--------------------------------------------------------------------*/
/*
   ------------------------------------------------------------------
   perform a sparse backward solve for a front

   nDJ      -- number of internal columns
   nbndJ    -- number of boundary columns
   nrhs     -- number of solution vectors
   sizesU   -- sizesU[jj] is the number of stored entries for
               column jj, 0 <= jj < nDJ + nbndJ
   upperind -- indices (into a solJ vector) of the stored entries,
               all columns concatenated in column order
   upperent -- values of the stored entries, parallel to upperind
   diagent  -- if not NULL, solJ entry jj of every vector is divided
               by diagent[jj] before column jj is propagated
   solJ     -- solution entries, overwritten; consecutive vectors
               are nDJ + nbndJ apart and entry jj >= nDJ of a vector
               is read as the value belonging to boundary column jj

   when diagent is not NULL every internal entry is scaled,
   including those whose column has no stored entries, so that the
   result agrees with denseBackwardSolve().

   created -- 97may17, cca
   ------------------------------------------------------------------
*/
static void
sparseBackwardSolve (
   int      nDJ,
   int      nbndJ,
   int      nrhs,
   int      sizesU[],
   int      upperind[],
   double   diagent[],
   double   upperent[],
   double   solJ[]
) {
double   fac ;
double   *colj, *sol ;
int      irhs, jj, kk, ncol, offset, usizej ;
int      *indj ;

ncol = nDJ + nbndJ ;
/*
    ----------------------------------------------------------
    get updates from the ancestors' solution. offset is the
    number of entries stored for the internal columns, so the
    boundary columns start at upperent + offset.
    ----------------------------------------------------------
*/
for ( jj = 0, offset = 0 ; jj < nDJ ; jj++ ) {
   offset += sizesU[jj] ;
}
colj = upperent + offset ;
indj = upperind + offset ;
for ( jj = nDJ ; jj < ncol ; jj++ ) {
   if ( (usizej = sizesU[jj]) > 0 ) {
      sol = solJ ;
      for ( irhs = 0 ; irhs < nrhs ; irhs++ ) {
         fac = sol[jj] ;
         for ( kk = 0 ; kk < usizej ; kk++ ) {
            sol[indj[kk]] -= colj[kk] * fac ;
         }
         sol += ncol ;
      }
      colj += usizej ;
      indj += usizej ;
   }
}
/*
   ------------------------------------------------------------
   solve for the internal vertices, last column first. colj and
   indj are rewound one column at a time from the end of the
   internal entries.
   ------------------------------------------------------------
*/
colj = upperent + offset ;
indj = upperind + offset ;
for ( jj = nDJ - 1 ; jj >= 0 ; jj-- ) {
   usizej = sizesU[jj] ;
#if MYDEBUG > 1
   fprintf(stdout, "\n jj = %d, usizej = %d", jj, usizej) ;
#endif
   if ( usizej <= 0 && diagent == NULL ) {
/*
      nothing to propagate and nothing to scale
*/
      continue ;
   }
   if ( usizej > 0 ) {
      colj -= usizej ;
      indj -= usizej ;
#if MYDEBUG > 1
      fprintf(stdout, "\n indj") ;
      IVfprintf(stdout, usizej, indj) ;
      fprintf(stdout, "\n colj") ;
      DVfprintf(stdout, usizej, colj) ;
#endif
   }
   sol = solJ ;
   for ( irhs = 0 ; irhs < nrhs ; irhs++ ) {
      if ( diagent == NULL ) {
         fac = sol[jj] ;
      } else {
/*
         the scaling must not depend on whether the column is empty
*/
         fac = sol[jj] = sol[jj] / diagent[jj] ;
      }
      for ( kk = 0 ; kk < usizej ; kk++ ) {
         sol[indj[kk]] -= colj[kk] * fac ;
      }
      sol += ncol ;
   }
}
return ; }
   
/*--------------------------------------------------------------------*/
