/*  testFactor2.c  */

#include "../DFrontMtx.h"
#include "../../DLinSystem.h"
#include "../../SymbFac.h"
#include "../../Drand.h"
#include "../../timings.h"

/*--------------------------------------------------------------------*/
/*
   forward declaration of a routine that takes a front tree object,
   the name of an ETree file, a message level and a message file,
   and returns a pointer to an IV object.

   NOTE(review): no definition or call of initializeETree is visible
   in this file; main() calls ETree_initFromFile() with the same
   argument list instead, so this prototype looks stale -- confirm
   before removing it.
*/
IV * initializeETree ( ETree *frontETree, char *inETreeFileName,
   int msglvl, FILE *msgFile ) ;
/*--------------------------------------------------------------------*/

/*
   ----------------------------------------------------------
   return the rate in mflops for `ops` operations carried out
   in `seconds` seconds. when no time was measured (the timer
   resolution may be coarser than the work) return 0.0 so the
   report never divides by zero.
   ----------------------------------------------------------
*/
static double
mflopsRate ( double ops, double seconds )
{
if ( seconds > 0.0 ) {
   return(1.e-6*ops/seconds) ;
}
return(0.0) ;
}

int
main ( int argc, char *argv[] )
/*
   ------------------------------------------------------
   test the parallel factor method for a DFrontMtx object
   ( 1) read the command line parameters
   ( 2) read in the ETree object and get the
        oldToNew permutation IV object
   ( 3) set up the DLinSystem object
        (matrix pencil, solution and right hand side)
   ( 4) permute the linear system
   ( 5) get the symbolic factorization
   ( 6) convert the DPencil object to chevron storage
   ( 7) initialize the DFrontMtx object
   ( 8) read in the owners map IV object
   ( 9) factor the matrix in parallel
   (10) post-process the factor matrix
   (11) solve the systems, first in serial, then in parallel,
        and report the maximum error against the known solution

   return value --
      0 --> normal return
     -1 --> bad command line or unable to open the message file
   the program also calls exit(0) when the owners file is "none"
   and exit(-1) when an input object cannot be read.

   created -- 97apr04, cca
   ------------------------------------------------------
*/
{
char               *inETreeFile, *inpmtxAfile, *inpmtxBfile, 
                   *inOwnersFile ;
DA2                *xDA2, *yDA2, *ykeepDA2, *zDA2 ;
DChv               *chv, *rootchv ;
DDenseMtxManager   *manager ;
DFrontMtx          *frontmtx ;
DLinSystem         *linsys ;
double             cputotal, diagsize, droptol, factorops, lowersize,
                   nops, sigma, tau, t1, t2, uppersize, wallclock ;
double             cpus[10] ;
Drand              drand ;
ETree              *frontETree   ;
FILE               *msgFile ;
int                lookahead, msglvl, neqns, 
                   nfront, nrhs, pivotingflag, randomflag, rc, 
                   seed, sparsityflag, symmetryflag ;
int                stats[16] ;
IV                 *oldToNewIV, *ownersIV ;
IVL                *symbfacIVL ;

/*
   NOTE(review): the help text for "lookahead" below describes mutex
   lock modes and looks copied from another flag's description --
   confirm the intended wording. the text is left unchanged here.
*/
if ( argc != 17 ) {
   fprintf(stdout, 
      "\n\n usage : %s msglvl msgFile inETreeFile inpmtxAfile"
      "\n         sigma inpmtxBfile inOwnersFile seed symmetryflag "
      "\n         sparsityflag pivotingflag tau droptol randomflag "
      "\n         lookahead nrhs"
      "\n    msglvl       -- message level"
      "\n    msgFile      -- message file"
      "\n    inETreeFile  -- front tree file"
      "\n    inpmtxAfile  -- matrix file that contains A "
      "\n    sigma        -- shift value "
      "\n    inpmtxBfile  -- matrix file that contains B "
      "\n    inOwnersFile -- owners map file"
      "\n    seed         -- random number seed"
      "\n    symmetryflag -- symmetry flag"
      "\n       0 --> symmetric structure, symmetric entries"
      "\n       1 --> symmetric structure, nonsymmetric entries"
      "\n       2 --> nonsymmetric structure, nonsymmetric entries"
      "\n       3 --> matrix from QR factorization"
      "\n    sparsityflag -- sparsity flag"
      "\n       0 --> store dense fronts"
      "\n       1 --> store sparse fronts, use droptol to drop entries"
      "\n    pivotingflag -- pivoting flag"
      "\n       0 --> do not pivot"
      "\n       1 --> enable pivoting"
      "\n    tau     -- upper bound on factor entries"
      "\n               used only with pivoting"
      "\n    droptol -- lower bound on factor entries"
      "\n               used only with sparse fronts"
      "\n    randomflag -- random number flag"
      "\n       0 --> use matrix entries from file"
      "\n       1 --> use random matrix entries"
      "\n    lookahead -- parameter to schedule computation"
      "\n       0 --> mutex lock is not allocated or initialized"
      "\n       1 --> mutex lock is allocated and it can synchronize"
      "\n             only threads in this process."
      "\n       2 --> mutex lock is allocated and it can synchronize"
      "\n             only threads in this and other processes."
      "\n    nrhs -- number of right hand sides"
      "\n", argv[0]) ;
   return(-1) ;
}
/*
   ----------------------------
   decode the command line.
   messages are appended to the
   message file unless it is
   the literal name "stdout"
   ----------------------------
*/
msglvl = atoi(argv[1]) ;
if ( strcmp(argv[2], "stdout") == 0 ) {
   msgFile = stdout ;
} else if ( (msgFile = fopen(argv[2], "a")) == NULL ) {
   fprintf(stderr, "\n fatal error in %s"
           "\n unable to open file %s\n",
           argv[0], argv[2]) ;
   return(-1) ;
}
inETreeFile  = argv[3] ;
inpmtxAfile  = argv[4] ;
sigma        = atof(argv[5]) ;
inpmtxBfile  = argv[6] ;
inOwnersFile = argv[7] ;
seed         = atoi(argv[8]) ;
symmetryflag = atoi(argv[9]) ;
sparsityflag = atoi(argv[10]) ;
pivotingflag = atoi(argv[11]) ;
tau          = atof(argv[12]) ;
droptol      = atof(argv[13]) ;
randomflag   = atoi(argv[14]) ;
lookahead    = atoi(argv[15]) ;
nrhs         = atoi(argv[16]) ;
fprintf(msgFile, 
        "\n %s "
        "\n msglvl        -- %d" 
        "\n msgFile       -- %s" 
        "\n inETreeFile   -- %s"
        "\n inpmtxAfile   -- %s"
        "\n sigma         -- %f"
        "\n inpmtxBfile   -- %s"
        "\n inOwnersFile  -- %s"
        "\n seed          -- %d" 
        "\n symmetryflag  -- %d" 
        "\n sparsityflag  -- %d" 
        "\n pivotingflag  -- %d" 
        "\n tau           -- %e" 
        "\n droptol       -- %e" 
        "\n randomflag    -- %d" 
        "\n lookahead     -- %d" 
        "\n nrhs          -- %d" 
        "\n",
        argv[0], msglvl, argv[2], inETreeFile, inpmtxAfile, sigma,
        inpmtxBfile, inOwnersFile, seed, symmetryflag, sparsityflag, 
        pivotingflag, tau, droptol, randomflag, lookahead, nrhs) ;
fflush(msgFile) ;
/*
   --------------------------------------
   initialize the random number generator
   --------------------------------------
*/
Drand_setDefaultFields(&drand) ;
Drand_init(&drand) ;
Drand_setSeed(&drand, seed) ;
Drand_setNormal(&drand, 0.0, 1.0) ;
/*
   -------------------------------------------
   initialize the ETree and oldToNewIV objects
   -------------------------------------------
*/
frontETree = ETree_new() ;
oldToNewIV = ETree_initFromFile(frontETree, inETreeFile, 
                                msglvl, msgFile) ;
if ( oldToNewIV == NULL ) {
/*
   without the permutation neither neqns nor 
   the permuted system can be formed, so stop here
*/
   fprintf(stderr, "\n fatal error in %s"
           "\n unable to initialize the front tree from file %s\n",
           argv[0], inETreeFile) ;
   exit(-1) ;
}
neqns = IV_size(oldToNewIV) ;
nfront = frontETree->nfront ;
if ( symmetryflag == 0 ) {
   factorops = ETree_nFactorOps(frontETree, 1) ;
} else {
   factorops = ETree_nFactorOps(frontETree, 2) ;
}
/*
   ------------------------
   set up the linear system
   ------------------------
*/
linsys = DLinSystem_new() ;
DLinSystem_setup(linsys, 0, neqns, nrhs, inpmtxAfile, sigma,
                 inpmtxBfile, symmetryflag, randomflag, &drand,
                 msglvl, msgFile) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n matrix pencil") ;
   DPencil_writeForHumanEye(linsys->pencil, msgFile) ;
   fprintf(msgFile, "\n\n solution") ;
   DDenseMtx_writeForHumanEye(linsys->solmtx, msgFile) ;
   fprintf(msgFile, "\n\n right hand side") ;
   DDenseMtx_writeForHumanEye(linsys->rhsmtx, msgFile) ;
   fflush(msgFile) ;
}
/*
   -------------------------
   permute the linear system
   -------------------------
*/
DLinSystem_permute(linsys, oldToNewIV, msglvl, msgFile) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n permuted matrix pencil") ;
   DPencil_writeForHumanEye(linsys->pencil, msgFile) ;
   fprintf(msgFile, "\n\n permuted solution") ;
   DDenseMtx_writeForHumanEye(linsys->solmtx, msgFile) ;
   fprintf(msgFile, "\n\n permuted right hand side") ;
   DDenseMtx_writeForHumanEye(linsys->rhsmtx, msgFile) ;
   fflush(msgFile) ;
}
/*
   --------------------------------------------
   create the symbolic factorization IVL object
   --------------------------------------------
*/
MARKTIME(t1) ;
symbfacIVL = SymbFac_initFromDPencil(frontETree, linsys->pencil) ;
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : compute the symbolic factorization",
        t2 - t1) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n symbolic factorization IVL object") ;
   if ( msglvl == 2 ) {
      IVL_writeStats(symbfacIVL, msgFile) ;
   } else {
      IVL_writeForHumanEye(symbfacIVL, msgFile) ;
   }
   fflush(msgFile) ;
}
/*
   ---------------------------------------
   convert the DPencil storage to chevrons
   ---------------------------------------
*/
MARKTIME(t1) ;
DPencil_changeCoordType(linsys->pencil, 3) ;
DPencil_changeStorageMode(linsys->pencil, 3) ;
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : convert to chevron vectors ",
        t2 - t1) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n DPencil object ") ;
   if ( msglvl == 2 ) {
      DPencil_writeStats(linsys->pencil, msgFile) ;
   } else if ( msglvl > 3 ) {
      DPencil_writeForHumanEye(linsys->pencil, msgFile) ;
   }
}
/*
   -------------------------------
   initialize the DFrontMtx object
   -------------------------------
*/
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n nfront = %d, neqns = %d", nfront, neqns) ;
}
MARKTIME(t1) ;
frontmtx = DFrontMtx_new() ;
DFrontMtx_init(frontmtx, frontETree, symbfacIVL,
               symmetryflag, sparsityflag, pivotingflag, 
               1, 0, NULL) ;
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : initialize the front matrix",
        t2 - t1) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, 
        "\n diagDVL->tsize  = %d"
        "\n lowerDVL->tsize = %d"
        "\n upperDVL->tsize = %d",
        (frontmtx->diagDVL  != NULL) ? frontmtx->diagDVL->tsize  : 0,
        (frontmtx->lowerDVL != NULL) ? frontmtx->lowerDVL->tsize : 0,
        (frontmtx->upperDVL != NULL) ? frontmtx->upperDVL->tsize : 0) ;
}
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n front matrix initialized") ;
   DFrontMtx_writeForHumanEye(frontmtx, msgFile) ;
   fflush(msgFile) ;
}
/*
   ---------------------------------------
   read in the owners map IV object.
   the name "none" means there is no map,
   in which case the run stops here
   ---------------------------------------
*/
if ( strcmp(inOwnersFile, "none") == 0 ) {
   fprintf(msgFile, "\n no file to read from") ;
   exit(0) ;
}
ownersIV = IV_new() ;
MARKTIME(t1) ;
rc = IV_readFromFile(ownersIV, inOwnersFile) ;
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : read in ownersIV from file %s",
        t2 - t1, inOwnersFile) ;
if ( rc != 1 ) {
   fprintf(msgFile, "\n return value %d from IV_readFromFile(%p,%s)",
           rc, ownersIV, inOwnersFile) ;
   exit(-1) ;
}
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n after reading IV object from file %s",
           inOwnersFile) ;
   if ( msglvl == 2 ) {
      IV_writeStats(ownersIV, msgFile) ;
   } else {
      IV_writeForHumanEye(ownersIV, msgFile) ;
   }
}
fflush(msgFile) ;
/*
   -----------------
   factor the matrix
   -----------------
*/
IVzero(16, stats) ;
DVzero(10, cpus) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n starting the parallel factor") ;
   fflush(msgFile) ;
}
MARKTIME(t1) ;
rootchv = DFrontMtx_factorMT(frontmtx, linsys->pencil, tau,
                             droptol, ownersIV, lookahead, 
                             cpus, stats, msglvl, msgFile) ;
MARKTIME(t2) ;
wallclock = t2 - t1 ;
if ( rootchv != NULL ) {
/*
   a non-NULL return is reported as an incomplete factorization.
   list the returned chevrons in the message file, next to the 
   heading, instead of splitting the report across two streams.
*/
   fprintf(msgFile, "\n\n factorization did not complete") ;
   for ( chv = rootchv ; chv != NULL ; chv = chv->next ) {
      fprintf(msgFile, "\n chv %d, nD = %d, nL = %d, nU = %d",
              chv->id, chv->nD, chv->nL, chv->nU) ;
   }
}
fprintf(msgFile, "\n CPU %8.3f : factor matrix, %8.3f mflops",
        wallclock, mflopsRate(factorops, wallclock)) ;
fprintf(msgFile, 
        "\n %8d pivots, %8d pivot tests, %8d delayed rows and columns",
        stats[0], stats[1], stats[2]) ;
fprintf(msgFile, 
        "\n %d entries in D, %d entries in L, %d entries in U", 
        stats[3], stats[4], stats[5]) ;
fprintf(msgFile, 
        "\n %5d locks of the front matrix"
        "\n %5d locks of the aggregate list"
        "\n %5d locks of the postponed list",
        stats[6], stats[7], stats[8]) ;
cputotal = DVsum(10, cpus) ;
fprintf(msgFile, "\n CPU %8.3f total time, %8.3f wall clock",
        cputotal, wallclock) ;
if ( cputotal > 0.0 ) {
   fprintf(msgFile,
   "\n    manager working storage %8.3f %6.2f"
   "\n    initialize/load fronts  %8.3f %6.2f"
   "\n    update fronts           %8.3f %6.2f"
   "\n    aggregate insert        %8.3f %6.2f"
   "\n    aggregate remove/add    %8.3f %6.2f"
   "\n    assemble postponed data %8.3f %6.2f"
   "\n    factor fronts           %8.3f %6.2f"
   "\n    extract postponed data  %8.3f %6.2f"
   "\n    store factor entries    %8.3f %6.2f"
   "\n    miscellaneous           %8.3f %6.2f"
   "\n    total time              %8.3f",
   cpus[0], 100.*cpus[0]/cputotal,
   cpus[1], 100.*cpus[1]/cputotal,
   cpus[2], 100.*cpus[2]/cputotal,
   cpus[3], 100.*cpus[3]/cputotal,
   cpus[4], 100.*cpus[4]/cputotal, 
   cpus[5], 100.*cpus[5]/cputotal, 
   cpus[6], 100.*cpus[6]/cputotal, 
   cpus[7], 100.*cpus[7]/cputotal, 
   cpus[8], 100.*cpus[8]/cputotal, 
   cpus[9], 100.*cpus[9]/cputotal, cputotal) ;
}
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n front factor matrix") ;
   DFrontMtx_writeForHumanEye(frontmtx, msgFile) ;
}
/*
   ------------------------
   convert to local indices
   ------------------------
*/
fprintf(msgFile, "\n\n starting to convert to local indices") ;
fflush(msgFile) ;
DFrontMtx_postProcess(frontmtx, 0, 1, msglvl, msgFile) ;
/*
   ---------------------------------------------------------------
   operation count for one solve: twice the number of stored 
   factor entries. in the symmetric case the upper entries are 
   counted for both triangles. the DVL pointers are guarded the 
   same way as in the report after DFrontMtx_init(), and the sum 
   is formed in double precision so a large factor cannot 
   overflow an int.
   ---------------------------------------------------------------
*/
diagsize  = (frontmtx->diagDVL  != NULL) ? frontmtx->diagDVL->tsize  : 0 ;
uppersize = (frontmtx->upperDVL != NULL) ? frontmtx->upperDVL->tsize : 0 ;
if ( symmetryflag == 0 ) {
   lowersize = uppersize ;
} else {
   lowersize = (frontmtx->lowerDVL != NULL) 
             ? frontmtx->lowerDVL->tsize : 0 ;
}
nops = 2.0*(lowersize + diagsize + uppersize) ;
/*
   ---------------------------------------------------------------
   set up the working matrices for the solves
      xDA2     -- wraps the known solution
      ykeepDA2 -- wraps the right hand side
      yDA2     -- working copy of the right hand side
      zDA2     -- computed solution, later the error
   ---------------------------------------------------------------
*/
xDA2 = DA2_new() ;
yDA2 = DA2_new() ;
ykeepDA2 = DA2_new() ;
zDA2 = DA2_new() ;
DA2_init(yDA2, neqns, nrhs, 1, neqns, NULL) ;
DA2_init(zDA2, neqns, nrhs, 1, neqns, NULL) ;
DDenseMtx_setDA2(linsys->rhsmtx, ykeepDA2) ;
DDenseMtx_setDA2(linsys->solmtx, xDA2) ;
DA2_copy(yDA2, ykeepDA2) ;
DA2_zero(zDA2) ;
/*
   ---------------------------------------------------------------
   solve the linear system via multifrontal solves on multiple rhs
   ---------------------------------------------------------------
*/
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n starting the serial solve") ;
   fflush(msgFile) ;
}
/*
   reset the timing vector so that the factorization times
   cannot show up in the solve breakdown
*/
DVzero(10, cpus) ;
MARKTIME(t1) ;
manager = DDenseMtxManager_new() ;
DDenseMtxManager_init(manager, 0) ;
DFrontMtx_solveDA2(frontmtx, zDA2, yDA2, manager, cpus) ;
MARKTIME(t2) ;
wallclock = t2 - t1 ;
DDenseMtxManager_writeForHumanEye(manager, msgFile) ;
DDenseMtxManager_free(manager) ;
/*
   zDA2 := computed solution - known solution
*/
DA2_sub(zDA2, xDA2) ;
fprintf(msgFile,
"\n CPU %8.3f : serial solve, %8.3f mflops, %12.4e max error",
     wallclock, mflopsRate(nops*nrhs, wallclock), DA2_maxabs(zDA2)) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n error ") ;
   DA2_writeForHumanEye(zDA2, msgFile) ;
   fflush(msgFile) ;
}
/*
   the total solve time is read from cpus[7]
*/
cputotal = cpus[7] ;
fprintf(msgFile, "\n CPU %8.3f total time, %8.3f wall clock",
        cputotal, wallclock) ;
if ( cputotal > 0.0 ) {
   fprintf(msgFile,
   "\n    initialize/load matrices           %8.3f %6.2f"
   "\n    load rhs and solution              %8.3f %6.2f"
   "\n    assemble from children and parent  %8.3f %6.2f"
   "\n    solve and update                   %8.3f %6.2f"
   "\n    store entries                      %8.3f %6.2f"
   "\n    link and free objects              %8.3f %6.2f"
   "\n    miscellaneous                      %8.3f %6.2f"
   "\n    total time                         %8.3f",
   cpus[0], 100.*cpus[0]/cputotal,
   cpus[1], 100.*cpus[1]/cputotal,
   cpus[2], 100.*cpus[2]/cputotal,
   cpus[3], 100.*cpus[3]/cputotal,
   cpus[4], 100.*cpus[4]/cputotal, 
   cpus[5], 100.*cpus[5]/cputotal, 
   cpus[6], 100.*cpus[6]/cputotal, cputotal) ;
}
/*
   -------------------------------------
   parallel: solve the linear system via 
   multifrontal solves on multiple rhs
   -------------------------------------
*/
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n starting the parallel solve") ;
   fflush(msgFile) ;
}
/*
   restore the right hand side, clear the solution 
   and reset the timing vector before the second solve
*/
DA2_copy(yDA2, ykeepDA2) ;
DA2_zero(zDA2) ;
DVzero(10, cpus) ;
MARKTIME(t1) ;
manager = DDenseMtxManager_new() ;
DDenseMtxManager_init(manager, 1) ;
DFrontMtx_MT_solve(frontmtx, zDA2, yDA2, manager, ownersIV,
                   cpus, msglvl, msgFile) ;
MARKTIME(t2) ;
wallclock = t2 - t1 ;
DDenseMtxManager_writeForHumanEye(manager, msgFile) ;
DDenseMtxManager_free(manager) ;
DA2_sub(zDA2, xDA2) ;
fprintf(msgFile,
"\n CPU %8.3f : parallel solve, %8.3f mflops, %12.4e max error",
     wallclock, mflopsRate(nops*nrhs, wallclock), DA2_maxabs(zDA2)) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n error ") ;
   DA2_writeForHumanEye(zDA2, msgFile) ;
   fflush(msgFile) ;
}
cputotal = cpus[7] ;
fprintf(msgFile, "\n CPU %8.3f total time, %8.3f wall clock",
        cputotal, wallclock) ;
if ( cputotal > 0.0 ) {
   fprintf(msgFile,
   "\n    initialize/load matrices           %8.3f %6.2f"
   "\n    load rhs and solution              %8.3f %6.2f"
   "\n    assemble from children and parent  %8.3f %6.2f"
   "\n    solve and update                   %8.3f %6.2f"
   "\n    store entries                      %8.3f %6.2f"
   "\n    link and free objects              %8.3f %6.2f"
   "\n    miscellaneous                      %8.3f %6.2f"
   "\n    total time                         %8.3f",
   cpus[0], 100.*cpus[0]/cputotal,
   cpus[1], 100.*cpus[1]/cputotal,
   cpus[2], 100.*cpus[2]/cputotal,
   cpus[3], 100.*cpus[3]/cputotal,
   cpus[4], 100.*cpus[4]/cputotal, 
   cpus[5], 100.*cpus[5]/cputotal, 
   cpus[6], 100.*cpus[6]/cputotal, cputotal) ;
}
/*
   ------------------------------------------------------------
   free the working storage.
   xDA2 and ykeepDA2 were filled by DDenseMtx_setDA2() from the
   matrices held by linsys; they are released before linsys so 
   they never refer to freed data.
   NOTE(review): this assumes the two wrappers do not own the 
   entries they point to -- confirm against DDenseMtx_setDA2().
   ------------------------------------------------------------
*/
DA2_free(xDA2) ;
DA2_free(ykeepDA2) ;
ETree_free(frontETree) ;
DLinSystem_free(linsys) ;
IV_free(oldToNewIV) ;
IV_free(ownersIV) ;
IVL_free(symbfacIVL) ;
DA2_free(yDA2) ;
DA2_free(zDA2) ;
DFrontMtx_free(frontmtx) ;

fprintf(msgFile, "\n") ;
if ( msgFile != stdout ) {
   fclose(msgFile) ;
} else {
   fflush(msgFile) ;
}

return(0) ; }

/*--------------------------------------------------------------------*/
