/*  factorMT.c  */

#include "../DFrontMtx.h"
#include "../../Ideq.h"
#include "../../timings.h"

/*--------------------------------------------------------------------*/
/*
   ------------------------------------------------------------------
   worker method for each thread.

   forward declaration -- the definition appears later in this file.
   the function is handed to thr_create() or pthread_create() as the
   thread start routine by DFrontMtx_factorMT(), which passes a
   pointer to that thread's DFactorData object as the argument.
   ------------------------------------------------------------------
*/
static void * DFrontMtx_workerFactor ( void *arg ) ;
/*--------------------------------------------------------------------*/
/*
   -------------------------------------------------------------------
   parallel factorization method for A.

   convenience wrapper: the input matrix is wrapped in a temporary
   DPencil object and the work is handed to
   DFrontMtx_MT_factorDPencil(). every other argument is passed
   through untouched.

   frontmtx  -- front matrix object that receives the factorization
   inpmtx    -- input matrix A
   tau, droptol, cpus, stats, msglvl, msgFile
             -- same meaning as in DFrontMtx_MT_factorDPencil()
   ownersIV  -- pointer to IV object that holds the map from fronts
                to threads
   lookahead -- lookahead parameter that allows computation at
                higher levels of the front tree to proceed when
                lower fronts are not yet finished. use lookahead = 0
                to turn off this option. otherwise lookahead
                ancestors of an active unfinished front can be active.

   return value -- pointer to the first DChv object in a list
                   that contains postponed data

   created -- 97nov08, cca
   -------------------------------------------------------------------
*/
DChv *
DFrontMtx_MT_factorDInpMtx (
   DFrontMtx   *frontmtx,
   DInpMtx     *inpmtx,
   double      tau,
   double      droptol,
   IV          *ownersIV,
   int         lookahead,
   double      cpus[],
   int         stats[],
   int         msglvl,
   FILE        *msgFile
) {
DPencil   wrapper ;
/*
   ---------------------------------------------------------------
   build a pencil around the single matrix. the 0.0 and NULL
   arguments are presumably the shift sigma and the (absent) second
   matrix B of A + sigma*B -- confirm against DPencil_init().
   ---------------------------------------------------------------
*/
DPencil_setDefaultFields(&wrapper) ;
DPencil_init(&wrapper, inpmtx, 0.0, NULL) ;

return(DFrontMtx_MT_factorDPencil(frontmtx, &wrapper, tau, droptol,
                 ownersIV, lookahead, cpus, stats, msglvl, msgFile)) ; }

/*--------------------------------------------------------------------*/
/*
   -------------------------------------------------------------------
   prototype of the driver that is defined later in this file.
   repeated here so that the wrapper below does not depend on the
   header for the declaration.
   -------------------------------------------------------------------
*/
DChv * DFrontMtx_factorMT ( DFrontMtx *frontmtx, DPencil *pencil,
                            double tau, double droptol, IV *ownersIV,
                            int lookahead, double cpus[], int stats[],
                            int msglvl, FILE *msgFile ) ;
/*
   -------------------------------------------------------------------
   parallel factorization method for A + sigma*B.
   all but two input parameters are the same as the serial method.
   this is a wrapper method around DFrontMtx_factorMT().

   frontmtx  -- front matrix object that receives the factorization
   pencil    -- pencil object that holds A + sigma*B
   tau, droptol, cpus, stats, msglvl, msgFile
             -- passed through unchanged to DFrontMtx_factorMT(),
                which validates them
   ownersIV  -- pointer to IV object that holds map from fronts 
                to threads
   lookahead -- lookahead parameter that allows computation at
                higher levels of the front tree to proceed when
                lower fronts are not yet finished. use lookahead = 0
                to turn off this option. otherwise lookahead ancestors
                of an active unfinished front can be active.

   return value -- pointer to the first DChv object in a list
                   that contains postponed data
 
   created -- 97nov08, cca
   -------------------------------------------------------------------
*/
DChv *
DFrontMtx_MT_factorDPencil (
   DFrontMtx   *frontmtx,
   DPencil     *pencil,
   double      tau,
   double      droptol,
   IV          *ownersIV,
   int         lookahead,
   double      cpus[],
   int         stats[],
   int         msglvl,
   FILE        *msgFile
) {
DChv   *rootchv ;
/*
   -----------------------------------------------------------------
   hand the work to the driver. (this function used to call itself 
   with the same arguments, which recursed without end.)
   -----------------------------------------------------------------
*/
rootchv = DFrontMtx_factorMT(frontmtx, pencil, tau, droptol,
                    ownersIV, lookahead, cpus, stats, msglvl, msgFile) ;

return(rootchv) ; }

/*--------------------------------------------------------------------*/
/*
   -------------------------------------------------------------------
   parallel factorization method.
   all but two input parameters are the same as the serial method.

   one DFactorData object is created per thread, one worker thread
   is started per object (DFrontMtx_workerFactor), the threads are
   joined, and the per-thread statistics are merged into cpus[] 
   and stats[].

   frontmtx  -- front matrix object, must not be NULL
   pencil    -- pencil object, must not be NULL
   tau       -- must be >= 1.0
   droptol   -- must be >= 0.0
   ownersIV  -- pointer to IV object that holds map from fronts 
                to threads. the number of threads used is 
                1 + (largest entry of ownersIV).
   lookahead -- lookahead parameter that allows computation at
                higher levels of the front tree to proceed when
                lower fronts are not yet finished. use lookahead = 0
                to turn off this option. otherwise lookahead ancestors
                of an active unfinished front can be active.
                must be >= 0.
   cpus      -- cpus[0..9] are incremented by the corresponding 
                entries of every thread. the entries are added to, 
                not overwritten, so the caller presumably zeroes 
                them first -- confirm against the callers.
   stats     -- stats[0..2] are incremented by the corresponding
                entries of every thread (same remark as for cpus[]).
                the following entries are overwritten
                   stats[3]  -- frontmtx->diagDVL->tsize
                   stats[4]  -- frontmtx->lowerDVL->tsize, 0 if NULL
                   stats[5]  -- frontmtx->upperDVL->tsize
                   stats[6]  -- frontmtx->nlocks
                   stats[7]  -- aggregatelist->nlocks
                   stats[8]  -- postponedlist->nlocks, 
                                0 if there is no postponed list
                   stats[9]  -- manager->nactive
                   stats[10] -- manager->nbytesactive
                   stats[11] -- manager->nbytesrequested
                   stats[12] -- manager->nrequests
                   stats[13] -- manager->nreleases
                   stats[14] -- manager->nlocks
                   stats[15] -- manager->nunlocks
   msglvl    -- message level, must be >= 0. when msglvl > 0 every
                thread gets its own message file "res.<myid>", 
                opened for writing in the current directory.
   msgFile   -- message file of the calling thread, 
                must not be NULL when msglvl > 0

   on bad input, or when a file or a thread cannot be created, 
   a message is written to stderr and exit(-1) is called.

   return value -- pointer to the first DChv object in a list
                   that contains postponed data,
                   NULL when pivoting is not enabled
 
   created -- 97may30, cca
   -------------------------------------------------------------------
*/
DChv *
DFrontMtx_factorMT (
   DFrontMtx   *frontmtx,
   DPencil     *pencil,
   double      tau,
   double      droptol,
   IV          *ownersIV,
   int         lookahead,
   double      cpus[],
   int         stats[],
   int         msglvl,
   FILE        *msgFile
) {
char          buffer[20] ;
DChv          *rootchv ;
DChvManager   *manager ;
DChvList      *aggregatelist ;
DChvList      *postponedlist ;
double        t0, t1, t2 ;
DFactorData   *data, *dataObjects ;
FILE          *fp ;
int           ierr, ii, myid, nfront, nthread, rc ;
int           *owners ;
#if THREAD_TYPE == TT_POSIX
pthread_t     *tids ;
#endif
/*
   --------------
   check the data
   --------------
*/
MARKTIME(t0) ;
if (  frontmtx == NULL || pencil == NULL || tau < 1.0 || droptol < 0.0
   || ownersIV == NULL || lookahead < 0 || cpus == NULL || stats == NULL
   || msglvl < 0 || (msglvl > 0 && msgFile == NULL) ) {
/*
   %p requires a void * argument, hence the casts
*/
   fprintf(stderr, "\n fatal error in DFrontMtx_factorMT()"
           "\n frontmtx = %p, pencil = %p"
           "\n tau = %f, droptol = %f, ownersIV = %p, lookahead = %d"
           "\n cpus = %p, stats = %p, msglvl = %d, msgFile = %p"
           "\n bad input\n", (void *) frontmtx, (void *) pencil, 
           tau, droptol, (void *) ownersIV, lookahead, 
           (void *) cpus, (void *) stats, msglvl, (void *) msgFile) ;
   exit(-1) ;
}
IV_sizeAndEntries(ownersIV, &nfront, &owners) ;
nthread = 1 + IV_max(ownersIV) ;
/*
   ----------------------------------------------------------------
   create the DChvManager object to manage working DChv objects, 
   a DChvList object to handle the lists of aggregate DChv objects,
   and if pivoting is enabled, a DChvList object to handle 
   the lists of postponed DChv objects.
   ----------------------------------------------------------------
*/
MARKTIME(t1) ;
manager = DChvManager_new() ;
DChvManager_init(manager, 1) ;
aggregatelist = DFrontMtx_aggregateList(frontmtx, ownersIV, 1) ;
if ( frontmtx->pivotingflag == 1 ) {
   postponedlist = DFrontMtx_postList(frontmtx, ownersIV, 1) ;
} else {
   postponedlist = NULL ;
}
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : initialize lists and manager", 
           t2 - t1) ;
}
/*
   -----------------------------------------------------------
   create nthread DFactorData objects and load with their data
   -----------------------------------------------------------
*/
MARKTIME(t1) ;
ALLOCATE(dataObjects, struct _DFactorData, nthread) ;
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
   DFactorData_init(data, frontmtx, pencil, tau,
                    droptol, ownersIV, lookahead, manager, 
                    aggregatelist, postponedlist) ;
   if ( msglvl > 0 ) {
/*
      "res." plus the longest decimal int plus the terminator 
      fits into the 20 bytes of buffer[] for ints up to 32 bits.
      NOTE(review): the file is not closed in this function,
      presumably DFactorData_clearData() takes care of it -- confirm.
*/
      sprintf(buffer, "res.%d", myid) ;
      if ( (fp = fopen(buffer, "w")) == NULL ) {
         fprintf(stderr, "\n fatal error, unable to open file %s",
                 buffer) ;
         exit(-1) ;
      }
      DFactorData_setInfo(data, myid, msglvl, fp) ;
   } else {
      DFactorData_setInfo(data, myid, msglvl, NULL) ;
   }
}
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : initialize data objects", t2 - t1) ;
}
/*
   -------------------
   set the concurrency
   -------------------
*/
#if THREAD_TYPE == TT_SOLARIS
MARKTIME(t1) ;
thr_setconcurrency(nthread) ;
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : set concurrency time", t2 - t1) ;
}
#endif
/*
#####   NOTE: for SGI machines, this command must be present
#####         for the thread scheduling to be efficient.
#####         this is NOT a POSIX call, but SGI needs it anyway
#if THREAD_TYPE == TT_POSIX
pthread_setconcurrency(nthread) ;
#endif
*/
/*
   ------------------
   create the threads
   ------------------
*/
MARKTIME(t1) ;
#if THREAD_TYPE == TT_SOLARIS
/*
   only nthread - 1 threads are spawned, 
   the calling thread executes the last data object itself
*/
for ( myid = 0, data = dataObjects ; 
      myid < nthread - 1 ;
      myid++, data++ ) {
   rc = thr_create(NULL, 0, DFrontMtx_workerFactor, data, 0, NULL) ;
   if ( rc != 0 ) {
      fprintf(stderr, 
              "\n fatal error, myid = %d, rc = %d from thr_create",
              myid, rc) ;
      exit(-1) ;
   }
}
#endif
#if THREAD_TYPE == TT_POSIX
{
pthread_attr_t   attr ;
pthread_attr_init(&attr) ;
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) ;
ALLOCATE(tids, pthread_t, nthread) ;
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
   rc = pthread_create(&tids[myid], &attr, 
                       DFrontMtx_workerFactor, data) ;
   if ( rc != 0 ) {
      fprintf(stderr, 
              "\n fatal error, myid = %d, rc = %d from pthread_create",
              myid, rc) ;
      exit(-1) ;
   } else if ( msglvl > 2 ) {
/*
      pthread_t is not an int, so it must not be passed to %d.
      the cast assumes pthread_t is an arithmetic or pointer type.
*/
      fprintf(stderr, "\n thread %lu created", 
              (unsigned long) tids[myid]) ;
   }
}
/*
   the attribute object is not needed once the threads exist
*/
pthread_attr_destroy(&attr) ;
}
#endif
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : thread creation time", t2 - t1) ;
}
/*
   ----------------
   join the threads
   ----------------
*/
MARKTIME(t1) ;
#if THREAD_TYPE == TT_SOLARIS
/*
   run the last data object in the calling thread. it is addressed
   explicitly instead of through the loop cursor left over from the
   creation loop.
*/
DFrontMtx_workerFactor(dataObjects + nthread - 1) ;
for ( myid = 0 ; myid < nthread - 1 ; myid++ ) {
   thr_join(0, 0, 0) ;
}
#endif
#if THREAD_TYPE == TT_POSIX
{
void       *status ;
for ( myid = 0 ; myid < nthread ; myid++ ) {
   pthread_join(tids[myid], &status) ;
}
/*
   release the thread id array, it was never freed before
*/
FREE(tids) ;
}
#endif
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : thread join time", t2 - t1) ;
}
if ( postponedlist != NULL ) {
   rootchv = DChvList_getList(postponedlist, nfront) ;
} else {
   rootchv = NULL ;
}
/*
   msgFile is allowed to be NULL when msglvl == 0, 
   so the manager is written only when there is a file to write to
*/
if ( msgFile != NULL ) {
   DChvManager_writeForHumanEye(manager, msgFile) ;
}
/*
   -------------------
   fill the statistics
   -------------------
*/
for ( myid = 0, data = dataObjects ; 
      myid < nthread ;
      myid++, data++ ) {
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n thread %d stats", myid) ;
      IVfp80(msgFile, 3, data->stats, 20, &ierr) ;
      fprintf(msgFile, "\n thread %d cpus", myid) ;
      DVfprintf(msgFile, 10, data->cpus) ;
   }
   for ( ii = 0 ; ii < 3 ; ii++ ) {
      stats[ii] += data->stats[ii] ;
   }
   for ( ii = 0 ; ii < 10 ; ii++ ) {
      cpus[ii] += data->cpus[ii] ;
   }
}
stats[3] = frontmtx->diagDVL->tsize ;
if ( frontmtx->lowerDVL != NULL ) {
   stats[4] = frontmtx->lowerDVL->tsize ;
} else {
   stats[4] = 0 ;
}
stats[5] = frontmtx->upperDVL->tsize ;
stats[6] = frontmtx->nlocks ;
stats[7] = aggregatelist->nlocks ;
if ( postponedlist != NULL ) {
   stats[8] = postponedlist->nlocks ;
} else {
/*
   give the entry a defined value, as is done for stats[4]
*/
   stats[8] = 0 ;
}
stats[9]  = manager->nactive         ;
stats[10] = manager->nbytesactive    ;
stats[11] = manager->nbytesrequested ;
stats[12] = manager->nrequests       ;
stats[13] = manager->nreleases       ;
stats[14] = manager->nlocks          ;
stats[15] = manager->nunlocks        ;
if ( msglvl > 0 ) {
   fprintf(msgFile, 
           "\n\n factorization has finished" 
           "\n %d locks of the front matrix"
           "\n %d locks of the aggregate list",
           frontmtx->nlocks, aggregatelist->nlocks) ;
   if ( postponedlist != NULL ) {
/*
      report the postponed list here, the aggregate list 
      has been reported in the line above
*/
      fprintf(msgFile, "\n %d locks of the postponed list",
              postponedlist->nlocks) ;
   }
}
/*
   -------------
   free the data
   -------------
*/
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
   DFactorData_clearData(data) ;
}
FREE(dataObjects) ;
DChvList_free(aggregatelist) ;
if ( postponedlist != NULL ) {
   DChvList_free(postponedlist) ;
}
DChvManager_free(manager) ;
/*
   the total is measured from the entry of the method (t0), 
   not from the start of the clean up
*/
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : total time", t2 - t0) ;
}

return(rootchv) ; }

/*--------------------------------------------------------------------*/
/*
   ----------------------------------------------------
   purpose -- worker method to factor the matrix


   created -- 97may30, cca
   ----------------------------------------------------
*/
static void *
DFrontMtx_workerFactor (
   void   *arg
) {
char          *status, *willUpdate ;
DFactorData   *data ;
DFrontMtx     *frontmtx ;
double        t1, t2 ;
double        *cpus ;
DV            *tmpDV ;
ETree         *frontETree ;
FILE          *msgFile ;
Ideq          *deq ;
int           J, K, lookahead, msglvl, myid, nfront ;
int           *head, *link, *nactiveChild, *offsets,
              *owners, *par ;
IV            *markIV, *ownersIV, *pivotsizesIV ;
Tree          *tree ;
/*
   -------------------------------
   extract pointers and dimensions
   -------------------------------
*/
MARKTIME(t1) ;
data = (DFactorData *) arg ;
msglvl     = data->msglvl   ;
msgFile    = data->msgFile  ;
myid       = data->myid     ;
frontmtx   = data->frontmtx ;
frontETree = frontmtx->frontETree ;
tree       = ETree_tree(frontETree) ;
nfront     = ETree_nfront(frontETree) ;
par        = ETree_par(frontETree) ;
lookahead  = data->lookahead ;
ownersIV   = data->ownersIV ;
owners     = IV_entries(ownersIV) ;
cpus       = data->cpus ;
#if THREAD_TYPE == TT_SOLARIS
if ( msglvl > 2 ) {
   fprintf(stdout, 
           "\n ### inside workerFactor, myid = %d, thr_self() = %d", 
           myid, thr_self()) ;
   fflush(stdout) ;
}
#endif
#if THREAD_TYPE == TT_POSIX
if ( msglvl > 0 ) {
   fprintf(stdout, "\n ### inside workerFactor, myid = %d" 
                   ", pthread_self() = %d", myid, pthread_self()) ;
   fflush(stdout) ;
}
#endif
/*
   ---------------------------------------------------------
   initialize the willUpdate[] vector,
   willUpdate[J] == 'Y' --> this thread owns at least one
   descendent of J that has support with J
   ---------------------------------------------------------
*/
willUpdate = DFrontMtx_willUpdate(frontmtx, ownersIV, myid) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n willUpdate") ;
   CVfprintf(msgFile, nfront, willUpdate) ;
   fflush(msgFile) ;
}
/*
   ----------------------------------------------------------------
   initialize the status[] vector,
   status[J] == 'W' --> J belongs to an active path for this thread
   ----------------------------------------------------------------
*/
status = DFrontMtx_status(frontmtx, ownersIV, myid) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n status") ;
   CVfprintf(msgFile, nfront, status) ;
   fflush(msgFile) ;
}
/*
   -----------------------------------------------
   initialize the nactiveChild[] vector,
   nactiveChild[J] measures the number of children 
   that belong to active paths of this thread
   -----------------------------------------------
*/
nactiveChild = DFrontMtx_nactiveChild(frontmtx, status, myid) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n nactiveChild") ;
   IVfprintf(msgFile, nfront, nactiveChild) ;
   fflush(msgFile) ;
}
/*
   --------------------------------------------------------
   initialize the Ideq object that holds the initial fronts
   of the active paths, owned fronts with no children that
   are owned or updates by this thread.
   --------------------------------------------------------
*/
deq = DFrontMtx_setUpDequeue(frontmtx, ownersIV, status, myid) ;
/*
   ------------------------------
   initialize the working storage
   ------------------------------
*/
ALLOCATE(data->fronts, struct _DChv *, nfront) ;
for ( J = 0 ; J < nfront ; J++ ) {
   data->fronts[J] = NULL ;
}
head    = IVinit(nfront, -1) ;
link    = IVinit(nfront, -1) ;
offsets = IVinit(nfront, 0) ;
if ( frontmtx->pivotingflag == 1 ) {
   markIV = IV_new() ;
   if ( frontmtx->symmetryflag == 0 ) {
      pivotsizesIV = IV_new() ;
   } else {
      pivotsizesIV = NULL ;
   }
} else {
   pivotsizesIV = markIV = NULL ;
}
tmpDV = DV_new() ;
/*
   ---------------------------
   loop while a path is active
   ---------------------------
*/
while ( (J = Ideq_removeFromHead(deq)) != -1 ) {
   if ( msglvl > 1 ) {
      fprintf(msgFile, "\n\n ### checking out front %d", J) ;
      fflush(msgFile) ;
   }
   DFrontMtx_checkFront(frontmtx, data, J, lookahead, status,
                        willUpdate, nactiveChild, head, link, 
                        offsets, pivotsizesIV, markIV, tmpDV) ;
   if ( status[J] == 'F' ) {
      if ( msglvl > 1 ) {
         fprintf(msgFile, "\n\n front %d is finished", J) ;
         fflush(msgFile) ;
      }
      if ( (K = par[J]) != -1 && nactiveChild[K] == 0 ) {
         if ( msglvl > 1 ) {
            fprintf(msgFile, "\n\n adding front %d to dequeue", K) ;
            fflush(msgFile) ;
         }
         Ideq_insertAtHead(deq, K) ;
      }
   } else {
      if ( msglvl > 1 ) {
         fprintf(msgFile, "\n\n front %d not yet done", J) ;
         fflush(msgFile) ;
      }
      Ideq_insertAtTail(deq, J) ;
   }
}
/*
   ------------------------
   free the working storage
   ------------------------
*/
if ( pivotsizesIV != NULL ) {
   IV_free(pivotsizesIV) ;
}
if ( markIV != NULL ) {
   IV_free(markIV) ;
}
if ( tmpDV != NULL ) {
   DV_free(tmpDV) ;
}
CVfree(status) ;
CVfree(willUpdate) ;
IVfree(nactiveChild) ;
IVfree(head) ;
IVfree(link) ;
IVfree(offsets) ;
Ideq_free(deq) ;
MARKTIME(t2) ;
cpus[9] = t2 - t1 - cpus[0] - cpus[1] - cpus[2] - cpus[3] 
        - cpus[4] - cpus[5] - cpus[6] - cpus[7] - cpus[8] ;

return(NULL) ; }

/*--------------------------------------------------------------------*/
