/*  QRfactorMT.c  */

#include "../DFrontMtx.h"
#include "../../Ideq.h"
#include "../../timings.h"

/*--------------------------------------------------------------------*/
/*
   -----------------------------
   worker method for each thread
   -----------------------------
*/
void * DFrontMtx_QRworkerFactor ( void *arg ) ;
/*--------------------------------------------------------------------*/
/*
   -------------------------------------------------------------------
   parallel QR factorization method.
   all but two input parameters are the same as the serial method.

   ownersIV  -- pointer to IV object that holds map from fronts 
                to threads

   created -- 97aug20, dkw
   -------------------------------------------------------------------
*/
void
DFrontMtx_MT_QRfactor (
   DFrontMtx   *frontmtx,
   DInpMtx     *inpmtxA,
   IV          *ownersIV,
   double      *facops,
   double      cpus[],
   int         msglvl,
   FILE        *msgFile
) {
char          buffer[20] ;
DChvManager   *manager ;
DChvList      *updatelist ;
double        t0, t1, t2, t3 ;
double        *entries ;
DQRFactorData *data, *dataObjects ;
ETree         *frontETree ;
FILE          *fp ;
int           count, ii, irow, jcol, J, myid, ncolA, nrowA, nfront,
              nthread, rc, size ;
int           *head, *indices, *link, *list, *owners, *vtxToFront ;
IVL           *rowsIVL ;
#if THREAD_TYPE == TT_POSIX
pthread_t     *tids ;
#endif
/*
   --------------
   check the data
   --------------
*/
MARKTIME(t0) ;
if (  frontmtx == NULL || inpmtxA == NULL || ownersIV == NULL
   || cpus == NULL || msglvl < 0 
   || (msglvl > 0 && msgFile == NULL) ) {
   fprintf(stderr, "\n fatal error in DFrontMtx_MT_QRfactor()"
           "\n frontmtx = %p, inpmtxA = %p, ownersIV = %p"
           "\n cpus = %p, msglvl = %d, msgFile = %p"
           "\n bad input\n",
           frontmtx, inpmtxA, ownersIV, cpus, msglvl, msgFile) ;
   exit(-1) ;
}
IV_sizeAndEntries(ownersIV, &nfront, &owners) ;
nthread = 1 + IV_max(ownersIV) ;
frontETree   = frontmtx->frontETree ;
vtxToFront   = IV_entries(frontETree->vtxToFrontIV) ;
/*
   -----------------------------------------------------
   get a head[nfront] and link[nrowA] map pair,
   head[J]    = first row from A that goes into front J
   link[irow] = next row from A that goes into the same
                front as irow
   -----------------------------------------------------
*/
if ( msglvl > 1 ) {
   fprintf(msgFile, " \n inside QRfactor inpmtxA \n");
   fflush(msgFile) ;
}
nrowA = 1 + IVmax(DInpMtx_nent(inpmtxA), DInpMtx_ivec1(inpmtxA), &rc) ;
ncolA = 1 + IVmax(DInpMtx_nent(inpmtxA), DInpMtx_ivec2(inpmtxA), &rc) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n inside DFrontMtx_MT_QRfactor nrowA = %d  ncolA = %d \n",
           nrowA, ncolA);
   fflush(msgFile) ;
}
if ( msglvl > 3 ) {
   DInpMtx_writeForHumanEye(inpmtxA, msgFile) ;
   fflush(msgFile) ;
}

head = IVinit(nfront, -1) ;
link = IVinit(nrowA,  -1) ;
for ( irow = 0 ; irow < nrowA ; irow++ ) {
   DInpMtx_vector(inpmtxA, irow, &size, &indices, &entries) ;
   if ( size > 0 ) {
      jcol       = indices[0] ;
      J          = vtxToFront[jcol] ;
      link[irow] = head[J] ;
      head[J]    = irow ;
   }
}
/*
   ------------------------------------------
   get a IVL object to hold the list of rows
   in A that goes into each front
   ------------------------------------------
*/
rowsIVL = IVL_new() ;
IVL_init2(rowsIVL, IVL_CHUNKED, nfront, nrowA) ;
list = IVinit(ncolA, -1) ;
for ( J = 0 ; J < nfront ; J++ ) {
    count = 0 ;
    for ( irow = head[J] ; irow != -1 ; irow = link[irow] ) {
        list[count++] = irow ;
    }
    if ( count > 0 ) {
       IVqsortUp(count, list) ;
       IVL_setList(rowsIVL, J, count, list) ;
    }
}
if ( msglvl > 1 ) {
   fprintf(msgFile, " \n after set rowsIVL objects \n");
   fflush(msgFile) ;
}
if ( msglvl > 3 ) {
   fprintf(msgFile, " \n rowsIVL for all %d fronts \n",J);
   fflush(msgFile) ;
   rc = IVL_writeForHumanEye(rowsIVL, msgFile) ;
}
/*
   ------------------------------------------
   map the matrix entries into their fronts
   ------------------------------------------
*/
MARKTIME(t2) ;
cpus[9] = t2 - t0 ;
/*
   ----------------------------------------------------------------
   create the DChvManager object to manage working DChv objects, 
   a DChvList object to handle the lists of update DChv objects.
   ----------------------------------------------------------------
*/
MARKTIME(t1) ;
manager = DChvManager_new() ;
DChvManager_init(manager, 1) ;
updatelist = DFrontMtx_postList(frontmtx, ownersIV, 1) ;

MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : initialize lists and manager", 
           t2 - t1) ;
   fflush(msgFile) ;
}
/*
   -------------------------------------------------------------
   create nthread DQRFactorData objects and load with their data
   -------------------------------------------------------------
*/
MARKTIME(t1) ;
*facops = 0.0 ;
ALLOCATE(dataObjects, struct _DQRFactorData, nthread) ;
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
   DQRFactorData_init(data, frontmtx, inpmtxA, rowsIVL, ownersIV, manager,
                      updatelist, facops) ;
   data->cpus[9] = cpus[9] ;
   if ( msglvl > 0 ) {
      sprintf(buffer, "res.%d", myid) ;
      if ( (fp = fopen(buffer, "w")) == NULL ) {
         fprintf(stderr, "\n fatal error, unable to open file %s",
                 buffer) ;
         exit(-1) ;
      }
      DQRFactorData_setInfo(data, myid, msglvl, fp) ;
   } else {
      DQRFactorData_setInfo(data, myid, msglvl, NULL) ;
   }
}
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : initialize data objects", t2 - t1) ;
}
/*
   -------------------
   set the concurrency
   -------------------
*/
#if THREAD_TYPE == TT_SOLARIS
MARKTIME(t1) ;
thr_setconcurrency(nthread) ;
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : set concurrency time", t2 - t1) ;
}
#endif
/*
#####   NOTE: for SGI machines, this command must be present
#####         for the thread scheduling to be efficient.
#####         this is NOT a POSIX call, but SGI needs it anyway
#if THREAD_TYPE == TT_POSIX
pthread_setconcurrency(nthread) ;
#endif
*/
/*
   ------------------
   create the threads
   ------------------
*/
MARKTIME(t1) ;
#if THREAD_TYPE == TT_SOLARIS
for ( myid = 0, data = dataObjects ; 
      myid < nthread - 1 ;
      myid++, data++ ) {
   rc = thr_create(NULL, 0, DFrontMtx_QRworkerFactor, data, 0, NULL) ;
   if ( rc != 0 ) {
      fprintf(stderr, 
              "\n fatal error, myid = %d, rc = %d from thr_create",
              myid, rc) ;
      exit(-1) ;
   }
}
#endif
#if THREAD_TYPE == TT_POSIX
{
pthread_attr_t   attr ;
pthread_attr_init(&attr) ;
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) ;
ALLOCATE(tids, pthread_t, nthread) ;
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
   rc = pthread_create(&tids[myid], &attr,
                       DFrontMtx_QRworkerFactor, data) ;
   if ( rc != 0 ) {
      fprintf(stderr,
              "\n fatal error, myid = %d, rc = %d from pthread_create",
              myid, rc) ;
      exit(-1) ;
   } else if ( msglvl > 2 ) {
      fprintf(stderr, "\n thread %d created", tids[myid]) ;
   }
}
}
#endif
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : thread creation time", t2 - t1) ;
}
/*
   ----------------
   join the threads
   ----------------
*/
MARKTIME(t1) ;
#if THREAD_TYPE == TT_SOLARIS
DFrontMtx_QRworkerFactor(data) ;
for ( myid = 0 ; myid < nthread - 1 ; myid++ ) {
   thr_join(0, 0, 0) ;
}
#endif
#if THREAD_TYPE == TT_POSIX
{
void       *status ;
for ( myid = 0 ; myid < nthread ; myid++ ) {
   pthread_join(tids[myid], &status) ;
}
}
#endif
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : thread join time", t2 - t1) ;
}
if ( updatelist != NULL ) {
   DChvList_getList(updatelist, nfront) ;
}
/*
   -------------------
   fill the statistics
   -------------------
*/
for ( myid = 0, data = dataObjects ; 
      myid < nthread ;
      myid++, data++ ) {
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n thread %d cpus", myid) ;
      DVfprintf(msgFile, 10, data->cpus) ;
   }
   for ( ii = 0 ; ii < 10 ; ii++ ) {
      cpus[ii] += data->cpus[ii] ;
   }
}
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n multithread QR factorization has finished") ;
   fprintf(msgFile, "\n facops = %9.2f", *facops) ;
   fflush(msgFile) ;
}
/*
   -------------
   free the data
   -------------
*/
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
   DQRFactorData_clearData(data) ;
}
FREE(dataObjects) ;
if ( updatelist != NULL ) {
   DChvList_free(updatelist) ;
}
DChvManager_free(manager) ;
IVfree(head) ;
IVfree(link) ;
IVfree(list) ;
IVL_free(rowsIVL) ;

MARKTIME(t3) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : total time", t3 - t0) ;
}

return ; }

/*--------------------------------------------------------------------*/
/*
   ----------------------------------------------------
   purpose -- worker method to factor the matrix


   created -- 97aug20, dkw
   ----------------------------------------------------
*/
void *
DFrontMtx_QRworkerFactor (
   void   *arg
) {
char          *status ;
DQRFactorData *data ;
DFrontMtx     *frontmtx ;
double        t1, t2 ;
double        *cpus ;
ETree         *frontETree ;
FILE          *msgFile ;
Ideq          *deq ;
int           J, K, msglvl, myid, nfront, ownerJ ;
int           *ndescLeft, *owners, *par ;
IV            *ownersIV ;
Tree          *tree ;
/*
   -------------------------------
   extract pointers and dimensions
   -------------------------------
*/
MARKTIME(t1) ;
data = (DQRFactorData *) arg ;
msglvl     = data->msglvl   ;
msgFile    = data->msgFile  ;
myid       = data->myid     ;
frontmtx   = data->frontmtx ;
nfront     = frontmtx->nfront ;
frontETree = frontmtx->frontETree ;
tree       = frontETree->tree ;
par        = tree->par ;
ownersIV   = data->ownersIV ;
owners     = IV_entries(ownersIV) ;
cpus       = data->cpus ;
#if THREAD_TYPE == TT_SOLARIS
if ( msglvl > 2 ) {
   fprintf(stdout,
           "\n ### inside QRworkerFactor, myid = %d, thr_self() = %d",
           myid, thr_self()) ;
   fflush(stdout) ;
}
#endif
#if THREAD_TYPE == TT_POSIX
if ( msglvl > 0 ) {
   fprintf(stdout, "\n ### inside QRworkerFactor, myid = %d", myid) ;
   fprintf(stdout, ", pthread_self() = %d", pthread_self()) ;
   fflush(stdout) ;
}
#endif

/*
   ------------------------------------
   compute the local ndescLeft[] vector
   that synchronizes the front
   ------------------------------------
*/
ndescLeft = IVinit(nfront, 0) ;
for ( J = 0 ; J < nfront ; J++ ) {
   ownerJ = owners[J] ;
   for ( K = par[J] ; K != -1 ; K = par[K] ) {
      if ( owners[K] == ownerJ ) {
         ndescLeft[K]++ ;
         break ;
      }
   }
}
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n ndescLeft") ;
   IVfprintf(msgFile, nfront, ndescLeft) ;
   fflush(msgFile) ;
}
/*
   ----------------------------------------------------------------
   initialize the status[] vector,
   status[J] == 'W' --> J belongs to an active path for this thread
   ----------------------------------------------------------------
*/
status = DFrontMtx_status(frontmtx, ownersIV, myid) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n status") ;
   CVfprintf(msgFile, nfront, status) ;
   fflush(msgFile) ;
}
/*
   --------------------------------------------------------
   initialize the Ideq object that holds the initial fronts
   of the active paths, owned fronts with no children that
   are owned or updates by this thread.
   --------------------------------------------------------
*/

deq = DFrontMtx_setUpDequeue(frontmtx, ownersIV, status, myid) ;

/*
   ---------------------------
   loop while a path is active
   ---------------------------
*/

while ( (J = Ideq_removeFromHead(deq)) != -1 ) {
   if ( msglvl > 1 ) {
      fprintf(msgFile, "\n\n ### checking out front %d", J) ;
      fflush(msgFile) ;
   }
   DFrontMtx_QRcheckFront(frontmtx, data, J, status) ;
   if ( status[J] == 'F' ) {
      if ( msglvl > 1 ) {
         fprintf(msgFile, "\n\n front %d is finished", J) ;
         fflush(msgFile) ;
      }
/*
         --------------------------------
         look for the next owned ancestor
         --------------------------------
*/
      K = par[J] ;
      while ( K != -1 && owners[K] != myid ) {
         K = par[K] ;
      }
      if ( K != -1 && --ndescLeft[K] == 0 ) {
            if ( msglvl > 1 ) {
               fprintf(msgFile,
                  "\n\n placing next owned ancestor %d on dequeue", K) ;
               fflush(msgFile) ;
            }
            Ideq_insertAtHead(deq, K) ;
      }

   } else {
      if ( msglvl > 1 ) {
         fprintf(msgFile,
            "\n\n front %d not finished, placing on tail of queue", J) ;
         fflush(msgFile) ;
      }
      Ideq_insertAtTail(deq, J) ;
   }
}
   fprintf(msgFile, " \n after QRworkerfactor \n");
   fprintf(msgFile, " \n cpus[0] = %6.3f\n", data->cpus[0]);
   fprintf(msgFile, " \n cpus[1] = %6.3f\n", data->cpus[1]);
   fprintf(msgFile, " \n cpus[2] = %6.3f\n", data->cpus[2]);
   fprintf(msgFile, " \n cpus[3] = %6.3f\n", data->cpus[3]);
   fprintf(msgFile, " \n cpus[4] = %6.3f\n", data->cpus[4]);
   fprintf(msgFile, " \n cpus[5] = %6.3f\n", data->cpus[5]);
   fprintf(msgFile, " \n cpus[6] = %6.3f\n", data->cpus[6]);
   fprintf(msgFile, " \n cpus[7] = %6.3f\n", data->cpus[7]);
   fprintf(msgFile, " \n cpus[8] = %6.3f\n", data->cpus[8]);
   fprintf(msgFile, " \n cpus[9] = %6.3f\n", cpus[9]);

/*
   ------------------------
   free the working storage
   ------------------------
*/
CVfree(status) ;
Ideq_free(deq) ;
IVfree(ndescLeft) ;
MARKTIME(t2) ;
cpus[8] = t2 - t1 + cpus[9] ;
cpus[7] = cpus[8] - cpus[0] - cpus[1] - cpus[2] - cpus[3] 
        - cpus[4] - cpus[5] - cpus[6] ;

return(NULL) ; }

/*--------------------------------------------------------------------*/
