/*  maps.c  */

#include "../ETree.h"

/*
   MYDEBUG -- when greater than zero, the diagnostic output guarded
   by "#if MYDEBUG > 0" in this file is compiled in and written
   to stdout
*/
#define MYDEBUG  0

/*
   SUBTREE_DEFINITION selects how ETree_ddMap() builds its subtree
   metric:
      SUBTREE_SIZE -- from the ETree_nvtxMetric() vector
      SUBTREE_OPS  -- from the forward operation counts
*/
#define SUBTREE_SIZE   1
#define SUBTREE_OPS    2
#define SUBTREE_DEFINITION SUBTREE_OPS

/*--------------------------------------------------------------------*/
/*
   --------------------------------------------------------------
   purpose -- construct and return an IV object that holds the
      map from fronts to threads for a wrap map of the front tree,
      i.e., front J is owned by thread (J mod nthread).

   frontTree -- front tree object, must not be NULL
   cumopsDV  -- vector with one entry per thread, must not be NULL
      and must have at least one entry. the forward op count of
      each front is added to the entry of its owning thread,
      so existing contents are accumulated into, not reset.

   return value -- a newly created IV object with one entry per
      entry of the forward ops vector; entry J is the thread that
      owns front J. the object is not freed by this method.

   error handling -- on bad input a message is written to stderr
      and the program exits.

   created -- 96dec12, cca
   --------------------------------------------------------------
*/
IV *
ETree_wrapMap (
   ETree   *frontTree,
   DV      *cumopsDV
) {
double   *cumops = NULL, *forwardOps = NULL ;
DV       *forwardOpsDV ;
int      jthread, J, nfront = 0, nthread = 0 ;
int      *owners ;
IV       *ownersIV ;
Tree     *tree ;
/*
   ---------------
   check the input
   ---------------
*/
if ( frontTree == NULL || cumopsDV == NULL ) {
   fprintf(stderr, "\n fatal error in ETree_wrapMap(%p,%p)"
           "\n bad input\n", (void *) frontTree, (void *) cumopsDV) ;
   exit(-1) ;
}
tree = frontTree->tree ;
DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
/*
   -----------------------------------------------------------
   there must be at least one thread, otherwise the remainder
   J % nthread below is a division by zero
   -----------------------------------------------------------
*/
if ( nthread <= 0 || cumops == NULL ) {
   fprintf(stderr, "\n fatal error in ETree_wrapMap(%p,%p)"
           "\n bad input, cumopsDV has %d entries\n",
           (void *) frontTree, (void *) cumopsDV, nthread) ;
   exit(-1) ;
}
/*
   ---------------------------------
   get a vector of forward op counts
   ---------------------------------
*/
forwardOpsDV = ETree_forwardOps(frontTree) ;
DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
/*
   -----------------------------------------------------
   load the map vector, front J goes to thread J mod
   nthread and its forward ops are charged to that thread
   -----------------------------------------------------
*/
ownersIV = IV_new() ;
IV_init(ownersIV, nfront, NULL) ;
owners = IV_entries(ownersIV) ;
for ( J = Tree_postOTfirst(tree) ;
      J != -1 ;
      J = Tree_postOTnext(tree, J) ) {
   jthread = J % nthread ;
   owners[J] = jthread ;
   cumops[jthread] += forwardOps[J] ;
}
/*
   ------------------------
   free the working storage
   ------------------------
*/
DV_free(forwardOpsDV) ;

return(ownersIV) ; }

/*--------------------------------------------------------------------*/
/*
   ----------------------------------------------------------------
   purpose -- construct and return an IV object that holds the
      map from fronts to threads for a balanced map of the front
      tree. the fronts are visited in a post-order traversal and
      each front is given to the thread whose cumulative op count
      is currently smallest (lowest thread id wins a tie).

   frontTree -- front tree object, must not be NULL
   cumopsDV  -- vector with one entry per thread, must not be NULL
      and must have at least one entry. its contents on entry are
      the starting loads of the threads; the forward op count of
      each front is added to the entry of its owning thread.

   return value -- a newly created IV object with one entry per
      entry of the forward ops vector; entry J is the thread that
      owns front J. the object is not freed by this method.

   error handling -- on bad input a message is written to stderr
      and the program exits.

   created -- 96dec12, cca
   ----------------------------------------------------------------
*/
IV *
ETree_balancedMap (
   ETree   *frontTree,
   DV      *cumopsDV
) {
double   minops ;
double   *cumops = NULL, *forwardOps = NULL ;
DV       *forwardOpsDV ;
int      ithread, jthread, J, nfront = 0, nthread = 0 ;
int      *owners ;
IV       *ownersIV ;
Tree     *tree ;
/*
   ---------------
   check the input
   ---------------
*/
if ( frontTree == NULL || cumopsDV == NULL ) {
   fprintf(stderr, "\n fatal error in ETree_balancedMap(%p,%p)"
           "\n bad input\n", (void *) frontTree, (void *) cumopsDV) ;
   exit(-1) ;
}
tree = frontTree->tree ;
DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
/*
   ---------------------------------------------------------
   there must be at least one thread, otherwise cumops[0] is
   read outside the vector in the search below
   ---------------------------------------------------------
*/
if ( nthread <= 0 || cumops == NULL ) {
   fprintf(stderr, "\n fatal error in ETree_balancedMap(%p,%p)"
           "\n bad input, cumopsDV has %d entries\n",
           (void *) frontTree, (void *) cumopsDV, nthread) ;
   exit(-1) ;
}
/*
   ---------------------------------
   get a vector of forward op counts
   ---------------------------------
*/
forwardOpsDV = ETree_forwardOps(frontTree) ;
DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
/*
   -------------------
   load the map vector
   -------------------
*/
ownersIV = IV_new() ;
IV_init(ownersIV, nfront, NULL) ;
owners = IV_entries(ownersIV) ;
for ( J = Tree_postOTfirst(tree) ;
      J != -1 ;
      J = Tree_postOTnext(tree, J) ) {
/*
   ----------------------------------------------------
   find the least loaded thread, the strict comparison
   keeps the lowest numbered thread when loads are tied
   ----------------------------------------------------
*/
   jthread = 0 ;
   minops  = cumops[0] ;
   for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
      if ( minops > cumops[ithread] ) {
         jthread = ithread ;
         minops  = cumops[ithread] ;
      }
   }
   owners[J] = jthread ;
   cumops[jthread] += forwardOps[J] ;
}
/*
   ------------------------
   free the working storage
   ------------------------
*/
DV_free(forwardOpsDV) ;

return(ownersIV) ; }

/*--------------------------------------------------------------------*/
/*
   ----------------------------------------------------------------
   purpose -- construct and return an IV object that holds the map
      from fronts to threads for a "subtree-subset" map of the
      front tree.

      step 1 -- a pre-order traversal gives every front J a
         contiguous set of eligible threads [firsts[J],lasts[J]].
         a root is eligible for all threads. the set of a parent
         is split among its children in proportion to the
         children's subtree metrics (subtree sums of forward ops);
         a child always receives at least one thread, so the sets
         of siblings may overlap.
      step 2 -- a post-order traversal assigns each front to the
         least loaded thread in its eligible set (lowest thread id
         wins a tie) and charges the front's forward ops to it.

   frontTree -- front tree object, must not be NULL
   cumopsDV  -- vector with one entry per thread, must not be NULL
      and must have at least one entry. its contents on entry are
      the starting loads of the threads; the forward op count of
      each front is added to the entry of its owning thread.

   return value -- a newly created IV object with one entry per
      entry of the forward ops vector; entry J is the thread that
      owns front J. the object is not freed by this method.

   error handling -- on bad input a message is written to stderr
      and the program exits.

   created -- 97jan15, cca
   ----------------------------------------------------------------
*/
IV *
ETree_subtreeSubsetMap (
   ETree   *frontTree,
   DV      *cumopsDV
) {
double   offset, total ;
double   *cumops = NULL, *forwardOps = NULL, *tmetric ;
DV       *forwardOpsDV, *tmetricDV ;
int      I, J, mthread, nfront = 0, nthread = 0, q, qmin ;
int      *fch, *firsts, *lasts, *owners, *par, *sib ;
IV       *ownersIV ;
Tree     *tree ;
/*
   ---------------
   check the input
   ---------------
*/
if ( frontTree == NULL || cumopsDV == NULL ) {
   fprintf(stderr, "\n fatal error in ETree_subtreeSubsetMap(%p,%p)"
           "\n bad input\n", (void *) frontTree, (void *) cumopsDV) ;
   exit(-1) ;
}
tree = frontTree->tree ;
fch  = tree->fch ;
par  = tree->par ;
sib  = tree->sib ;
DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
/*
   ----------------------------------------------------------
   there must be at least one thread, otherwise the eligible
   set of a root is empty and cumops[] is read out of bounds
   ----------------------------------------------------------
*/
if ( nthread <= 0 || cumops == NULL ) {
   fprintf(stderr, "\n fatal error in ETree_subtreeSubsetMap(%p,%p)"
           "\n bad input, cumopsDV has %d entries\n",
           (void *) frontTree, (void *) cumopsDV, nthread) ;
   exit(-1) ;
}
/*
   ---------------------------------
   get a vector of forward op counts
   ---------------------------------
*/
forwardOpsDV = ETree_forwardOps(frontTree) ;
DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
#if MYDEBUG > 0
fprintf(stdout, "\n\n forwardOpsDV") ;
DV_writeForHumanEye(forwardOpsDV, stdout) ;
fflush(stdout) ;
#endif
/*
   --------------------------------
   get the subtree metric DV object
   --------------------------------
*/
tmetricDV = Tree_setSubtreeDmetric(tree, forwardOpsDV) ;
tmetric = DV_entries(tmetricDV) ;
#if MYDEBUG > 0
fprintf(stdout, "\n\n tmetricDV") ;
DV_writeForHumanEye(tmetricDV, stdout) ;
fflush(stdout) ;
#endif
/*
   -----------------------------------------------------
   fill two vectors that hold the first and last threads
   that are eligible to own a front 
   -----------------------------------------------------
*/
#if MYDEBUG > 0
fprintf(stdout, "\n\n pre-order traversal to determine eligible sets") ;
fflush(stdout) ;
#endif
firsts = IVinit(nfront, -1) ;
lasts  = IVinit(nfront, -1) ;
for ( J = Tree_preOTfirst(tree) ;
      J != -1 ;
      J = Tree_preOTnext(tree, J) ) {
#if MYDEBUG > 0
   fprintf(stdout, "\n\n visiting front %d", J) ;
   fflush(stdout) ;
#endif
   if ( par[J] == -1 ) {
/*
      -----------------------------------
      a root is eligible for every thread
      -----------------------------------
*/
      firsts[J] = 0 ;
      lasts[J]  = nthread - 1 ;
#if MYDEBUG > 0
      fprintf(stdout, ", root front") ;
      fflush(stdout) ;
#endif
   }
#if MYDEBUG > 0
   fprintf(stdout, "\n first = %d, last = %d", firsts[J], lasts[J]) ;
   fflush(stdout) ;
#endif
   if ( fch[J] != -1 ) {
/*
      ---------------------------------------------------------
      mthread = # of threads eligible for J
      total   = subtree metric of J without J's own ops, i.e.,
                the sum of the children's subtree metrics
      ---------------------------------------------------------
*/
      mthread = lasts[J] - firsts[J] + 1 ;
      total   = tmetric[J] - forwardOps[J] ;
#if MYDEBUG > 0
      fprintf(stdout, "\n mthread = %d, total = %.0f", mthread, total) ;
      fflush(stdout) ;
#endif
      for ( I = fch[J], offset = 0.0 ; I != -1 ; I = sib[I] ) {
/*
         ------------------------------------------------------
         when the children carry no work (total is not
         positive) the proportional split is undefined (0/0),
         so the child inherits the parent's whole eligible set
         ------------------------------------------------------
*/
         if ( total > 0.0 ) {
            firsts[I] = firsts[J] + (int) (mthread*offset/total) ;
         } else {
            firsts[I] = firsts[J] ;
         }
/*
         ---------------------------------------------------
         a trailing child with zero subtree metric would
         start one past the parent's last thread, keep it
         inside the parent's eligible set
         ---------------------------------------------------
*/
         if ( firsts[I] > lasts[J] ) {
            firsts[I] = lasts[J] ;
         }
#if MYDEBUG > 0
         fprintf(stdout, "\n child %d, offset = %.0f, firsts[%d] = %d",
                 I, offset, I, firsts[I]) ;
         fflush(stdout) ;
#endif
         offset += tmetric[I] ;
         if ( total > 0.0 ) {
            lasts[I] = firsts[J] + (int) (mthread*offset/total) - 1 ;
         } else {
            lasts[I] = lasts[J] ;
         }
/*
         -------------------------------------------
         every child gets at least one thread and
         never a thread beyond the parent's last one
         -------------------------------------------
*/
         if ( lasts[I] < firsts[I] ) {
            lasts[I] = firsts[I] ;
         }
         if ( lasts[I] > lasts[J] ) {
            lasts[I] = lasts[J] ;
         }
#if MYDEBUG > 0
         fprintf(stdout, "\n child %d, offset = %.0f, lasts[%d] = %d",
                 I, offset, I, lasts[I]) ;
         fflush(stdout) ;
#endif
      }
   }
}
/*
   ---------------------------------------------------------------
   now fill the map IV object and cumops[*] vector with a
   balanced map using the candidate sets via a postorder traversal
   ---------------------------------------------------------------
*/
ownersIV = IV_new() ;
IV_init(ownersIV, nfront, NULL) ;
owners = IV_entries(ownersIV) ;
for ( J = Tree_postOTfirst(tree) ;
      J != -1 ;
      J = Tree_postOTnext(tree, J) ) {
#if MYDEBUG > 0
   fprintf(stdout, "\n front %d, firsts[%d] = %d, lasts[%d] = %d",
           J, J, firsts[J], J, lasts[J]) ;
   fflush(stdout) ;
#endif
/*
   ---------------------------------------------------------
   find the least loaded eligible thread, the strict
   comparison keeps the lowest thread id when loads are tied
   ---------------------------------------------------------
*/
   qmin = firsts[J] ;
   for ( q = firsts[J] + 1 ; q <= lasts[J] ; q++ ) {
      if ( cumops[qmin] > cumops[q] ) {
         qmin = q ;
      }
   }
   owners[J] = qmin ;
   cumops[qmin] += forwardOps[J] ;
#if MYDEBUG > 0
   fprintf(stdout, ", owners[%d] = %d, cumops[%d] = %.0f",
           J, owners[J], qmin, cumops[qmin]) ;
   fflush(stdout) ;
#endif
}
/*
   ------------------------
   free the working storage
   ------------------------
*/
DV_free(forwardOpsDV) ;
DV_free(tmetricDV) ;
IVfree(firsts) ;
IVfree(lasts) ;

return(ownersIV) ; }

/*--------------------------------------------------------------------*/
/*
   ----------------------------------------------------------------
   purpose -- construct and return an IV object that holds the
      map from fronts to threads for a domain decomposition 
      balanced map of the front tree.
      the domains are mapped to threads using a balanced map,
      and the schur complement fronts are mapped to threads 
      using a balanced map, but the two balanced maps are
      independent.

      a front J belongs to a domain when its subtree metric is
      less than cutoff times the largest subtree metric, otherwise
      it is a schur complement front. all fronts of one domain are
      given to the same thread.

   frontTree -- front tree object, must not be NULL
   cumopsDV  -- vector with one entry per thread, must not be NULL
      and must have at least one entry. on return entry q is
      OVERWRITTEN with the domain ops plus the schur complement
      ops of thread q; its contents on entry are not used.
   cutoff    -- fraction of the largest subtree metric that
      separates domain fronts from schur complement fronts

   return value -- a newly created IV object with one entry per
      entry of the forward ops vector; entry J is the thread that
      owns front J. the object is not freed by this method.

   error handling -- on bad input a message is written to stderr
      and the program exits.

   created -- 97jan17, cca
   ----------------------------------------------------------------
*/
IV *
ETree_ddMap (
   ETree    *frontTree,
   DV       *cumopsDV,
   double   cutoff
) {
double   minops ;
double   *cumops = NULL, *domainops, *forwardOps = NULL, 
         *schurops, *tmetric ;
DV       *forwardOpsDV, *tmetricDV ;
int      ithread, jthread, J, K, ndom, nfront = 0, nthread = 0, root ;
int      *ms, *owners, *par, *rootAnc ;
IV       *msIV, *ownersIV, *rootAncIV ;
Tree     *tree ;
/*
   ---------------
   check the input
   ---------------
*/
if ( frontTree == NULL || cumopsDV == NULL ) {
   fprintf(stderr, "\n fatal error in ETree_ddMap(%p,%p,%f)"
           "\n bad input\n", 
           (void *) frontTree, (void *) cumopsDV, cutoff) ;
   exit(-1) ;
}
tree   = frontTree->tree ;
par    = tree->par ;
DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
/*
   ------------------------------------------------------------
   there must be at least one thread, otherwise domainops[0]
   and schurops[0] are read outside their (empty) vectors below
   ------------------------------------------------------------
*/
if ( nthread <= 0 || cumops == NULL ) {
   fprintf(stderr, "\n fatal error in ETree_ddMap(%p,%p,%f)"
           "\n bad input, cumopsDV has %d entries\n",
           (void *) frontTree, (void *) cumopsDV, cutoff, nthread) ;
   exit(-1) ;
}
/*
   ---------------------------------
   get a vector of forward op counts
   ---------------------------------
*/
forwardOpsDV = ETree_forwardOps(frontTree) ;
DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
#if MYDEBUG > 0
fprintf(stdout, "\n forwardOpsDV") ;
DV_writeForHumanEye(forwardOpsDV, stdout) ;
fflush(stdout) ;
#endif
#if SUBTREE_DEFINITION == SUBTREE_SIZE
/*
   -----------------------------
   get a vector of subtree sizes
   -----------------------------
*/
{ 
DV   *tempDV ;
IV   *tempIV ;
tempIV = ETree_nvtxMetric(frontTree) ;
#if MYDEBUG > 0
fprintf(stdout, "\n\n nvtx metric") ;
IV_writeForHumanEye(tempIV, stdout) ;
#endif
tempDV = DV_new() ;
for ( J = 0 ; J < nfront ; J++ ) {
   DV_setEntry(tempDV, J, (double) IV_entry(tempIV, J)) ;
}
#if MYDEBUG > 0
fprintf(stdout, "\n\n double nvtx metric") ;
DV_writeForHumanEye(tempDV, stdout) ;
#endif
tmetricDV = Tree_setSubtreeDmetric(tree, tempDV) ;
IV_free(tempIV) ;
DV_free(tempDV) ;
}
#endif
#if SUBTREE_DEFINITION == SUBTREE_OPS
tmetricDV = Tree_setSubtreeDmetric(tree, forwardOpsDV) ;
#endif
/*
   ---------------------------------------------------
   get a multisector vector
      ms[J] = 1 --> J is a domain front, its subtree
                    metric is below the scaled cutoff
      ms[J] = 0 --> J is a schur complement front
   ---------------------------------------------------
*/
msIV = IV_new() ;
IV_init(msIV, nfront, NULL) ;
IV_fill(msIV, 0) ;
ms = IV_entries(msIV) ;
#if MYDEBUG > 0
fprintf(stdout, "\n\n double nvtx subtree metric") ;
DV_writeForHumanEye(tmetricDV, stdout) ;
#endif
tmetric   = DV_entries(tmetricDV) ;
cutoff = cutoff * DV_max(tmetricDV) ;
for ( J = 0 ; J < nfront ; J++ ) {
   if ( tmetric[J] < cutoff ) {
      ms[J] = 1 ;
   }
}
#if MYDEBUG > 0
fprintf(stdout, "\n msIV") ;
IV_writeForHumanEye(msIV, stdout) ;
fflush(stdout) ;
#endif
/*
   --------------------------------------------
   create a rootAnc vector, 
   if J is in a domain then
      rootAnc[J] is the root node of the domain
   else
      rootAnc[J] is the root node of the tree
   endif

   NOTE(review): the descending sweep reads
   rootAnc[K] of the parent K = par[J] while
   visiting J, which presumably relies on the
   fronts being numbered so that par[J] > J
   -- confirm against the tree's ordering
   --------------------------------------------
*/
rootAncIV = IV_new() ;
IV_init(rootAncIV, nfront, NULL) ;
rootAnc   = IV_entries(rootAncIV) ;
for ( J = nfront - 1 ; J >= 0 ; J-- ) {
   if ( (K = par[J]) == -1 || ms[J] != ms[K] ) {
      rootAnc[J] = J ;
   } else {
      rootAnc[J] = rootAnc[K] ;
   }
}
#if MYDEBUG > 0
fprintf(stdout, "\n rootAncIV") ;
IV_writeForHumanEye(rootAncIV, stdout) ;
fflush(stdout) ;
#endif
/*
   ------------------------------
   initialize the ownersIV object
   ------------------------------
*/
ownersIV = IV_new() ;
IV_init(ownersIV, nfront, NULL) ;
owners = IV_entries(ownersIV) ;
/*
   --------------------------------------------------
   assign the domains to threads using a balanced map

   a new domain starts whenever rootAnc[J] changes 
   along the post-order traversal; the least loaded 
   thread is chosen once per domain and then owns 
   every front of that domain
   --------------------------------------------------
*/
domainops = DVinit(nthread, 0.0) ;
root    = -1 ;
ndom    =  0 ;
jthread =  0 ;
for ( J = Tree_postOTfirst(tree) ;
      J != -1 ;
      J = Tree_postOTnext(tree, J) ) {
   if ( ms[J] == 1 ) {
      if ( root != rootAnc[J] ) {
         ndom++ ;
         root    = rootAnc[J] ;
         jthread = 0 ;
         minops  = domainops[0] ;
         for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
            if ( minops > domainops[ithread] ) {
               jthread = ithread ;
               minops  = domainops[ithread] ;
            }
         }
      }
      owners[J] = jthread ;
      domainops[jthread] += forwardOps[J] ;
   }
}
#if MYDEBUG > 0
fprintf(stdout, "\n %d domains", ndom) ;
fprintf(stdout, "\n domainops") ;
DVfprintf(stdout, nthread, domainops) ;
fflush(stdout) ;
#endif
/*
   ------------------------------------------------------------------
   assign the schur complement fronts to threads using a balanced map
   each front independently goes to the thread with the least schur 
   complement ops so far, the domain ops are not taken into account
   ------------------------------------------------------------------
*/
schurops = DVinit(nthread, 0.0) ;
for ( J = Tree_postOTfirst(tree) ;
      J != -1 ;
      J = Tree_postOTnext(tree, J) ) {
   if ( ms[J] == 0 ) {
      jthread = 0 ;
      minops  = schurops[0] ;
      for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
         if ( minops > schurops[ithread] ) {
            jthread = ithread ;
            minops  = schurops[ithread] ;
         }
      }
      owners[J] = jthread ;
      schurops[jthread] += forwardOps[J] ;
   }
}
#if MYDEBUG > 0
fprintf(stdout, "\n schurops") ;
DVfprintf(stdout, nthread, schurops) ;
fflush(stdout) ;
#endif
/*
   ---------------------------------------------------
   fill the cumulative operations vector, the previous
   contents of cumops[] are overwritten
   ---------------------------------------------------
*/
for ( jthread = 0 ; jthread < nthread ; jthread++ ) {
   cumops[jthread] = domainops[jthread] + schurops[jthread] ;
}
#if MYDEBUG > 0
fprintf(stdout, "\n cumops") ;
DVfprintf(stdout, nthread, cumops) ;
fflush(stdout) ;
#endif
/*
   ------------------------
   free the working storage
   ------------------------
*/
DV_free(forwardOpsDV) ;
DV_free(tmetricDV) ;
IV_free(msIV) ;
IV_free(rootAncIV) ;
DVfree(domainops) ;
DVfree(schurops) ;

return(ownersIV) ; }

/*--------------------------------------------------------------------*/
