#include "Bdef.h"
/*
 *  This topology supports trees with arbitrary numbers of branches at
 *  each step.  The following pictures show the tree that should be visualized
 *  when examining the algorithm.
 *
 *    TREE GLOBAL OP, NBRANCHES = 2     *    TREE GLOBAL OP, NBRANCHES = 3
 *                                      *
 * i=4   &______________                *
 *       |              \               *
 * i=2   &______         &______        * i=3     &______________________
 *       |      \        |      \       *         |          \           \
 * i=1   &__     &__     &__     &__    * i=1     &______     &______     &__
 *       |  \    |  \    |  \    |  \   *         |  \   \    |  \   \    |  \
 *       0   1   2   3   4   5   6   7  *         0   1   2   3   4   5   6   7
 */

void tree_comb(ctxt, scope, nbranches, N, length, bp, bp2, rdest1, cdest1, Xvvop)
BLACSCONTEXT  *ctxt;
char  scope;
int  nbranches;
int  N;
int  length;
BLACBUFF  *bp;
BLACBUFF  *bp2;
int  rdest1;
int  cdest1;
VVFUNPTR  Xvvop;
/*
 *  -- V1.0 BLACS routine --
 *  University of Tennessee, February 28, 1995
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Perform an element-by-element combine on vectors, using a tree of
 *  degree NBRANCHES rooted at the destination process.
 *  If rdest1 = -1, the answer will be left on all participating processes.
 *  Otherwise, only the process at grid coordinates {rdest1, cdest1} will
 *  have the final answer.  Other Processes will have intermediate (useless)
 *  values.
 *
 *  The routine returns immediately, doing nothing, when the scope contains
 *  fewer than two processes or when SCOPE is not one of 'r', 'c', 'a'.
 *
 *  Arguments
 *  =========
 *  CTXT      (input) pointer to BLACSCONTEXT
 *            The BLACS context where operation is taking place.
 *
 *  SCOPE     (input) char
 *            Limit the scope of the operation.
 *            = 'r' :   Operation is performed by a process row.
 *            = 'c' :   Operation is performed by a process column.
 *            = 'a' :   Operation is performed by all processes in grid.
 *
 *  NBRANCHES (input) int
 *            Indicates the degree of the tree to use (see picture above).
 *            The value FULLCON is replaced by the number of processes in
 *            scope, i.e. every process sends directly to the destination.
 *            NOTE(review): unless FULLCON is 1, NBRANCHES == 1 would make
 *            the combine loop below never terminate (the stride i is
 *            multiplied by 1 and no process ever takes the send branch);
 *            callers presumably never pass it -- confirm.
 *
 *  N         (input) int
 *            The number of elements in the vector.  N >= 0.
 *
 *  LENGTH    (input) int
 *            The length, in bytes, of the vector to be combined.
 *
 *  BP        (input/output) pointer to BLACBUFF.
 *            BLACBUFF is a special data type used by the BLACS to control
 *            buffers and the asynchronous operations coming out of them.
 *            This BLACBUFF should have a buffer whose first N elements
 *            contain the data to be combined. Additional space may be
 *            required, depending upon what combine is being performed.
 *            In any case, the buffer's length should be LENGTH.
 *
 *  BP2       (workspace) pointer to BLACBUFF.
 *            This BLACBUFF is used to receive information for combining with
 *            this process's information.  It should point to a buffer of size
 *            LENGTH.
 *
 *  RDEST1    (input) int
 *            Process row coordinate of node to receive the answer.
 *            If RDEST1 == -1, all nodes in scope receive the answer.
 *
 *  CDEST1    (input) int
 *            Process column coordinate of node to receive the answer.
 *            If RDEST1 == -1, CDEST1 is ignored.
 *
 *  Xvvop     (input) pointer to typed operation function
 *            Points to a typed function which performs the required operation
 *            (e.g. summation) on the two N-element vectors.  It is called as
 *            Xvvop(N, bp->Buff, bp2->Buff); bp->Buff is what is forwarded
 *            afterwards, so the result is expected to be left there.
 *
 * ------------------------------------------------------------------------
 */
{
/*
 * K&R-style declarations of the external routines used below.
 * NOTE(review): getbuff is declared but not called in this function.
 * NOTE(review): Stree_bs / Stree_br (used in the non-force-type broadcast)
 * have no declaration here; presumably Bdef.h supplies one -- confirm.
 */
   char *getbuff();
   void Ssend2d00();
   void Srecv2d00();
   int BuffIsFree();
#if (BeginForceType < EndForceType)
   void Arecv2d00();
#endif

   char *buff, *buff2;
   int nnodes, msgid, dest, rdest, cdest, i, j;
   int nrcvs=0;	  /* Number of ReCeiVeS to do */
   int REBS;	  /* should info be RE-BroadcaSt? */
   int rightedge; /* right-most receiving node */
   int mydist;    /* my distance from destination node */
   int dist;      /* force-type path only: saved mydist / broadcast target */
   int FTmsgid;  /* ID for force type bcast */
/*
 * Not referenced by name in this function; presumably consumed by the
 * MFTrid/MFTcid/MFTaid macros -- confirm against Bdef.h.
 */
   extern int FTrmin, FTcmin, FTamin, FTrcount, FTccount, FTacount;

/*
 * When the answer is wanted everywhere, combine onto process {0,0} and
 * re-broadcast from there afterwards.
 */
   if ( (REBS = (rdest1 == -1)) ) rdest1 = cdest1 = 0;

/*
 * Per scope: find the number of participating processes, bail out if there
 * is nothing to combine with, compute this process's distance from the
 * destination (wrapping modulo nnodes), and obtain a message ID.
 * In the force-type build, when re-broadcasting, the original distance is
 * saved in dist (the combine loop rescales mydist) and every non-destination
 * process calls Arecv2d00 on bp up front (presumably posting an asynchronous
 * receive for the final answer -- confirm).
 */
   switch (scope)
   {
   case 'r':
      nnodes = ctxt->npcol;
      if (nnodes < 2) return;
      mydist = (nnodes+ctxt->mycol-cdest1)%nnodes;
      msgid = Mrid(ctxt);
#if (BeginForceType < EndForceType)
         if (REBS)
         {
            dist = mydist;
            FTmsgid = MFTrid();
            if (mydist != 0) Arecv2d00(ctxt, bp, length, FTmsgid);
         }
#endif
      break;
   case 'c':
      nnodes = ctxt->nprow;
      if (nnodes < 2) return;
      mydist = (nnodes+ctxt->myrow-rdest1)%nnodes;
      msgid = Mcid(ctxt);
#if (BeginForceType < EndForceType)
         if (REBS)
         {
            dist = mydist;
            FTmsgid = MFTcid();
            if (mydist != 0) Arecv2d00(ctxt, bp, length, FTmsgid);
         }
#endif
      break;
   case 'a':
      nnodes = ctxt->Ng;
      if (nnodes < 2) return;
      /* dest is only set (and only used) for scope 'a' */
      dest = Mvkpnum(ctxt, rdest1, cdest1);
      mydist = (nnodes+ctxt->vIam-dest)%nnodes;
      msgid = Maid(ctxt);
#if (BeginForceType < EndForceType)
         if (REBS)
         {
            dist = mydist;
            FTmsgid = MFTaid();
            if (mydist != 0) Arecv2d00(ctxt, bp, length, FTmsgid);
         }
#endif
      break;
   default :
      return;
   }

   buff = bp->Buff;
   buff2 = bp2->Buff;
/*
 * Start from my own coordinates; only the coordinate(s) relevant to the
 * scope are overwritten before a send.
 */
   rdest = ctxt->myrow;
   cdest = ctxt->mycol;
   if (nbranches == FULLCON) nbranches = nnodes;
/*
 * Largest multiple of nbranches that is <= nnodes-1: the right-most node
 * that acts as a receiver on the first level.  It may have fewer than
 * nbranches-1 children.
 */
   rightedge = nnodes - 1 - (nnodes-1)%nbranches;

/*
 * Walk up the tree one level per iteration.  i is the spacing, in original
 * distance units, between nodes still active on this level; mydist and
 * rightedge are kept in level-relative units (divided by nbranches each time
 * this process survives a level).
 */
   for (i=1; (i < nnodes); i *= nbranches)
   {

      if (mydist%nbranches)	/* nodes that send to other nodes */
      {

/*
 *       My parent's level-relative index is mydist rounded down to a
 *       multiple of nbranches; scaling by i turns it back into a distance
 *       from the destination, which is then offset by the destination's
 *       position and wrapped.
 */
         switch (scope)
         {
         case 'r':
            cdest = (cdest1 + (mydist-mydist%nbranches)*i) % nnodes;
	    break;
	 case 'c':
	    rdest = (rdest1 + (mydist-mydist%nbranches)*i) % nnodes;
	    break;
	 case 'a':
            j = (dest + (mydist-mydist%nbranches)*i) % nnodes;
            Mvpcoord(ctxt, j, rdest, cdest);
	 }
         Ssend2d00(ctxt, buff, length, rdest, cdest, msgid);
	 break;		/* I'm done */
      }
      else
      {
/*
 *       Interior nodes receive from nbranches-1 children.  The right-most
 *       receiver only has as many children as actually exist:
 *       (nnodes+i-1)/i is the number of nodes on this level (ceiling
 *       division), so subtracting rightedge+1 leaves those to its right.
 */
         if (mydist != rightedge) nrcvs = nbranches - 1;
         else nrcvs = (nnodes + i - 1) / i - rightedge - 1;
/*
 *       Rescale to the next level before receiving; the receive loop below
 *       uses only nrcvs.
 */
         mydist /= nbranches;
         rightedge /= nbranches;
         rightedge -= (rightedge % nbranches);

/*
 *       Receive each child's contribution into the workspace buffer and
 *       fold it into my own data.
 */
         for (j=0; j < nrcvs; j++)
         {
            Srecv2d00(ctxt, buff2, length, msgid);
            Xvvop(N, buff, buff2);
         }
      }
   }

/*
 * Broadcast answer to everyone if RDEST1 == -1
 */
   if (REBS)
   {
#if (BeginForceType < EndForceType)
/*
 *       Force-type build: hand-rolled binary-tree broadcast from process 0.
 *       Restore the original distance, then set i to the smallest power of
 *       two that is >= nnodes (at least 2).
 */
         mydist = dist;
         for (i=2; i < nnodes; i <<= 1);
/*
 *       Non-root processes called Arecv2d00 on bp earlier; BuffIsFree(bp, 1)
 *       presumably blocks until that receive has completed -- confirm.
 */
         if (mydist > 0) BuffIsFree(bp, 1);

/*
 *       At each halving of i, every process whose distance is a multiple of
 *       the previous i forwards the answer to the process i further away,
 *       if such a process exists.  Because the destination was forced to
 *       {0,0} above, a distance equals the row/column coordinate (or the
 *       process number handed to Mvpcoord for scope 'a').
 */
         while (i > 1)
         {
            if ( !(mydist%i) )
            {
               i >>= 1;
	       dist = mydist + i;
	       if (dist < nnodes)
	       {
                  switch (scope)
                  {
                  case 'r':
                     Ssend2d00(ctxt, buff, length, rdest, dist, FTmsgid);
	             break;
	          case 'c':
                     Ssend2d00(ctxt, buff, length, dist, cdest, FTmsgid);
	             break;
	          case 'a':
                     Mvpcoord(ctxt, dist, rdest, cdest);
                     Ssend2d00(ctxt, buff, length, rdest, cdest, FTmsgid);
	          }
	       }
            }
            else i >>= 1;
         }
#else
/*
 *       Non-force-type build: delegate to the tree broadcast routines with
 *       two branches.  Although the combine loop rescaled mydist, it is
 *       still zero only on the destination process, which therefore acts
 *       as the sender.  The values stored in i are not used afterwards.
 */
         if (mydist == 0) i = Stree_bs(ctxt, scope, bp->Buff, length, 2);
         else
         {
            i = Stree_br(ctxt, scope, bp->Buff, length, 2, rdest1, cdest1);
         }
#endif
   }
} /* end tree_comb */
