/* ---------------------------------------------------------------------
*
*  -- PSBLAS routine (version 1.0) --
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
/* This here selects another, and better, algorithm, in that: 
   1. It works well, whereas ours has to be fully debugged
   2. It  delegates the collective operation optimization to the BLACS, 
      so we don't have to do it ourselves. 
   However this means that globX has to be used on all processors, and 
   it will be broadcast first. 
*/
#define LARGE_MEMORY

#include "psblas.h"
void pszscatterm_(m, n, globX,  iglobX, jglobX, lda_globX, 
		 locX, ilocX, jlocX,  lda_locX, 
		 root, desc_data, desc_halo, desc_overlap, overlap_elem,
		 loc_to_glob,work, lwork)
     /*
      *  .. Scalar Arguments ..
      */
     int          * iglobX, * jglobX, * lda_globX, * n, * m, * lwork;
     int          * ilocX, * jlocX, * lda_locX, *root;
     

     /* ..
      *  .. Array Arguments ..
      */
     int         desc_data[],desc_halo[],desc_overlap[],
                 overlap_elem[], loc_to_glob[];
     int         work[];
     double      globX[],locX[];
{
  /*
   *  Purpose
   *  =======
   *
   * PSZSCATTERM distribute dense matrix from root process
   * (which possess it) to every process of the BLACS grid according 
   * to PSBLAS data distribution. 
  */
  
 
/*   *
   *  Notes
   *  =====
   *
   *  Some description vectors are associated with each distributed sparse
   *  matrix. These vectors stores the information required to the
   *  communication needed to perform distributed operations.
   *
   *  They are:
   *
   *  NAME		 EXPLANATION
   *  ------------------ -------------------------------------------------------
   *  MATRIX_DATA	 Array of INTEGER that contains some local and global
   *			 information of matrix.
   *
   *  DESC_HALO          Array of INTEGER that contains informations for local 
   *                     halo points.
   *
   *  DESC_OVRLAP        Array of INTEGER that contains a list of local overlap 
   *                     elements.
   *                     
   *  OVRLAP_ELEM        Array of INTEGER that contains a list of local overlap
   *                     elements with their multiplicity.
   *
   *  Now we explain each of the above vectors.
   *
   *  Let A be a generic sparse matrix. We denote with MATDATA_A the MATRIX_DATA
   *  array for matrix A.
   *  Data stored in MATRIX_DATA array are:
   *
   *  NOTATION        STORED IN		     EXPLANATION
   *  --------------- ---------------------- -------------------------------------
   *  DEC_TYPE        MATDATA_A[DEC_TYPE_]   Decomposition type
   *  M 	      MATDATA_A[M_]          Total number of equations
   *  N 	      MATDATA_A[N_]          Total number of variables
   *  N_ROW           MATDATA_A[N_ROW_]      Number of local equations
   *  N_COL           MATDATA_A[N_COL_]      Number of local variables
   *  CTXT_A          MATDATA_A[CTXT_]       The BLACS context handle, indicating
   *					     the global context of the operation
   *					     on the matrix.
   *					     The context itself is global.
   *
   *  Let DESCHALO_P be the array DESC_HALO for local process.
   *  This is composed of variable dimension blocks for each process to 
   *  communicate to.
   *  Each block contain indexes of local halo elements to exchange with other 
   *  process.
   *  Let P be the pointer to the first element of a block in DESCHALO_P.
   *  This block is stored in DESCHALO_P as :
   *
   *  NOTATION        STORED IN		          EXPLANATION
   *  --------------- --------------------------- -----------------------------------
   *  PROCESS_ID      DESCHALO_P[P+PROC_ID_]      Identifier of process which exchange 
   *						  data with.
   *  N_ELEMENTS_RECV DESCHALO_P[P+N_ELEM_RECV_]  Number of elements to receive.
   *  ELEMENTS_RECV   DESCHALO_P[P+ELEM_RECV_+i]  Indexes of local elements to
   *					          receive. These are stored in the
   *					          array from location P+ELEM_RECV_ to
   *					          location P+ELEM_RECV_+
   *						  DESCHALO_P[P+N_ELEM_RECV_]-1.
   *  N_ELEMENTS_SEND DESCHALO_P[P+N_ELEM_SEND_]  Number of elements to send.
   *  ELEMENTS_SEND   DESCHALO_P[P+ELEM_SEND_+i]  Indexes of local elements to
   *					          send. These are stored in the
   *					          array from location P+ELEM_SEND_ to
   *					          location P+ELEM_SEND_+
   *						  DESCHALO_P[P+N_ELEM_SEND_]-1.
   *
   *
   *  Let DESCOVRLP_P be the array DESC_OVRLAP for local process.
   *  This is composed of variable dimension blocks for each process to 
   *  communicate to.
   *  Each block contain indexes of local overlap elements to exchange with
   *  other process.
   *  Let P be the pointer to the first element of a block in DESCOVRLP_P.
   *  This block is stored in DESCOVRLP_P as :
   *
   *  NOTATION        STORED IN		            EXPLANATION
   *  ------------- ------------------------------- -----------------------------------
   *  PROCESS_ID    DESCOVRLP_P[P+PROC_ID_]         Identifier of process which exchange
   *						    data with.
   *  N_OVRLAP_ELEM DESCOVRLP_P[P+N_OVRLP_ELEM_]    Number of elements to exchange.
   *  OVRLAP_ELEM   DESCOVRLP_P[P+OVRLP_ELEM_TO_+i] Indexes of local elements to
   *					            exchange. These are stored in the
   *					            array from location P+OVRLP_ELEM_ to
   *					            location P+OVRLP_ELEM_+
   *						    DESCOVRLP_P[P+N_OVRLP_ELEM_]-1.
   *
   *
   *  Let OVR_ELEM_P be the array OVERLAP_ELEM for local process.
   *  This is composed of blocks of two elements. The block
   *  corresponding to the i-th overlapped elements, begin at index 
   *  P = i*2 in array OVR_ELEM_P.
   *  This block is stored in OVR_ELEM_P as :
   *
   *  NOTATION      STORED IN		       EXPLANATION
   *  ------------- -------------------------- ----------------------------------
   *  OVRLAP_ELEM   OVR_ELEM_P[P+OVRLP_ELEM_]  The index of local overlapped 
   *					       element.
   *  N_DOMAINS     OVR_ELEM_P[P+N_DOM_OVR_]   The number of copies of
   *					       local overlapped element.
   *
   *
   *  Parameters
   *  ==========
   *
   *  M       (global input) pointer to INTEGER.
   *          The number of rows of X to consider. M >= 0.
   *
   *  N	      (global input) pointer to INTEGER.
   *	      The number of column of X to consider.
   *
   *  globX   (local input valid only for root process) REAL array containing 
   *          all entries of global matrix.
   *
   *  iglobX  (local input valid only for root process) pointer to INTEGER
   *          The global row index of the array Globx
   *
   *  JglobX  (local input  valid only for root process) pointer to INTEGER
   *          The global column index  of the array Globx
   *
   *  lda_globX(local input) pointer to INTEGER
   *          The leading dimension of the array Globx.
   *
   *  locX    (local output) REAL array containing the local pieces of 
   *          a distributed matrix.
   *          This array contains the entries of the distributed vector
   *          sub( X ).
   *
   *  ilocX   (global input) pointer to INTEGER
   *          The global row index of the subvector of the distributed
   *          matrix X, to operate on.
   *
   *  JlocX   (global input) pointer to INTEGER
   *          The global column index of the subvector of the distributed
   *          matrix X to operate on.
   *
   *  lda_locX(local input) pointer to INTEGER
   *          The leading dimension of local dense matrix locX.
   *
   *  DESCDATA (global and local input) INTEGER array. Is the MATRIX_DATA
   *           array.
   *
   *  DESCHALO (local input) INTEGER array. Is the DESC_HALO array.
   *
   *  DESCOVRLAP (local input) INTEGER array. Is the DESC_OVRLAP array.
   *
   *  DESCOVRLAPELEM (local input) INTEGER array. Is the OVRLAP_ELEM array.
   *
   *  loc_to_glob (local input) INTEGER array. It contains for each local element i
   *               the coresponding global index. 
   *
   *  WORK       (local input) REAL array. Work area to memorize intermediate 
   *		 results.
   *
   *  LWORK      (local input) pointer to INTEGER. Dimension of Work area.
   *
   *  =====================================================================
*/
     /*
   *  .. Local Scalars ..
   */
   int i,j,k;
   int nDim, ictxt, iilocX, jjlocX, max_size, proc,lwork1,
   mycol, myrow, nprow, npcol, incx=1, ione=1,WorkSize,pnt_X3;

   /*
    * .. Local Array ..
    */
   double *tmpx,*tmpglobX,*tmplocX;
   int *X;
   /* ..
    *  .. External Functions ..
    */
   void        blacs_gridinfo();
   void        igamx2d();
   void        pbchkvect();
   void        pberror_();
   
  /* Error variables */
  int         info=0, ierror=0, err;
  int         int_err[5];
  double      real_err[5];

  /* ..
   *  .. Executable Statements ..
   *
   *  Get grid parameters
   */
  ictxt = desc_data[CTXT_];
  Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
  /*
   *  Test the input parameters
   */
  info = 0;
  if ( nprow == -1 ) {
    info = 2010;
  } else {
    pbchkglobvect( *m, *n, 1, *iglobX, *jglobX, *lda_globX, desc_data, 12, 3,
		   &info, int_err);
    pbchkvect( *m, *n, 1, *ilocX, *jlocX, *lda_locX, desc_data, 12, 7,
	       &iilocX, &jjlocX, &info, int_err);
    err = info;
    Cdgamx2d(ictxt, ALL, TOPDEF, 1, 1, &err, 1, tmpx, tmpx, -1 ,-1,-1);
    
    if (err == 0)
      if ((*n != 0) && (*m != 0))
	if (iilocX == 1) /* Column vector */  {
	  if (*root == -1) {
	    
	    for (j=0; j<*n; j++) {
	      tmpglobX = &globX[2*j*(*lda_globX)]; 
	      tmplocX = &locX[2*j*(*lda_locX)]; 
	      for (i=0; i<desc_data[N_ROW_]; i++) {
		k = loc_to_glob[i] -1;
		tmplocX[2*i] = tmpglobX[2*k] ;
		tmplocX[2*i+1] = tmpglobX[2*k+1] ;
	      }		
	    }
	    
	  } else { 
	    
	    if (*root == myrow)  {
#if defined (LARGE_MEMORY) 
	      Cdgebs2d(ictxt,"All"," ", *m, *n, globX, 2*(*lda_globX));
	      for (j=0; j<*n; j++) {
		tmpglobX = &globX[2*j*(*lda_globX)]; 
		tmplocX = &locX[2*j*(*lda_locX)]; 
		for (i=0; i<desc_data[N_ROW_]; i++) {
		  k = loc_to_glob[i] -1;
		  tmplocX[2*i] = tmpglobX[2*k] ;
		  tmplocX[2*i+1] = tmpglobX[2*k+1] ;
		}		
	      }	      
#else 	      
	      max_size=0;
	      /* put on work[i] i-th proc of DESC_DATA[N_ROW_] */
	      for (proc=0; proc < nprow ; proc ++)  {
		if (proc!=*root) { 
		  Cigerv2d(ictxt,ione,ione,&work[proc],ione,proc,mycol);
		} else
		  work[proc]=desc_data[N_ROW_];
		if (work[proc]>max_size) max_size=work[proc];
	      }
	      WorkSize= ((nprow+1)*sizeof(int)+2*(max_size+1)*sizeof(double)+
			 max_size*sizeof(int))/sizeof(int);
	      if (*lwork < WorkSize) {
		if ((X = (int *)malloc(WorkSize*sizeof(int))) == NULL)
		  ierror = WorkSize*sizeof(int);
		
#ifdef PS_CONTROL_LEVEL
		printf("Alloc memory in PSI_dSwapData: %d elements",WorkSize);
#endif
		for (i=0;i<nprow;i++)
		  X[i]=work[i];
	      }  else
		X = (int *)work;
	      pnt_X3=((nprow+max_size+1)/(2*sizeof(double)+1))*(2*sizeof(double));  
	      /* third work area
		 must be allineated
		 respect "double" type*/     
	      for (j=0;j<*n; j++)/* for each column vector */ 	{
		tmpglobX = &globX[2*j*(*lda_globX)]; /* Pointer to the globX vector */
		tmplocX = &locX[2*((jjlocX+j-1)*(*lda_locX)+iilocX-1)]; 
		/* Pointer to the locX vector */
		for (proc=0; proc< nprow; proc++)
		  /* send to each process "proc" its internal elements*/
		  PSI_zSendv(desc_data,proc,loc_to_glob,X[proc],&X[nprow+1],
			     (double*) (&X[pnt_X3]),tmpglobX,tmplocX);
		
	      }
	      /* update halo */
	      lwork1=(WorkSize/(2*sizeof(double)))*sizeof(int);
	      
	      tmplocX = &locX[(jjlocX-1)*(*lda_locX)+iilocX-1]; 
	      /* Pointer to the locX vector */
	      /* Swap Data*/
	      PSI_zSwapData(*n, tmplocX,2*(*lda_locX), desc_data, desc_halo, (double *)X,&lwork1,&info);
	      /* Check for allocation error in PSI_zSwapData */
	      if (info != 0) {
		err = 1;
		if (info < 0)
		  info = 0 ;
		else  {
		  int_err[0] = info;
		  info = 2020;
		}
	      }
	      
#endif 
	    } else {
#if defined(LARGE_MEMORY) 
	      Cdgebr2d(ictxt,"All"," ", *m, *n, globX, *lda_globX,*root,mycol);
	      for (j=0; j<*n; j++) {
		tmpglobX = &globX[2*j*(*lda_globX)]; 
		tmplocX = &locX[2*j*(*lda_locX)]; 
		for (i=0; i<desc_data[N_ROW_]; i++) {
		  k = loc_to_glob[i] -1;
		  tmplocX[2*i] = tmpglobX[2*k] ;
		  tmplocX[2*i+1] = tmpglobX[2*k+1] ;
		}		
	      }	      
#else 
	      /* send number of local col*/
	      Cigesd2d(ictxt,ione,ione,&desc_data[N_ROW_],ione,*root,mycol);
	      lwork1=(*lwork/(2*sizeof(double)))*sizeof(int);
	      
	      /* for each column vector */
	      for (j=0; j < *n; j++) {
		tmplocX = &locX[2*((jjlocX+j-1)*(*lda_locX)+iilocX-1)]; 
		/* Pointer to the locX vector */
		
		/* receive internal values*/
		PSI_dRecvv(desc_data,loc_to_glob,tmplocX,*root);
	      }		
		/* Swap Data*/
	      tmplocX = &locX[2*((jjlocX-1)*(*lda_locX)+iilocX-1)]; 		
	      PSI_zSwapData(*n, tmplocX, 2*(*lda_locX), desc_data, desc_halo,
			    (double *) work,&lwork1,&info);
	      /* Check for allocation error in PSI_zSwapData */
	      if (info != 0)  {
		err = 1;
		if (info < 0)
		  info = 0 ;
		else  {
		    int_err[0] = info;
		    info = 2020;
		}
	      }
	      
#endif
	    }	    	    
	  }	  	  
	}  else			/* Case: x is not Column vector */
	  info = 3060 ;
  }
  
  if( (err != 0) || (info != 0) ) {
    psderror_( &ictxt, &info, "PSZSCATTERM", int_err, real_err );
    return;
  }
}




         

