/* ---------------------------------------------------------------------
*
*  -- PSBLAS routine (version 1.0) --
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
#include "psblas.h"
#include <stdlib.h>

/* #define DEBUGSPSV__ */

void pszspsm_( trans, m, n, alpha, unitd, D, pr, A, ia1, ia2, fida, descra, infoa,
	      pc, ia, ja, X, ix, jx, lldx, beta, Y, iy, jy, lldy, update, desc_data,
	      desc_halo, desc_ovrlap, desc_ovrlap_elem, work, lwork)
     /*
      *  .. Scalar Arguments ..
      */
     F_CHAR      trans, unitd;
     int         * ia, * ix, * ja, * jx, * lldx, * lldy, 
                 * iy, * jy, * n, * m, * lwork;
     unsigned int * update;

     /* ..
      * .. Array Arguments ..
      */
     int         desc_data[], desc_halo[],
                 desc_ovrlap[], desc_ovrlap_elem[];
     int	 ia1[], ia2[], infoa[], pr[], pc[];
     char	 fida[5], descra[11];
     double      A[], D[], X[], Y[], work[], * alpha, * beta;
{
  /*
   *  Purpose
   *  =======
   *
   *  PSZSPSM performs one of the distributed matrix-vector operations
   *
   *     sub( Y ) := alpha*Pr*sub( T )-1 * Pc*sub( X ) + beta*sub (Y ),   or
   *     sub( Y ) := alpha*D*Pr*sub( T )-1 * Pc*sub( X ) + beta*sub (Y ), or
   *     sub( Y ) := alpha*Pr*sub( T )-1 * Pc*D*sub( X ) + beta*sub (Y ),  or
   *     sub( Y ) := alpha*Pr*sub( T )-T * Pc*sub( X ) + beta*sub (Y ),   or
   *     sub( Y ) := alpha*D*Pr*sub( T )-T * Pc*sub( X ) + beta*sub (Y ), or
   *     sub( Y ) := alpha*Pr*sub( T )-T * Pc*D*sub( X ) + beta*sub (Y ),  or
   *
   *  where sub( T ) denotes T(IT:IT+M-1,JT:JT+M-1),
   *
   *        sub( X ) denotes if TRANS = 'N'
   *                       X(IX:IX+M-1,JX:JX+N-1),
   *                    else if TRANS = 'T'
   *                       X(IX:IX+N-1,JX:JX+M-1)
   *                    else if TRANS = 'C'
   *                       conjug(X(IX:IX+N-1,JX:JX+M-1))
   *                    end if
   *
   *  sub( X ) is a distributed
   *  vector and sub( T ) is a M-by-M distributed triangular submatrix.
   *
   *  Notes
   *  =====
   *
   *  Some description vectors are associated with each distributed sparse
   *  matrix. These vectors stores the information required to the
   *  communication needed to perform distributed operations.
   *
   *  They are:
   *
   *  NAME		 EXPLANATION
   *  ------------------ -------------------------------------------------------
   *  MATRIX_DATA	 Array of INTEGER that contains some local and global
   *			 information of matrix.
   *
   *  DESC_HALO          Array of INTEGER that contains informations for local 
   *                     halo points.
   *
   *  DESC_OVRLAP        Array of INTEGER that contains a list of local overlap 
   *                     elements.
   *                     
   *  OVRLAP_ELEM        Array of INTEGER that contains a list of local overlap
   *                     elements with their multiplicity.
   *
   *  Now we explain each of the above vectors.
   *
   *  Let A be a generic sparse matrix. We denote with MATDATA_A the MATRIX_DATA
   *  array for matrix A.
   *  Data stored in MATRIX_DATA array are:
   *
   *  NOTATION        STORED IN		     EXPLANATION
   *  --------------- ---------------------- -------------------------------------
   *  DEC_TYPE        MATDATA_A[DEC_TYPE_]   Decomposition type
   *  M 	      MATDATA_A[M_]          Total number of equations
   *  N 	      MATDATA_A[N_]          Total number of variables
   *  N_ROW           MATDATA_A[N_ROW_]      Number of local equations
   *  N_COL           MATDATA_A[N_COL_]      Number of local variables
   *  CTXT_A          MATDATA_A[CTXT_]       The BLACS context handle, indicating
   *					     the global context of the operation
   *					     on the matrix.
   *					     The context itself is global.
   *
   *  Let DESCHALO_P be the array DESC_HALO for local process.
   *  This is composed of variable dimension blocks for each process to 
   *  communicate to.
   *  Each block contain indexes of local halo elements to exchange with other 
   *  process.
   *  Let P be the pointer to the first element of a block in DESCHALO_P.
   *  This block is stored in DESCHALO_P as :
   *
   *  NOTATION        STORED IN		          EXPLANATION
   *  --------------- --------------------------- -----------------------------------
   *  PROCESS_ID      DESCHALO_P[P+PROC_ID_]      Identifier of process which exchange 
   *						  data with.
   *  N_ELEMENTS_RECV DESCHALO_P[P+N_ELEM_RECV_]  Number of elements to receive.
   *  ELEMENTS_RECV   DESCHALO_P[P+ELEM_RECV_+i]  Indexes of local elements to
   *					          receive. These are stored in the
   *					          array from location P+ELEM_RECV_ to
   *					          location P+ELEM_RECV_+
   *						  DESCHALO_P[P+N_ELEM_RECV_]-1.
   *  N_ELEMENTS_SEND DESCHALO_P[P+N_ELEM_SEND_]  Number of elements to send.
   *  ELEMENTS_SEND   DESCHALO_P[P+ELEM_SEND_+i]  Indexes of local elements to
   *					          send. These are stored in the
   *					          array from location P+ELEM_SEND_ to
   *					          location P+ELEM_SEND_+
   *						  DESCHALO_P[P+N_ELEM_SEND_]-1.
   *
   *
   *  Let DESCOVRLP_P be the array DESC_OVRLAP for local process.
   *  This is composed of variable dimension blocks for each process to 
   *  communicate to.
   *  Each block contain indexes of local overlap elements to exchange with
   *  other process.
   *  Let P be the pointer to the first element of a block in DESCOVRLP_P.
   *  This block is stored in DESCOVRLP_P as :
   *
   *  NOTATION        STORED IN		            EXPLANATION
   *  ------------- ------------------------------- -----------------------------------
   *  PROCESS_ID    DESCOVRLP_P[P+PROC_ID_]         Identifier of process which exchange
   *						    data with.
   *  N_OVRLAP_ELEM DESCOVRLP_P[P+N_OVRLP_ELEM_]    Number of elements to exchange.
   *  OVRLAP_ELEM   DESCOVRLP_P[P+OVRLP_ELEM_TO_+i] Indexes of local elements to
   *					            exchange. These are stored in the
   *					            array from location P+OVRLP_ELEM_ to
   *					            location P+OVRLP_ELEM_+
   *						    DESCOVRLP_P[P+N_OVRLP_ELEM_]-1.
   *
   *
   *  Let OVR_ELEM_P be the array OVERLAP_ELEM for local process.
   *  This is composed of blocks of two elements. The block
   *  corresponding to the i-th overlapped elements, begin at index 
   *  P = i*2 in array OVR_ELEM_P.
   *  This block is stored in OVR_ELEM_P as :
   *
   *  NOTATION      STORED IN		       EXPLANATION
   *  ------------- -------------------------- ----------------------------------
   *  OVRLAP_ELEM   OVR_ELEM_P[P+OVRLP_ELEM_]  The index of local overlapped 
   *					       element.
   *  N_DOMAINS     OVR_ELEM_P[P+N_DOM_OVR_]   The number of copies of
   *					       local overlapped element.
   *
   *  Parameters
   *  ==========
   *  TRANS   (local input) pointer to CHARACTER
   *          On entry, TRANS specifies if to perfom transposed operations
   *
   *  M       (global input) pointer to INTEGER
   *          The dimension of the distributed tringular submatrix 
   *	      sub( T ). N >= 0.
   *
   *  N       (global input) pointer to INTEGER
   *          The number of columns of dense matrix X to operate on.
   *
   *  UNITD   (local input) pointer to CHARACTER
   *          Specify some type of operation with the diagonal matrix D.
   *
   *  D       (local input) array of REAL
   *          Diagonal scaling matrix.
   *
   *  PR      (local input) array of INTEGER
   *          Permutation matrix.
   *
   *  T       (local input) REAL pointer into the local memory
   *          to an array containing the values of
   *          local pieces of the distributed sparse matrix sub( A ).
   *
   *  IT1     (local input) INTEGER array containing the column
   *          coordinates of the values of local pieces of the
   *          distributed sparse matrix sub( T ).
   *
   *  IT2     (local input) INTEGER array containing the coordinates
   *          of the first element of a row of the
   *          distributed sparse matrix sub( T ) in the array of values 'T'.
   *          The coordinates of the first element of row i of the
   *          distributed sparse matrix sub( T ) in the array of values 'T' 
   *          is IT2[i].
   *
   *  FIDT    (global input) pointer to CHARACTER containing a string
   *          that indicates the type of sparse matrix representation.
   *
   *  DESCRT  (global input) pointer to CHARACTER containing the type of
   *          sparse matrix.
   *             DESCRT[1] = 'G' General
   *             DESCRT[1] = 'S' Symmetric
   *             DESCRT[1] = 'H' Hermitian
   *             DESCRT[1] = 'T' Triangular
   *             DESCRT[1] = 'A' Anti-symmetric
   *             DESCRT[1] = 'D' Diagonal
   *             DESCRT[2] = 'U' Upper
   *             DESCRT[2] = 'L' Lower
   *             DESCRT[3] = 'U' Unit diagonal
   *             DESCRT[3] = 'N' Non Unit diagonal
   *
   *  INFOT   (global output) INTEGER array containing user defined 
   *          information.
   *
   *  PC      (local input) array of INTEGER
   *          Permutation matrix.
   *
   *  IT      (global input) pointer to INTEGER
   *          The global row index of the submatrix of the distributed
   *          sparse matrix T to operate on.
   *
   *  JT      (global input) pointer to INTEGER
   *          The global column index of the submatrix of the distributed
   *          sparse matrix T to operate on.
   *
   *  X       (local input) REAL array containing the local
   *          pieces of a distributed matrix.
   *          This array contains the entries of the distributed vector
   *          sub( X ).
   *
   *  IX      (global input) pointer to INTEGER
   *          The global row index of the submatrix of the distributed
   *          matrix X to operate on.
   *
   *  JX      (global input) pointer to INTEGER
   *          The global column index of the submatrix of the distributed
   *          matrix X to operate on.
   *
   *  LLDX    (local input) pointer to INTEGER
   *          The leading dimension of local dense matrix X.
   *          
   *  Y       (local input/local output) REAL array
   *          containing the local pieces of a distributed matrix.
   *          This array contains the entries of the distributed vector
   *          sub( Y ).
   *          On exit sub( Y ) contains the result of the operation.
   *
   *  IY      (global input) pointer to INTEGER
   *          The global row index of the submatrix of the distributed
   *          matrix Y to operate on.
   *
   *  JY      (global input) pointer to INTEGER
   *          The global column index of the submatrix of the distributed
   *          matrix Y to operate on.
   *
   *  LLDY    (local input) pointer to INTEGER
   *          The leading dimension of local dense matrix Y.
   *
   *  UPDATE  (global input) pointer to INTEGER
   *          If UPDATE == 1 update halo and overlap elements otherwise, 
   *          don't update.
   *
   *  DESCDATA (global and local input) INTEGER array. Is the MATRIX_DATA
   *           array.
   *
   *  DESCHALO (local input) INTEGER array. Is the DESC_HALO array.
   *
   *  DESCOVRLAP (local input) INTEGER array. Is the DESC_OVRLAP array.
   *
   *  DESCOVRLAPELEM (local input) INTEGER array. Is the OVRLAP_ELEM array.
   *
   *  WORK	(local input) REAL array. Is a work array for routines DCSSM
   *		and PSI_sSwapOverlap.
   *
   *  LWORK	(local input/iutput) pointer to INTEGER. The dimension of array
   *		WORK.
   *
   *  =====================================================================
   *
   *  .. Local Scalars ..
   */
  char        TrA;
  int         ictxt, iia, iix, iiy,
              jja, jjx, jjy, i, j,
              mycol, myrow,
              nota, nprow, npcol;

  /* Error variables */
  int         info=0, ierror=0, err;
  int         int_err[5];
  double      real_err[5];

  /* Work area */
  double *Result, *work1;
  int lwork1;

  /*
   * .. Local Array
   */
  double  *tmpx, *tmpy;

  /* ..
   *  .. External Functions ..
   */
  void        blacs_gridinfo_();
  void        pbchkmat();
  void        pbchkvect();
  void        psderror_();
  void        zcssm();
  /* ..
   *  .. Executable Statements ..
   *
   *  Get grid parameters
   */
  ictxt = desc_data[CTXT_];
  Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
  /*
   *  Test the input parameters
   */
  info = 0;
  if( nprow == -1 ) {
    info = 2010;
  } else {
    TrA = Mupcase( F2C_CHAR( trans )[0] );
    nota = ( TrA == 'N' );
    
    if ((*lldx < desc_data[N_COL_]) || (*lldy < desc_data[N_COL_]))
      info = 3010;
    
    if( (TrA != 'N') && (TrA != 'T') && (TrA != 'C')) {
      info = 70;	        /* Error Code */
      int_err[0] = 1;	/* Argument position */
    }
    /* Now it's implemented
       else
       if( TrA == 'C')
       {
       info = 3020;       
       int_err[0] = 1;	
       }
    */
    
    pbchkmat( *m, 2, *m, 3, *ia, *ja, desc_data, 27, 8, &iia, &jja,
	      &info, int_err);
    
    pbchkvect( *m, *n, 2, *ix, *jx, *lldx, desc_data, 27, 17, &iix, &jjx,
	       &info, int_err);
    pbchkvect( *m, *n, 2, *iy, *jy, *lldy, desc_data, 27, 22, &iiy, &jjy,
	       &info, int_err);
    
    err = info;
    Cigamx2d(ictxt, ALL, TOPDEF, 1, 1, &err, 1, tmpx, tmpx, -1 ,-1,-1);
    
    if( err == 0 ) {
      if (*ja == *ix) {
	/* Initialize work array Result */
	if ((iix == 1) && (iiy == 1))
	  if (desc_data[N_ROW_] > *lwork) {
	    /* Allocate memory */
	    if ((Result = (double *)
		 malloc(desc_data[N_ROW_] * 2 * sizeof(double))) == NULL) {
	      info = 2020;
	      int_err[0] = desc_data[N_ROW_] * 2 * sizeof(double);
	    }
	    
	    work1 = work;
	    work1[0] = 0;
	    lwork1 = *lwork;
#ifdef PS_CONTROL_LEVEL
	    printf("Alloc memory in psdspsm: %d elements",desc_data[N_ROW_]);
#endif
	  } else {
	    Result = work;
	    lwork1 = *lwork - desc_data[N_ROW_];
	    work1 = work + desc_data[N_ROW_]; 
	    work1[0] = 0;
	  }
	else
	  info = 3040;
	
	/* Check for ERRORS */
	err = info;
	Cigamx2d(ictxt, ALL, TOPDEF, 1, 1, &err, 1, tmpx, tmpx, -1 ,-1,-1);
	
	if (err != 0) {
	  psderror_( &ictxt, &info, "PSZSPSM\0", int_err, real_err );
	  return;
	}
	
	/* Triangular solve */
	if ((iix == 1) && (iiy == 1)) {
	  tmpx = &X[2 * ((jjx-1)*(*lldx)+iix-1)];
	  tmpy = &Y[2 * ((jjy-1)*(*lldy)+iiy-1)];
#ifdef DEBUGSPSV__
	  printf("PSZSPSM:Before X computation\n");
	  for (i = 0; i < desc_data[N_ROW_]; i++)
	    printf ("X[%d] = (%lf,%lf)\n", i+1, tmpx[2 * i],
		    tmpx[2 * i + 1]);
#endif
	  zcssm(&TrA, &desc_data[N_ROW_], n, alpha, unitd, D, pr, 
		fida, descra, A, ia1, ia2, infoa, pc, tmpx, lldx, 
		beta, tmpy, lldy, work1, &lwork1, &ierror);
	  
	  /* Check for ERRORS */
	  if (ierror != 0) {
	    info = 2010;
	    int_err[0] = ierror;
	  }  else  {
#ifdef DEBUGSPSV__
	    printf("\nPSZSPSM:After local computation\n");
	    printf("Vector Y\n");
	    for (i = 0; i < desc_data[N_ROW_]; i++)
	      printf ("Y[%d] = (%lf,%lf)\n", i+1, tmpy[2 * i],
		      tmpy[2 * i + 1]);
#endif
	  }
	}
	
	err = info;
	Cigamx2d(ictxt, ALL, TOPDEF, 1, 1, &err, 1, tmpx, tmpx, -1 ,-1,-1);
	
	if (err != 0) {
	  psderror_( &ictxt, &info, "PSZSPSM\0", int_err, real_err );
	  return;
	}
	
	if ((iix == 1) && (iiy == 1))
	  if (*update)  {
	    /* Update Overlap and Halo */
	    for (j=0; j < *n; j++) {
	      tmpy = &Y[2 * ((jjy + j - 1) * (*lldy) + iiy - 1)];	      
	      i = 0;
	      while (desc_ovrlap_elem[i] != -1)  {
		Result[2 * (desc_ovrlap_elem[i + OVRLP_ELEM_] - 1)] = 
		  tmpy[2 * (desc_ovrlap_elem[i + OVRLP_ELEM_] - 1)];
		Result[2 * (desc_ovrlap_elem[i + OVRLP_ELEM_] - 1) + 1] = 
		  tmpy[2 * (desc_ovrlap_elem[i + OVRLP_ELEM_] - 1) + 1];
		i += 2;
	      } 
	      
#ifdef DEBUGSPSV__
	      printf("\nPSZSPSM:Before exchange Overlap\n");
	      printf("Result Vector\n");
	      for (i = 0; i < desc_data[N_ROW_]; i++)
		printf ("Sum[%d] = (%lf,%lf)\n", i+1, Result[2 * i],
			Result[2 * i + 1]);
#endif
	      
	      PSI_zSwapOverlap(tmpy, Result, desc_data, 
			       desc_ovrlap, work1, &lwork1, &info ) ;
	      
	      /* Check for allocation error in PSI_zSwapOverlap */
	      if (info != 0) {
		err = 1;
		if (info < 0)
		  info = 0 ;
		else {
		  int_err[0] = info;
		  info = 2020;
		}
	      }
	      
	      if (info == 0) {
#ifdef DEBUGSPSV__
		printf("\nPSZSPSM:After data exchange\n");
		printf("Result Vector\n");
		for (i = 0; i < desc_data[N_ROW_]; i++)
		  printf ("Sum[%d] = (%lf,%lf)\n", i+1, Result[2 * i],
			  Result[2 * i + 1]);
#endif
		/* Update tmpy with the average of overlapped
		   elements over all domains */
		i = 0;
		while (desc_ovrlap_elem[i] != -1) {
		  tmpy[2 * (desc_ovrlap_elem[i + OVRLP_ELEM_] - 1)] = 
		    Result[2 * (desc_ovrlap_elem[i + OVRLP_ELEM_] - 1)]
		    / desc_ovrlap_elem[i + N_DOM_OVR_];
		  tmpy[2 * (desc_ovrlap_elem[i + OVRLP_ELEM_] - 1)
		       + 1] = 
		    Result[2 * (desc_ovrlap_elem[i + OVRLP_ELEM_] - 1)
			   + 1] / desc_ovrlap_elem[i + N_DOM_OVR_];
		  i += 2;
		}
		
	      }
	    }
	  }
	
	/* Deallocate memory if necessary */
	if (desc_data[N_ROW_] > *lwork)
	  /* DeAllocate memory */
	  free(Result);
      }
    }
  }
  
  if( (err != 0) || (info != 0)) {
    psderror_( &ictxt, &info, "PSZSPSM\0", int_err, real_err );
    return;
  }
}








