/* ---------------------------------------------------------------------
*
*  -- PSBLAS routine (version 1.0) --
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
#include "psblas.h"

/* #define DEBUG_SWAP_ */
/* #define DEBUG */
/* Internal Functions */
static void Update_Data(int Y[], int X[], int point_to_proc, int desc_halo[]);
static void Extract(int Y[], int X[], int point_to_proc, int desc_halo[]);

#define Send_Data(X,ndim,proc,ictxt)	Cigesd2d(ictxt,ndim,1,X,ndim,proc,0)
#define Receive_Data(X,ndim,proc,ictxt) Cigerv2d(ictxt,ndim,1,X,ndim,proc,0)

void PSI_iSwapData(n,Y,ly, desc_data, desc_halo, work, lwork, ierror)
     /* ..
      *  .. Scalar Arguments ..
      */
     int n, ly;
     int *lwork, *ierror;
     /* ..
      *  .. Array Arguments ..
      */
     int         desc_data[], desc_halo[];
     int      Y[], work[];

{
  /*
   *  Purpose
   *
   *  Updates the HALO points.
   *
   *
   *************************
   *  =======
   *  Parameters
   *  ==========
   *
   *  N       (global input) INTEGER
   *          The number of columns of Y to operate on.
   *
   *  Y       (local input) REAL array
   *          This array contains the entries of the distributed vector
   *          sub( Y ).  On exit, sub( Y ) is overwritten by the updated
   *          distributed vector sub( Y ).
   *
   *  LY      (local input) INTEGER
   *          The leading dimension of vector Y.
   * 
   *  DESCDATA (global and local input) INTEGER array of dimension 8.
   *          The array descriptor of the decomposition.
   *
   *  DESCHALO (local input) INTEGER array.It contains informations for local 
   *                     halo points. 
   *
   *  WORK       (local input) REAL array. Work area to memorize intermediate 
   *		 results.
   *
   *  LWORK      (local input) pointer to INTEGER. Dimension of Work area.
   *
   *
   *  =====================================================================
   *
   *  .. Local Scalars ..
   */
  int domain;
  int ictxt;
  int nprow, npcol, myrow, mycol;
  int point_to_proc;		/* Pointer to the current process to communicate to */
  int proc_to_comm;		/* Processor to communicate to */
  int ndim;			/* Dimension of vector to send/receive */
  int *X;			/* Work Array */
  int LW,WorkSize;			/* Dimension of Work area */
  int i, loc_dom;
  int err;
  int int_err[5];
  double real_err[5];

  ictxt = desc_data[CTXT_];
  Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );

#ifdef DEBUG_SWAP_
  printf("PSI_sSwap_Data\n");
  printf("ictxt= %d myrow = %d mycol = %d nprow =%d npcol %d\n", ictxt, myrow, mycol, nprow, npcol);
#endif
  
  /* Initialize Work Area */
  point_to_proc = 0;
  
  proc_to_comm = desc_halo[PROC_ID_];

  WorkSize = 0;
 
  /* Search for max Work area size */

  /* For each processor to send/receive to/from */
  while (proc_to_comm != -1) {
    /* Update size of Worka Area */
    WorkSize = max(WorkSize, desc_halo[point_to_proc + N_ELEM_RECV_]);
    WorkSize = max(WorkSize, desc_halo[point_to_proc + N_ELEM_SEND_ + 
				      desc_halo[point_to_proc + N_ELEM_RECV_]]);
    
    /* Update proc_to_send and proc_to_recv */
    point_to_proc += desc_halo[point_to_proc + N_ELEM_RECV_]+
      desc_halo[point_to_proc + N_ELEM_SEND_ + desc_halo[point_to_proc + N_ELEM_RECV_]]+3;
    
    proc_to_comm = desc_halo[point_to_proc + PROC_ID_];
  }
  LW = WorkSize;
  WorkSize *= n; 
#ifdef DEBUG
  printf("PSI_sSwap_Data\n");
  printf("WorkSize: %d LW %d\n", WorkSize,LW);
#endif
  *ierror = 0;
  /* Alloc memory if necessary */
  if (WorkSize > *lwork)  {
    if ((X = (int *)malloc(WorkSize*sizeof(int))) == NULL)
      *ierror = WorkSize*sizeof(int);
    
#ifdef PS_CONTROL_LEVEL
    printf("Alloc memory in PSI_dSwapData: %d elements",WorkSize);
#endif
  }  else {
    X = (int *)work;
  }
  
  /* Check for ERRORS */
  err = *ierror;
  Cigamx2d(ictxt, ALL, TOPDEF, 1, 1, &err, 1, X, X, -1 ,-1,-1);

  if (err)
    return;

  point_to_proc = 0;
  proc_to_comm  = desc_halo[PROC_ID_];
  
  /* For each processor to send/receive to/from */
  while (proc_to_comm != -1)  {
    if (proc_to_comm < myrow) {
      /* First, I send */
      for (i=0; i<n; i++) 
	Extract(&Y[i*ly], &X[i*LW], point_to_proc, desc_halo);		
      /* Send_Data(X,desc_halo[point_to_proc+desc_halo[point_to_proc+N_ELEM_RECV_]+ */
/*  			   N_ELEM_SEND_],proc_to_comm,ictxt); */
      Cigesd2d(ictxt,desc_halo[point_to_proc+desc_halo[point_to_proc+N_ELEM_RECV_]+
			      N_ELEM_SEND_],n,X,LW,proc_to_comm,0);
      /* then, I receive */
/*        Receive_Data(X,desc_halo[point_to_proc+N_ELEM_RECV_],proc_to_comm,ictxt); */
      Cigerv2d(ictxt,desc_halo[point_to_proc+N_ELEM_RECV_],n,X,LW,proc_to_comm,0);
      for (i=0; i<n; i++) 
	Update_Data(&Y[i*ly], &X[i*LW], point_to_proc, desc_halo);
    } else if (proc_to_comm > myrow) {
      /* First, I receive */
      Cigerv2d(ictxt,desc_halo[point_to_proc+N_ELEM_RECV_],n,X,LW,proc_to_comm,0);
      for (i=0; i<n; i++) 
	Update_Data(&Y[i*ly], &X[i*LW], point_to_proc, desc_halo);
      /* then, I send */
      for (i=0; i<n; i++) 
	Extract(&Y[i*ly], &X[i*LW], point_to_proc, desc_halo);		
      /* Send_Data(X,desc_halo[point_to_proc+desc_halo[point_to_proc+N_ELEM_RECV_]+ */
/*  			   N_ELEM_SEND_],proc_to_comm,ictxt); */
      Cigesd2d(ictxt,desc_halo[point_to_proc+desc_halo[point_to_proc+N_ELEM_RECV_]+
			      N_ELEM_SEND_],n,X,LW,proc_to_comm,0);
    } else {
      /* I send to myproc */
      for (i=0; i<n; i++) 
	Extract(&Y[i*ly], &X[i*LW], point_to_proc, desc_halo);		
      /* Update Data */
      for (i=0; i<n; i++) 
	Update_Data(&Y[i*ly], &X[i*LW], point_to_proc, desc_halo);
    }
    
    /* Update proc_to_comm and proc_to_recv */
    point_to_proc += desc_halo[point_to_proc + N_ELEM_RECV_]+
      desc_halo[point_to_proc + desc_halo[point_to_proc + N_ELEM_RECV_]+ N_ELEM_SEND_]+3;
    
    proc_to_comm = desc_halo[point_to_proc + PROC_ID_];
  }  
  /* Free memory if necessary */
  if (WorkSize > *lwork)
    free(X);
}

static void Update_Data(Y, X, point_to_proc, desc_halo)
     int Y[]; /* (input/output) Local array of elements */
     int X[]; /* (input) Array of elements to insert in Y */
     int point_to_proc; /* (input) Pointer to desc_to_recv array.
			   In input it points to the block of indexs of elements
			   to update.
			*/
     int desc_halo[]; /* (input) Array of blocks of indexes */
{
  /* PURPOSE: Updates elements in Y with values in X */
  /* Local scalars */
  int i;
  
  for (i = 0; i < desc_halo[point_to_proc + N_ELEM_RECV_]; i++)
    Y[desc_halo[point_to_proc + ELEM_RECV_ + i]-1] = X[i];
}

static void Extract(Y, X, point_to_proc, desc_halo)
     int Y[]; /* (input) Local array of elements */
     int X[]; /* (output) Array of elements extracted from Y */
     int point_to_proc; /* (input) Pointer to the desc_halo array.
			   It points to the block of indexes of elements
			   to extract. */
     int desc_halo[]; /* (input) Array of blocks of indexes */
{
  /* PURPOSE: Extracts elements to send from array Y, to array X */
  
  /* Local scalars */
  int i, ndim, recv_dim;
  
  recv_dim = desc_halo[point_to_proc + N_ELEM_RECV_];
  ndim = desc_halo[point_to_proc + N_ELEM_SEND_ + recv_dim];
  
  /* Extract elements to send */
  for (i = 0 ; i < ndim ; i++)
    X[i] = Y[desc_halo[point_to_proc + recv_dim + ELEM_SEND_ + i]-1] ;
}

