/* ---------------------------------------------------------------------
*
*  -- PSBLAS routine (version 1.0) --
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
#include "psblas.h"

/* #define DEBUG_SWAP_ */

/* Internal Functions */
static void Update_Data_ovrlap(double Y[], double X[], double Ovrlap[], int ovr_points, 
			int point_to_proc, int desc_halo[]);
void PSI_dSwapTran(int n, double Y[], int ly, int desc_data[], int desc_halo[], int desc_ovrlap[],
		   int desc_ovrlap_elem[], double *work, int *lwork);
static void Extract_to_send(double Y[], double X[], int point_to_proc, int desc_halo[]);
static void Update_Data(double Y[], double X[], int point_to_proc, int desc_ovrlap[]);
static void Extract(double Ovrlap[], double X[], int Master[], int point_to_proc, 
	     int desc_ovrlap[]);
static int Length(int desc_ovrlap_elem[]);
static int Search_indx(int indx, int desc_ovrlap_elem[], int length);

#define Send_Data(X,ndim,proc,ictxt)	Cdgesd2d(ictxt,ndim,1,X,ndim,proc,0)
#define Receive_Data(X,ndim,proc,ictxt) Cdgerv2d(ictxt,ndim,1,X,ndim,proc,0)

void PSI_dSwapTran(n, Y, ly, desc_data, desc_halo, desc_ovrlap,
		    desc_ovrlap_elem, work, lwork)
     /* ..
      *  .. Scalar Arguments ..
      */
     int n, ly; 
     int *lwork;
     /* ..
      *  .. Array Arguments ..
      */
     int         desc_data[], desc_halo[], desc_ovrlap[], desc_ovrlap_elem[];
     double      Y[], work[];

{
  /*
   *  Purpose
   *
   *  Updates the HALO points.
   * 
   *  WARNING: The OVRLAP parameter is not initialized rigth now!!
   *
   *
   *************************
   *  ==========
   *  Parameters
   *  ==========
   *
   *  N       (global input) INTEGER
   *          The number of columns of Y to operate on.
   *
   *  Y       (local input) REAL array
   *          This array contains the entries of the distributed vector
   *          sub( Y ).  On exit, sub( Y ) is overwritten by the updated
   *          distributed vector sub( Y ).
   *
   *  LY      (local input) INTEGER
   *          The leading dimension of vector Y.
   * 
   *  DESCDATA (global and local input) INTEGER array of dimension 8.
   *           The array descriptor of the decomposition.
   *
   *  DESCHALO (local input) INTEGER array.It contains informations for local 
   *                     halo points. 
   *
   *  WORK       (local input) REAL array. Work area to memorize intermediate 
   *		 results.
   *
   *  LWORK      (local input) pointer to INTEGER. Dimension of Work area.
   *
   *  DESCOVRLAP (local input) INTEGER array. Is the DESC_OVRLAP array.
   *
   *  DESCOVRLAPELEM (local input) INTEGER array. Is the OVRLAP_ELEM array.
   *
   *
   *  =====================================================================
   * 
   *
   *  .. Local Scalars ..
   */
  int ictxt;
  int nprow, npcol, myrow, mycol;
  int point_to_proc;		/* Pointer to the current process to communicate to */
  int proc_to_comm;		/* Processor to communicate to */
  int LW, WorkSize;			/* Dimension of Work area */
  int i, loc_dom, length, n_to_recv, n_to_send, loc_indx;
  int ovr_points;               /* 0 if no overlap points */
  int err, info=0;
  int int_err[5];
  double real_err[5];

  /*
   * .. Local Arrays ..
   */
  double *X;			/* Work Array */
  double *Ovrlap;               /* Work Array */
  int    *Master;               /* Indicate for each point if is a overlap master */


  ictxt = desc_data[CTXT_];
  Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );

#ifdef DEBUG_SWAP_
  printf("PSI_dSwapTran\n");
  printf("ictxt= %d myrow = %d mycol = %d nprow =%d npcol %d\n", ictxt, myrow, mycol, nprow, npcol);
#endif
  
  /* Initialize ovr_points */

  ovr_points = (desc_ovrlap_elem[0] != -1);

  /* Initialize Work Area */
  point_to_proc = 0;
  
  proc_to_comm = desc_halo[PROC_ID_];

  WorkSize = 0;
 
  /* Search for max Work area size */

  /* For each processor to send/receive to/from */
  while (proc_to_comm != -1) {
    /* Update size of Worka Area */
    WorkSize = max(WorkSize, desc_halo[point_to_proc + N_ELEM_RECV_]);
    WorkSize = max(WorkSize, desc_halo[point_to_proc + N_ELEM_SEND_ + 
				      desc_halo[point_to_proc + N_ELEM_RECV_]]);
    
    /* Update proc_to_send and proc_to_recv */
    point_to_proc += desc_halo[point_to_proc + N_ELEM_RECV_]+
      desc_halo[point_to_proc + N_ELEM_SEND_ + desc_halo[point_to_proc + N_ELEM_RECV_]]+3;
    
    proc_to_comm = desc_halo[point_to_proc + PROC_ID_];
  }
  LW = WorkSize;
  WorkSize *= n;
  
  /* Alloc memory if necessary */
  if (WorkSize > *lwork)  {
    /* Allocate memory for X vector */
    if ((X = (double *)malloc(WorkSize*sizeof(double))) == NULL)  {
      info = 2020;
      int_err[0] = WorkSize*sizeof(double);
    }
    
#ifdef PS_CONTROL_LEVEL
    printf("Alloc memory in PSI_dSwapTran: %d elements\n",WorkSize);
#endif
  }  else {
    /* Initialize X */
    X = (double *)work;
  }
  
  /* Check for ERRORS */
  err = info;
  Cigamx2d(ictxt, ALL, TOPDEF, 1, 1, &err, 1, X, X, -1 ,-1,-1);
  
  if (err != 0) {
    psderror_( &ictxt, &info, "PSI_dSwapData\0", int_err, real_err );
    return;
  }
  
  point_to_proc = 0;
  proc_to_comm = desc_halo[PROC_ID_];
  
  /* For each processor to send/receive to/from */
  while (proc_to_comm != -1) {
    n_to_recv = desc_halo[point_to_proc+N_ELEM_RECV_];
    n_to_send = desc_halo[point_to_proc+n_to_recv+N_ELEM_SEND_];
    if (proc_to_comm < myrow) {
      /* First, I send */
      for (i=0; i<n; i++) 
	Extract_to_send(&Y[i*ly], &X[i*LW], point_to_proc, desc_halo);		
/*        Send_Data(X,n_to_recv,proc_to_comm,ictxt); */
      Cdgesd2d(ictxt,n_to_recv,n,X,LW,proc_to_comm,0);

      /* then, I receive */
/*        Receive_Data(X,desc_halo[point_to_proc+n_to_recv+N_ELEM_SEND_],proc_to_comm,ictxt); */
/*        Update_Data_ovrlap(Y, X, Ovrlap, ovr_points, point_to_proc, desc_halo); */
      Cdgerv2d(ictxt,n_to_send,n,X,LW,proc_to_comm,0);
      for (i=0; i<n; i++) 
	Update_Data_ovrlap(&Y[i*ly], &X[i*LW], Ovrlap, ovr_points, point_to_proc, desc_halo);
    }  else if (proc_to_comm > myrow) {
      /* First, I receive */
/*        Receive_Data(X,desc_halo[point_to_proc+n_to_recv+N_ELEM_SEND_],proc_to_comm,ictxt); */
/*        Update_Data_ovrlap(Y, X, Ovrlap, ovr_points, point_to_proc, desc_halo); */
      Cdgerv2d(ictxt,n_to_send,n,X,LW,proc_to_comm,0);
      for (i=0; i<n; i++) 
	Update_Data_ovrlap(&Y[i*ly], &X[i*LW], Ovrlap, ovr_points, point_to_proc, desc_halo);
      /* then, I send */
      for (i=0; i<n; i++) 
	Extract_to_send(&Y[i*ly], &X[i*LW], point_to_proc, desc_halo);		
      /* Send_Data(X,desc_halo[point_to_proc+desc_halo[point_to_proc+N_ELEM_RECV_]+ */
/*  			   N_ELEM_SEND_],proc_to_comm,ictxt); */
      Cdgesd2d(ictxt,n_to_recv,n,X,LW,proc_to_comm,0);
/*        Extract_to_send(Y, X, point_to_proc, desc_halo); */
/*        Send_Data(X,desc_halo[point_to_proc+N_ELEM_RECV_],proc_to_comm,ictxt); */
    } else {
      /* I send to myproc */
      for (i=0; i<n; i++) {
	Extract_to_send(&Y[i*ly], &X[i*LW], point_to_proc, desc_halo);		
	/* Update Data */
	Update_Data_ovrlap(&Y[i*ly], &X[i*LW], Ovrlap,
			   ovr_points, point_to_proc, desc_halo);
      }
    }
    
    /* Update proc_to_comm and proc_to_recv */
    point_to_proc += n_to_recv+
      desc_halo[point_to_proc + n_to_recv + N_ELEM_SEND_]+3;
    
    proc_to_comm = desc_halo[point_to_proc + PROC_ID_];
  }
  
  if (WorkSize > *lwork)
    free(X);
}

static void Update_Data_ovrlap(Y, X, Ovrlap, ovr_points, point_to_proc, desc_halo)
     double Y[]; /* (input/output) Local array of elements */
     double X[]; /* (input) Array of elements to insert in Y */
     double Ovrlap[]; /* (input/output) Work array of elements */ 
     int    ovr_points; /* (input) If != 0 update Ovrlap vector */
     int point_to_proc; /* (input) Pointer to desc_to_recv array.
			    In input it points to the block of indexs of elements
			    to update.
			    */
     int desc_halo[]; /* (input) Array of blocks of indexes */
{
  /* PURPOSE: Updates elements in Y e Ovrlap with values in X */
  /* Local scalars */
  int i, recv_dim, ndim;
  
  recv_dim = desc_halo[point_to_proc + N_ELEM_RECV_];
  ndim = desc_halo[point_to_proc + recv_dim + N_ELEM_SEND_];

  if (ovr_points) {
    for (i = 0; i < ndim; i++) {
      Y[desc_halo[point_to_proc + recv_dim + ELEM_SEND_ + i]-1] += X[i];
      Ovrlap[desc_halo[point_to_proc + recv_dim + ELEM_SEND_ + i]-1] += X[i];
    } 
  } else {
    for (i = 0; i < ndim; i++)
      Y[desc_halo[point_to_proc + recv_dim + ELEM_SEND_ + i]-1] += X[i];
  }
}

static void Extract_to_send(Y, X, point_to_proc, desc_halo)
     double Y[]; /* (input) Local array of elements */
     double X[]; /* (output) Array of elements extracted from Y */
     int point_to_proc; /* (input) Pointer to the desc_halo array.
			    It points to the block of indexes of elements
			    to extract. */
     int desc_halo[]; /* (input) Array of blocks of indexes */
{
  /* PURPOSE: Extracts elements to send from array Y, to array X */
  
  /* Local scalars */
  int i, recv_dim;
  
  recv_dim = desc_halo[point_to_proc + N_ELEM_RECV_];
  
  /* Extract elements to send */
  for (i = 0 ; i < recv_dim ; i++)
    X[i] = Y[desc_halo[point_to_proc + ELEM_RECV_ + i]-1] ;
}

static void Update_Data(Y, X, point_to_proc, desc_ovrlap)
     double Y[]; /* (input/output) Local array of elements */
     double X[]; /* (input) Array of elements to insert in Y */
     int point_to_proc; /* (input) Pointer to desc_ovrlap array.
			    In input it points to the block of indexs of elements
			    to update.
			    */
     int desc_ovrlap[]; /* (input) Array of blocks of indexes */
{
  /* PURPOSE: Updates elements in Y with values in X */
  /* Local scalars */
  int i;

  for (i = 0; i < desc_ovrlap[point_to_proc + N_OVRLP_ELEM_]; i++)
    Y[desc_ovrlap[point_to_proc + ELEM_RECV_ + i]-1] += X[i];
}

static void Extract(Ovrlap, X, Master, point_to_proc, desc_ovrlap)
     double Ovrlap[]; /* (input) Local array of elements */
     double X[]; /* (output) Array of elements extracted from Ovrlap */
     int Master[]; /* (input) Master array */
     int point_to_proc; /* (input) Pointer to the desc_ovrlap array.
			    It points to the block of indexes of elements
			    to extract. */
     int desc_ovrlap[]; /* (input) Array of blocks of indexes */
{
  /* PURPOSE: Extracts elements to send from array Ovrlap, to array X */
  
  /* Local scalars */
  int i, ndim;
  
  ndim = desc_ovrlap[point_to_proc + N_OVRLP_ELEM_];

  /* Extract elements to send */
  for (i = 0 ; i < ndim ; i++)
    if (Master[desc_ovrlap[point_to_proc + ndim + ELEM_SEND_ + i]-1] == 1)
      X[i] = Ovrlap[desc_ovrlap[point_to_proc + ndim + ELEM_SEND_ + i]-1] ;
    else
      X[i] = 0.0;
}

static int Search_indx(indx, desc_ovrlap_elem, length)
     int indx;   /* (input) Index to looking for */
     int desc_ovrlap_elem[]; /* (input) Desc_ovrlap_elem array */
     int length; /* (input) Number of local overlap elements */
{
  /* Search for indx in desc_ovrlap_elem array */
  
  /* BINARY SEARCH or SEARCH WITH GUARD */
  int i;
  
  i = 0;

  desc_ovrlap_elem[length*2] = indx;
  
  while (desc_ovrlap_elem[i+OVRLP_ELEM_] != indx)
    i += 2;

  desc_ovrlap_elem[length*2] = -1;

  if ((i+OVRLP_ELEM_) == (length*2))
    return 0;
  else
    return 1;
}
     
static int Length(desc_ovrlap_elem)
     int desc_ovrlap_elem[]; /* (input) Desc_ovrlap_elem array */
{
  /* Count local overlap elements */
  int i;

  i = 0;
  
  while (desc_ovrlap_elem[i+OVRLP_ELEM_] != -1)
    i += 2;
  
  return i/2;
}
