/*
 * This version of Cpdlaread.c reads from a matrix file, with the assumption
 * that the matrix has been stored in column major order, and using lseek()
 * acquires the data for one processor.  Then it sends that data to the
 * processor in pieces the maximum size of that processors number of rows
 * by the column blocking size.  This has been done in order to 
 *  1) reduce the amount of data stored on disk and 
 *  2) reduce the posibility that the BLACS will run out of buffer space.
 *
 */

#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>
#include "core.h"
#include "scalapackservice.h"

int Cpdlaread(infile,data,desca)
char *infile;
double *data;
int *desca;
{

  int i,x;
  int maxrow,maxcol;			/* largest possoble rows/cols */
  int prow,pcol;			/* current process */
  int size=sizeof(double);
  int num;				/* is there actually something ?*/
  int seekval;				/* file position */
  int input_fd;		                /* input fd */
  int *rows;				/* array by process or num rows */
  int *cols;				/*  ditto columns */
  int readblock;			/* default size to read from file */
  int nrows,ncols;			/* number of row/col blocks */
  int rowblock,colblock;		/* iterators over blocks */
  int row_extra,col_extra;		/* excess rows/columns */
  int send_col_extra;

  double *buf;				/* the buffer to read from file */
  double *ptr;				/* pointer to access the buffer*/
  double *myptr;			/* pointer to access the buffer*/


  if((MYROW==0) &&(MYCOL==0)) {

      input_fd=open(infile,O_RDWR);
      if(input_fd <0 ) {
	perror("input file");
	exit(1);
	}

      rows=(int *)malloc(sizeof(int)*PROC_ROWS);
      cols=(int *)malloc(sizeof(int)*PROC_COLS);
      if((rows==NULL) || (cols==NULL) ) {
	perror("row/col arrays ");
	exit(1);
	}


      maxrow=0;
      for(i=0;i<PROC_ROWS;i++) {
	rows[i]=HPC_numroc(desca[M_],ROW_BLOCK,i,0,PROC_ROWS);
	if(rows[i]>maxrow) {
	  maxrow=rows[i];
	  }
	}
      
      maxcol=0;
      for(i=0;i<PROC_COLS;i++) {
	cols[i]=HPC_numroc(desca[N_],COL_BLOCK,i,0,PROC_COLS);
	if(cols[i]>maxcol) {
	  maxcol=cols[i];
	  }
	}
      
      buf=(double *)malloc(size*maxrow*maxcol);
      if(buf==NULL) {
	perror("buffer");
	exit(1);
	}
      readblock=ROW_BLOCK*8;

      nrows=desca[M_] / ROW_BLOCK;
      ncols=desca[N_] / COL_BLOCK;
      row_extra=desca[M_] % ROW_BLOCK;
      col_extra=desca[N_] % COL_BLOCK;

      myptr=data;
      for(prow=0;prow<PROC_ROWS;prow++) {
	for(pcol=0;pcol<PROC_COLS;pcol++) {
	  ptr=buf;
	  send_col_extra=0;

	  /*if this test is true then the current */
	  /*processor actually has data */

	  num=rows[prow]*cols[pcol];
	  if(num > 0) {
	    if((MYROW == prow) && (MYCOL == pcol)) {

	    /* this is me and I don't need to store input in buffer*/
	    /*seek from start for this block */

	    seekval=(pcol*desca[M_]*COL_BLOCK);
	    lseek(input_fd,(seekval*size)+(6*sizeof(int)),SEEK_SET);
	    seekval=0;

	    for(colblock=pcol;colblock<ncols;colblock++) {
	      if((colblock%PROC_COLS)==pcol) {
	      for(i=0;i<COL_BLOCK;i++) {
		for(rowblock=0;rowblock<nrows;rowblock++) {
		  if((rowblock % PROC_ROWS)==prow) {
		    if(seekval>0) {
		      lseek(input_fd,seekval*size,SEEK_CUR);
		      seekval=0;
		      }
		    read(input_fd,myptr,readblock);
		    myptr+=ROW_BLOCK;
		    }
		  else {
		    seekval+=ROW_BLOCK;
		    }
		  }
		if(row_extra > 0) {
		  if((rowblock % PROC_ROWS)==prow) {
		    if(seekval>0) {
		      lseek(input_fd,seekval*size,SEEK_CUR);
		      seekval=0;
		      }
		    read(input_fd,myptr,row_extra*size);
		    myptr+=row_extra;
		    }
		  else {
		    seekval+=row_extra;
		    }
		  }
		}
	      }
	    else {
	      seekval+=desca[M_]*COL_BLOCK;
	      }
	    }

	    if(col_extra > 0) {
	      if((colblock%PROC_COLS)==pcol) {
	      for(i=0;i<col_extra;i++) {
		for(rowblock=0;rowblock<nrows;rowblock++) {
		  if((rowblock % PROC_ROWS)==prow) {
		    if(seekval>0) {
		      lseek(input_fd,seekval*size,SEEK_CUR);
		      seekval=0;
		      }
		    read(input_fd,myptr,readblock);
		    myptr+=ROW_BLOCK;
		    }
		  else {
		    seekval+=ROW_BLOCK;
		    }
		  }
		if(row_extra > 0) {
		  if((rowblock % PROC_ROWS)==prow) {
		    if(seekval>0) {
		      lseek(input_fd,seekval*size,SEEK_CUR);
		      seekval=0;
		      }
		    read(input_fd,myptr,row_extra*size);
		    myptr+=row_extra;
		    }
		  else {
		    seekval+=row_extra;
		    }
		  }
		}
	      }
	      }
	    } /* end of me */
	else {
	    /*read for a processor */
	    /*seek from start for this block */

	    seekval=(pcol*desca[M_]*COL_BLOCK);
	    lseek(input_fd,(seekval*size)+(6*sizeof(int)),SEEK_SET);
	    seekval=0;

	    for(colblock=pcol;colblock<ncols;colblock++) {
	      if((colblock%PROC_COLS)==pcol) {
	      for(i=0;i<COL_BLOCK;i++) {
		for(rowblock=0;rowblock<nrows;rowblock++) {
		  if((rowblock % PROC_ROWS)==prow) {
		    if(seekval>0) {
		      lseek(input_fd,seekval*size,SEEK_CUR);
		      seekval=0;
		      }
		    read(input_fd,ptr,readblock);
		    ptr+=ROW_BLOCK;
		    }
		  else {
		    seekval+=ROW_BLOCK;
		    }
		  }
		if(row_extra > 0) {
		  if((rowblock % PROC_ROWS)==prow) {
		    if(seekval>0) {
		      lseek(input_fd,seekval*size,SEEK_CUR);
		      seekval=0;
		      }
		    read(input_fd,ptr,row_extra*size);
		    ptr+=row_extra;
		    }
		  else {
		    seekval+=row_extra;
		    }
		  }
		}
	      }
	    else {
	      seekval+=desca[M_]*COL_BLOCK;
	      }
	    }

	    if(col_extra > 0) {
	      if((colblock%PROC_COLS)==pcol) {
	      send_col_extra=1;
	      for(i=0;i<col_extra;i++) {
		for(rowblock=0;rowblock<nrows;rowblock++) {
		  if((rowblock % PROC_ROWS)==prow) {
		    if(seekval>0) {
		      lseek(input_fd,seekval*size,SEEK_CUR);
		      seekval=0;
		      }
		    read(input_fd,ptr,readblock);
		    ptr+=ROW_BLOCK;
		    }
		  else {
		    seekval+=ROW_BLOCK;
		    }
		  }
		if(row_extra > 0) {
		  if((rowblock % PROC_ROWS)==prow) {
		    if(seekval>0) {
		      lseek(input_fd,seekval*size,SEEK_CUR);
		      seekval=0;
		      }
		    read(input_fd,ptr,row_extra*size);
		    ptr+=row_extra;
		    }
		  else {
		    seekval+=row_extra;
		    }
		  }
		}
	      }
	    }
	    /*
	     * Send the data to the processor I read for.
	     */
	    ptr=buf;
	    /*
	    if((prow==0) && (pcol==1))
	    fprintf(stderr,"Sending %d rows by col_block to %d - %d\n",
		rows[prow],prow,pcol);
		*/
	    for(x=COL_BLOCK;x<=cols[pcol];x+=COL_BLOCK) {
	     /*
	      fprintf(stderr,"Sending col_block to %d x %d\n",prow,pcol);
	      */
	      Cdgesd2d(desca[CTXT_],rows[prow],COL_BLOCK,ptr,
			rows[prow],prow,pcol);
	      ptr=ptr+(rows[prow]*COL_BLOCK);
	      }
	    if(send_col_extra) {
	    /*
	    if((prow==0) && (pcol==1))
	    fprintf(stderr,"Sending %d rows by %d extra columns to %d - %d\n",
			rows[prow],col_extra,prow,pcol);
			*/
	      /*
	      fprintf(stderr,"Sending col_extra to %d x %d\n",prow,pcol);
	      */
	      Cdgesd2d(desca[CTXT_],rows[prow],col_extra,ptr,
			rows[prow],prow,pcol);
	      }
	  } /* end of not me */
	  } /* end of num > 0 bytes to process */
	}
      }
      
    close(input_fd);

    /* 
     * just don't want a seg fault 
     * but do need to free memory
     */

    if(buf) free(buf);
    if(rows) free(rows);
    if(cols) free(cols);
    return(0);
    }
  else {
    /*
     * determine number of rows/cols I have and if there are any left-
     * over columns I have to handle.
     */

    maxrow=HPC_numroc(desca[M_],desca[MB_],MYROW,0,PROC_ROWS);
    maxcol=HPC_numroc(desca[N_],desca[NB_],MYCOL,0,PROC_COLS);
    col_extra=maxcol%COL_BLOCK;
    if((maxrow==0) || (maxcol==0)) {
      return(0);
      }
    ptr=data;
    for(x=COL_BLOCK;x<=maxcol;x+=COL_BLOCK) {
      Cdgerv2d(desca[CTXT_],maxrow,COL_BLOCK,ptr,maxrow,0,0);
      /*
      if((MYROW==0) && (MYCOL==1))
      fprintf(stderr,"RCVD, col_block x %d rows; %d x %d\n",maxrow,MYROW,MYCOL);
      */
      ptr+=(maxrow*COL_BLOCK);
      }
    if(col_extra>0) {
      /*
      if((MYROW==0) && (MYCOL==1))
      fprintf(stderr,"RCVing col_extra %d; %d x %d\n",col_extra,MYROW,MYCOL);
      */
      Cdgerv2d(desca[CTXT_],maxrow,col_extra,ptr,maxrow,0,0);
      }	
    return(0);
    }

}
  

