/*
 * Netsolve sparse test matrix functions
 */
#include <stdio.h>
#include <stdlib.h>
#include "error_returns.h"
#include "matrix_auxs.h"
#include "qs.h"

/*
 * Build the rows [first,first+local_size) of a five-point stencil matrix
 * on a domain_size x domain_size grid, in base-0 compressed row storage.
 * Grid points are numbered row by row; every row gets 4.0 on the diagonal
 * and -1.0 for each existing neighbour, with columns in increasing order.
 *
 * On return *values, *indices and *pointers are freshly malloc'ed and
 * owned by the caller; *nnzeros is set to 5*total_size, which is the
 * allocation size used here, not the number of entries actually stored
 * (that number is (*pointers)[local_size]).
 */
int construct_crs_system
(double **values,int **indices,int **pointers,int *nnzeros,
 int first,int local_size,int domain_size,int total_size)
{
  const double off_diagonal = -1.0, diagonal = 4.0;
  const int last = first+local_size;
  int gi,gj,node,fill;
  double *val; int *idx,*ptr;

  *nnzeros = 5*total_size;

  /* allocate */
  *pointers = (int *) malloc((local_size+1)*sizeof(int));
  if (!(*pointers)) ERR_REPORT("Could not allocate pointers");
  *indices = (int *) malloc((*nnzeros+1)*sizeof(int));
  if (!(*indices)) ERR_REPORT("Could not allocate indices");
  *values = (double *) malloc((*nnzeros+1)*sizeof(double));
  if (!(*values)) ERR_REPORT("Could not allocate values");
  ptr = *pointers; idx = *indices; val = *values;

  /* node is the global number of grid point (gi,gj): gj + domain_size*gi */
  node = 0; fill = 0;
  for ( gi=0; gi<domain_size; gi++ ) {
    for ( gj=0; gj<domain_size; gj++, node++ ) {
      if (node<first || node>=last) continue; /* not a local row */
      ptr[node-first] = fill;
      if ( gi>0 ) { /* neighbour in the previous grid row */
        val[fill] = off_diagonal; idx[fill] = node-domain_size; fill++;
      }
      if ( gj>0 ) { /* neighbour in the previous grid column */
        val[fill] = off_diagonal; idx[fill] = node-1; fill++;
      }
      val[fill] = diagonal; idx[fill] = node; fill++;
      if ( gj<domain_size-1 ) { /* neighbour in the next grid column */
        val[fill] = off_diagonal; idx[fill] = node+1; fill++;
      }
      if ( gi<domain_size-1 ) { /* neighbour in the next grid row */
        val[fill] = off_diagonal; idx[fill] = node+domain_size; fill++;
      }
    }
  }
  ptr[local_size] = fill;

  return 0;
}

/*
 * Release the three matrix arrays and the three vectors of a CRS system.
 * Every argument is handed to free(), so null pointers are accepted.
 * Always returns 0.
 */
int destruct_crs_system
(double *mv,int *ptr,int *idx,double *xv,double *yv,double *tmp)
{
  void *owned[6];
  int k;

  /* same release order as the argument groups: matrix first, then vectors */
  owned[0] = mv; owned[1] = ptr; owned[2] = idx;
  owned[3] = xv; owned[4] = yv;  owned[5] = tmp;
  for (k=0; k<6; k++)
    free(owned[k]);
  return 0;
}

/*
 * Replace a square, base-0 CRS matrix by its transpose.
 *
 * mv, idx, ptr   in: the malloc'ed value, column-index and row-pointer
 *                    arrays of the matrix; out: newly malloc'ed arrays
 *                    holding the transpose.  The input arrays are freed.
 * first          global number of the first local row; must be 0, since
 *                the transpose is only formed when all rows are local.
 * local_size     number of rows (and, the matrix being square, columns).
 *
 * Every column index is assumed to lie in [0,local_size).  Rows are
 * scattered in increasing order, so the column indices inside each row of
 * the transpose come out sorted.
 *
 * Returns 0 on success.  Failures go through ERR_REPORT, which is assumed
 * to leave this function; the inputs are then untouched.
 */
int crs_transpose
(double **mv,int **idx,int **ptr,int first,int local_size)
{
  int *point,*ptr_ret,*idx_ret, nnz, i,j,s;
  double *mv_ret;

  if (first!=0) ERR_REPORT("Sorry, can only transpose on one processor");

  nnz = (*ptr)[local_size];

  ptr_ret = (int*) malloc((local_size+1)*sizeof(int));
  if (!ptr_ret) ERR_REPORT("Could not allocate column count");
  point = (int*) malloc((local_size+1)*sizeof(int));
  if (!point) {
    free(ptr_ret);
    ERR_REPORT("Could not allocate transposition pointer");
  }
  idx_ret = (int*) malloc((nnz+1)*sizeof(int));
  if (!idx_ret) {
    free(ptr_ret); free(point);
    ERR_REPORT("Could not allocate transposed indices");
  }
  mv_ret = (double*) malloc((nnz+1)*sizeof(double));
  if (!mv_ret) {
    free(ptr_ret); free(point); free(idx_ret);
    ERR_REPORT("Could not allocate transposed values");
  }

  /* count the entries in every column of the input */
  for (i=0; i<=local_size; i++) ptr_ret[i] = 0;
  for (i=0; i<local_size; i++)
    for (j=(*ptr)[i]; j<(*ptr)[i+1]; j++) ptr_ret[(*idx)[j]]++;

  /* turn the counts into row starts of the transpose; point[] is the
     running insertion position for each of those rows */
  s = 0;
  for (i=0; i<local_size; i++) {
    int count = ptr_ret[i];
    ptr_ret[i] = point[i] = s; s += count;
  }
  ptr_ret[local_size] = s;

  /* scatter: entry (i,col) of the input becomes entry (col,i) */
  for (i=0; i<local_size; i++)
    for (j=(*ptr)[i]; j<(*ptr)[i+1]; j++) {
      int col = (*idx)[j], dst = point[col]++;
      mv_ret[dst] = (*mv)[j];
      idx_ret[dst] = i;
    }
  free(point);

  /* release the old arrays before handing the new ones to the caller */
  free(*mv); free(*ptr); free(*idx);
  *mv = mv_ret; *ptr = ptr_ret; *idx = idx_ret;

  return 0;
}

/*
 * Compute the renumbering of an isize x jsize grid that orders the points
 * block by block over a pi x pj grid of blocks.
 *
 * Block (ip,jp) holds the grid rows [ip*isize/pi,(ip+1)*isize/pi) and the
 * grid columns [jp*jsize/pj,(jp+1)*jsize/pj).  Blocks are visited with jp
 * running fastest, and inside a block the points are taken row by row.
 *
 * On return (*nat_to_per)[natural] is the permuted number of a point and
 * (*per_to_nat)[permuted] the natural number i*jsize+j; both arrays are
 * malloc'ed here with isize*jsize+1 entries and belong to the caller.
 */
int proc_grid_permutation
(int pi,int pj,int isize,int jsize,
 int **nat_to_per,int **per_to_nat)
{
  int bi,bj,next;
  int *forw,*back;

  *nat_to_per = (int *) malloc((isize*jsize+1)*sizeof(int));
  if (!(*nat_to_per)) ERR_REPORT("Could not allocate nat_to_per");
  *per_to_nat = (int *) malloc((isize*jsize+1)*sizeof(int));
  if (!(*per_to_nat)) ERR_REPORT("Could not allocate per_to_nat");
  forw = *nat_to_per; back = *per_to_nat;

  next = 0; /* next permuted number to hand out */
  for (bi=0; bi<pi; bi++) {
    const int i_lo = (bi*isize)/pi, i_hi = ((bi+1)*isize)/pi;
    for (bj=0; bj<pj; bj++) {
      const int j_lo = (bj*jsize)/pj, j_hi = ((bj+1)*jsize)/pj;
      int i,j;
      for (i=i_lo; i<i_hi; i++)
        for (j=j_lo; j<j_hi; j++) {
          const int natural = i*jsize+j;
          forw[natural] = next;
          back[next] = natural;
          next++;
        }
    }
  }
  return 0;
}

/*
 * Apply a symmetric permutation to a base-0 CRS matrix.
 *
 * mat1/ind1/ptr1  the matrix to permute (read only).
 * mat2/ind2/ptr2  caller-allocated storage receiving the permuted matrix.
 * size            number of rows.
 * nnzeros         not used by this routine.
 * perm_forw       old number -> new number, applied to rows and columns.
 * perm_back       new number -> old number, used to size the new rows.
 *
 * Each permuted row is handed to qsir together with its values
 * (presumably to sort it by column index -- see qs.h).  Returns 0, or the
 * error code passed on by ERR_RETURN.
 */
int permute_crs_system
(double *mat1,int *ind1,int *ptr1,double *mat2,int *ind2,int *ptr2,
 int size,int nnzeros,int *perm_forw,int *perm_back)
{
  int new_row,old_row,ierr;

  /* row pointers: new row r has the length of old row perm_back[r] */
  ptr2[0] = 0;
  for (new_row=0; new_row<size; new_row++) {
    const int src = perm_back[new_row];
    ptr2[new_row+1] = ptr2[new_row] + (ptr1[src+1]-ptr1[src]);
  }

  /* copy every old row to its new place, renumbering the columns */
  for (old_row=0; old_row<size; old_row++) {
    const int begin = ptr1[old_row];
    const int length = ptr1[old_row+1]-begin;
    const int target = ptr2[perm_forw[old_row]];
    int k;
    for (k=0; k<length; k++) {
      mat2[target+k] = mat1[begin+k];
      ind2[target+k] = perm_forw[ind1[begin+k]];
    }
    ierr = qsir(ind2+target,mat2+target,length); ERR_RETURN(ierr);
  }
  return 0;
}

/*
 * Generate the five-point stencil matrix in the block ordering that
 * belongs to a pi x pj processor grid.
 *
 * The matrix is first built in the natural ordering and then permuted
 * into newly malloc'ed *mv, *idx and *ptr, which the caller owns.  The
 * natural-ordering copy and the permutation arrays are released again.
 * Only the global case is supported: first must be 0 and local_size must
 * equal total_size.
 */
int crs_system_on_proc_grid
(double **mv,int **idx,int **ptr,int *nnzeros,
 int first,int local_size,int domain_size,int total_size,
 int pi,int pj)
{
  int *forw,*back;      /* natural -> permuted and permuted -> natural */
  int *nat_ptr,*nat_idx; /* matrix in the natural ordering */
  double *nat_mv;
  int ierr;

  if (first!=0 || local_size!=total_size) {
    ERR_REPORT("Can only generate matrix on grid globally");
  }

  ierr = proc_grid_permutation
    (pi,pj,domain_size,domain_size,&forw,&back);
  ERR_RETURN(ierr);
  ierr = construct_crs_system
    (&nat_mv,&nat_idx,&nat_ptr,nnzeros,
     first,local_size,domain_size,total_size);
  ERR_RETURN(ierr);

  /* storage for the permuted matrix, sized like the natural one */
  *mv = (double*) malloc((*nnzeros+1)*sizeof(double));
  if (!(*mv)) ERR_REPORT("Could not allocate mv");
  *idx = (int*) malloc((*nnzeros+1)*sizeof(int));
  if (!(*idx)) ERR_REPORT("Could not allocate idx");
  *ptr = (int*) malloc((local_size+1)*sizeof(int));
  if (!(*ptr)) ERR_REPORT("Could not allocate ptr");

  ierr = permute_crs_system
    (nat_mv,nat_idx,nat_ptr,*mv,*idx,*ptr,local_size,*nnzeros,
     forw,back);
  ERR_RETURN(ierr);

  free(nat_mv); free(nat_idx); free(nat_ptr);
  free(forw); free(back);
  return 0;
}

/*
 * Allocate and fill the solution, right-hand-side and work vectors for
 * the rows [first,first+local_size) of a domain_size x domain_size grid
 * problem.
 *
 * Every local entry of *sol_vector and *rhs_vector is set to 1 and every
 * local entry of *tmp_vector to 0.  Entries whose global number is not a
 * grid point (>= domain_size*domain_size) are left uninitialised.
 * total_size is not used.  The three vectors are malloc'ed here and owned
 * by the caller.
 *
 * A boundary-dependent right-hand side used to be computed for every
 * entry but was never stored; that dead computation has been removed.
 */
int construct_crs_vectors
(int first,int local_size,int domain_size,int total_size,
 double **sol_vector,double **rhs_vector,double **tmp_vector)
{
  int IJ,i,j;

  /* One spare element, as the other allocations in this file have, so
     that local_size==0 never asks malloc for zero bytes (which may
     legitimately return NULL and would be reported as a failure). */
  *sol_vector = (double *) malloc((local_size+1)*sizeof(double));
  if (!(*sol_vector)) ERR_REPORT("Could not allocate sol_vector");
  *rhs_vector = (double *) malloc((local_size+1)*sizeof(double));
  if (!(*rhs_vector)) ERR_REPORT("Could not allocate rhs_vector");
  *tmp_vector = (double *) malloc((local_size+1)*sizeof(double));
  if (!(*tmp_vector)) ERR_REPORT("Could not allocate tmp_vector");

  /* IJ is the global number of grid point (i,j) */
  IJ = 0;
  for ( i=0; i<domain_size; i++ ) {
    for ( j=0; j<domain_size; j++ ) {
      if (IJ>=first && IJ<first+local_size) {
        (*rhs_vector)[IJ-first] = 1.;
        (*sol_vector)[IJ-first] = 1.;
        (*tmp_vector)[IJ-first] = 0.;
      }
      IJ++;
    }
  }
  return 0;
}

/*
 * Generate the solution, right-hand-side and work vectors in the block
 * ordering that belongs to a pi x pj processor grid.
 *
 * *sol and *tmp come straight from construct_crs_vectors; *rhs is a newly
 * malloc'ed copy of the natural-ordering right-hand side with entry i
 * moved to position perm_forw[i].  All three belong to the caller.
 *
 * The permutation is applied to all total_size entries, so, as for the
 * matrix generator, only the global case is supported: first must be 0
 * and local_size must equal total_size.  Without that check the copy loop
 * would run past the end of the local buffers.
 * NOTE(review): the loop also relies on total_size being
 * domain_size*domain_size, the length of the permutation -- confirm
 * against the callers.
 */
int crs_vectors_on_proc_grid
(int first,int local_size,int domain_size,int total_size,int pi,int pj,
 double **sol,double **rhs,double **tmp)
{
  int *perm_forw,*perm_back, ierr,i; double *t_rhs;

  if ( ! (first==0 && local_size==total_size) ) {
    ERR_REPORT("Can only generate vectors on grid globally");
  }
  ierr = proc_grid_permutation
    (pi,pj,domain_size,domain_size,&perm_forw,&perm_back); ERR_RETURN(ierr);
  ierr = construct_crs_vectors
    (first,local_size,domain_size,total_size,sol,&t_rhs,tmp); ERR_RETURN(ierr);
  *rhs = (double*) malloc((local_size+1)*sizeof(double));
  if (!(*rhs)) {
    /* do not leak the temporaries when bailing out */
    free(perm_forw); free(perm_back); free(t_rhs);
    ERR_REPORT("Could not allocate rhs");
  }
  for (i=0; i<total_size; i++) (*rhs)[perm_forw[i]] = t_rhs[i];
  free(perm_forw); free(perm_back); free(t_rhs);
  return 0;
}

/*
 * Convert a CRS matrix from base-0 to base-1 numbering in place: every
 * column index of rows 0..order-1 and every one of the order+1 row
 * pointers is incremented.  values is not touched.  Always returns 0.
 */
int crs_mat_tobase1
(double *values,int *indices,int *pointers,int order)
{
  int row,k;

  /* column indices first, while the row pointers are still base 0 */
  for (row=0; row<order; row++) {
    const int stop = pointers[row+1];
    for (k=pointers[row]; k<stop; k++)
      indices[k] += 1;
  }
  /* then the row pointers, including the closing one */
  for (row=0; row<order; row++)
    pointers[row] += 1;
  pointers[order] += 1;
  return 0;
}

/*
 * Convert a CRS matrix from base-1 to base-0 numbering in place: all
 * order+1 row pointers are decremented, then every column index of rows
 * 0..order-1.  values is not touched.  Always returns 0.
 */
int crs_mat_tobase0
(double *values,int *indices,int *pointers,int order)
{
  int row,k;

  /* row pointers first: the index loop below needs them in base 0 */
  for (row=order; row>=0; row--)
    pointers[row] -= 1;
  for (row=0; row<order; row++) {
    const int stop = pointers[row+1];
    for (k=pointers[row]; k<stop; k++)
      indices[k] -= 1;
  }
  return 0;
}

/*
 * Convert, in place, to the format where the diagonal comes first in
 * every row.
 *
 * For each of the size local rows the first entry whose column index
 * equals first+row is moved to the front of the row; the entries that
 * preceded it shift up one place and keep their order.  Rows without such
 * an entry are left as they are.  ptr holds row pointers in the given
 * base and is not modified.  Always returns 0.
 */
int crs_to_drs(double *val,int *idx,int *ptr,int first,int size,int base)
{
  int row;

  for (row=0; row<size; row++) {
    const int head = ptr[row]-base, tail = ptr[row+1]-base;
    const int dia = first+row;
    int pos = head, k;
    double diag;

    /* locate the diagonal entry of this row */
    while (pos<tail && idx[pos]!=dia) pos++;
    /* nothing to do if it is missing or already in front */
    if (pos>=tail || pos==head) continue;

    diag = val[pos];
    for (k=pos; k>head; k--) {
      idx[k] = idx[k-1]; val[k] = val[k-1];
    }
    val[head] = diag; idx[head] = dia;
  }
  return 0;
}

/*
 * Convert, in place, from the diagonal-first row format back to rows
 * sorted by column.
 *
 * The leading entry of every non-empty row is taken to be its diagonal
 * (column first+row).  The entries behind it whose column index is
 * smaller than first+row move down one place, and the diagonal is
 * re-inserted after them.  ptr holds row pointers in the given base and
 * is not modified.
 *
 * Empty rows are skipped: they have no leading entry, and touching slot
 * ptr[row]-base would read and write the first entry of the following
 * row -- or, for a trailing empty row, one element past the last stored
 * entry.
 *
 * Always returns 0.
 */
int drs_to_crs(double *val,int *idx,int *ptr,int first,int size,int base)
{
  int row,col;

  for (row=0; row<size; row++) {
    const int start = ptr[row]-base, end = ptr[row+1]-base;
    double diag;

    if (start>=end) continue; /* empty row: nothing to move */

    diag = val[start];
    for (col=start+1; col<end && idx[col]<first+row; col++) {
      idx[col-1] = idx[col]; val[col-1] = val[col];}
    /* col is one past the last entry that was moved down */
    col--;
    val[col] = diag; idx[col] = first+row;
  }
  return 0;
}

/*
 * Print a CRS matrix row by row on stdout.
 *
 * ptr and idx are given in the stated base; entry positions and column
 * numbers are printed with the base subtracted.  first is the global
 * number of the first local row.  Returns 0, or the error code of
 * crs_nnzeros passed on by ERR_RETURN.
 */
int print_crs_matrix
(double *mv,int *idx,int *ptr, int base,int first,int local_size)
{
  int row,nnz,ierr;

  ierr = crs_nnzeros(ptr,local_size,base,&nnz); ERR_RETURN(ierr);
  printf("CRS matrix of base %d; shown on base 0\n  #nzeros=%d\n",
         base,nnz);
  for (row=0; row<local_size; row++) {
    int k = ptr[row]-base;
    const int stop = ptr[row+1]-base;
    printf("row [f:%d,l:%d]=g:%d: cols",first-base,row,first+row-base);
    while (k<stop) {
      printf(" %d=>%7.3e",idx[k]-base,mv[k]);
      k++;
    }
    printf("\n");
  }
  return 0;
}

/*
 * Print a CRS matrix row by row on stdout, taking the global number shown
 * for local row i from ind[i-base].
 *
 * ptr and idx are given in the stated base; entry positions and column
 * numbers are printed with the base subtracted.  Returns 0, or the error
 * code of crs_nnzeros passed on by ERR_RETURN.
 */
int print_crs_matrix_i
(double *mv,int *idx,int *ptr, int base,int *ind,int local_size)
{
  int row,nnz,ierr;

  ierr = crs_nnzeros(ptr,local_size,base,&nnz); ERR_RETURN(ierr);
  printf("CRS matrix of base %d; shown on base 0\n  #nzeros=%d\n",
         base,nnz);
  for (row=0; row<local_size; row++) {
    int k = ptr[row]-base;
    const int stop = ptr[row+1]-base;
    printf("row [l:%d]=g:%d: cols",row,ind[row-base]);
    while (k<stop) {
      printf(" %d=>%7.3e",idx[k]-base,mv[k]);
      k++;
    }
    printf("\n");
  }
  return 0;
}

/*
 * Print a matrix held in diagonal-first row storage, row by row, on
 * stdout.  The entries are printed in the order in which they are stored.
 *
 * ptr and idx are given in the stated base; entry positions and column
 * numbers are printed with the base subtracted.  first is the global
 * number of the first local row.  Returns 0, or the error code of
 * crs_nnzeros passed on by ERR_RETURN.
 */
int print_drs_matrix
(double *mv,int *idx,int *ptr, int base,int first,int local_size)
{
  int row,nnz,ierr;

  ierr = crs_nnzeros(ptr,local_size,base,&nnz); ERR_RETURN(ierr);
  printf("DRS matrix of base %d; shown on base 0\n  #nzeros=%d\n",
         base,nnz);
  for (row=0; row<local_size; row++) {
    int k = ptr[row]-base;
    const int stop = ptr[row+1]-base;
    printf("row [f:%d,l:%d]=g:%d: cols",first-base,row,first+row-base);
    while (k<stop) {
      printf(" %d=>%7.3e",idx[k]-base,mv[k]);
      k++;
    }
    printf("\n");
  }
  return 0;
}

/*
 * Store in *size the actual number of nonzeros of a CRS matrix whose row
 * pointers use the given base, i.e. ptr[local_size]-base; the last valid
 * entry index is one less than that.
 *
 * Returns 0 on success.  If ptr[0] differs from base the input is
 * rejected: a message is printed, *size is left alone and 1 is returned.
 */
int crs_nnzeros(int *ptr,int local_size,int base,int *size)
{
  const int head = ptr[0];

  if (head==base) {
    *size = ptr[local_size]-base;
    return 0;
  }
  printf("crs_nnzeros: bad input, ptr[0]=%d, purported local size = %d\n",
         head,local_size);
  return 1;
}

/*
 * Sparse matrix-vector product out = A*in for a base-0 CRS matrix with
 * local_size rows.  The column indices address in[] directly; a row
 * without entries yields 0.  Always returns 0.
 */
int crs_matmult(double *values,int *indices,int *pointers,int local_size,
                double *in,double *out)
{
  int row;

  for (row=0; row<local_size; row++) {
    double acc = 0;
    int k = pointers[row];
    const int stop = pointers[row+1];
    /* accumulate in storage order */
    while (k<stop) {
      acc += values[k]*in[indices[k]];
      k++;
    }
    out[row] = acc;
  }
  return 0;
}

/*
 * Find a block structure in the local rows of a base-0 CRS matrix.
 *
 * comm        not used by this routine.
 * idx, ptr    column indices and row pointers of the local rows.
 * band        only couplings strictly closer than band to the diagonal
 *             are looked at.
 * first       global number of the first local row.
 * local_size  number of local rows.
 * blocks      out: array of *nblocks+1 global row numbers; entry k is the
 *             first row of block k and the last entry is first+local_size.
 *             Obtained through ALLOCATE and owned by the caller.
 * nblocks     out: number of blocks found.
 *
 * left[r] is the largest distance, within the band, from the diagonal to
 * an entry left of it in local row r; above[c] is the same for entries
 * above the diagonal in local column c.  A row starts a block when it has
 * no such coupling itself and none of the next band-1 rows/columns
 * reaches back across it.
 *
 * The result is also printed on stdout.  Returns 0.
 *
 * NOTE(review): the work arrays are released with free(), which assumes
 * ALLOCATE is malloc-based -- confirm against its definition.
 */
int crs_get_blockstructure
(MPI_Comm comm,
 int *idx,int *ptr,int band,int first,int local_size,
 int **blocks,int *nblocks)
{
  int irow,*left,*above,nb,*b;

  /* find skyline structure */
  ALLOCATE(left,local_size,int,"leftmost");
  ALLOCATE(above,local_size,int,"topmost");
  for (irow=0; irow<local_size; irow++) {left[irow] = 0; above[irow] = 0;}
  for (irow=0; irow<local_size; irow++) {
    int row=irow+first,col;
    for (col=ptr[irow]; col<ptr[irow+1]; col++) {
      int i=row,j=idx[col];
      if (j<i && j>i-band && i-j>left[irow]) left[irow] = i-j;
      /* Above the diagonal the distance is j-i; comparing i-j, which is
         negative there, could never exceed the initial 0.  Columns that
         are not local have no slot in above[] and are skipped. */
      if (j>i && j<i+band && j-first<local_size && j-i>above[j-first])
        above[j-first] = j-i;
    }
  }
  /* overestimate number of blocks. this can be improved */
  nb = 1;
  for (irow=0; irow<local_size; irow++)
    if (left[irow]==0 && above[irow]==0) nb++;
  /* find block structure locally */
  ALLOCATE(b,nb+1,int,"block structure");
  nb = 0;
  for (irow=0; irow<local_size; irow++)
    if (left[irow]==0 && above[irow]==0) {
      int k,found=1;
      /* a later row or column reaching back past irow rules it out */
      for (k=1; k<band && irow+k<local_size; k++)
        if (left[irow+k]>k || above[irow+k]>k) {found=0; break;}
      if (found) b[nb++] = irow+first;
    }
  b[nb] = local_size+first;
  printf("Blocks found %d:",nb);
  for (irow=0; irow<=nb; irow++) printf(" %d",b[irow]);
  printf("\n");
  *nblocks = nb; *blocks = b;

  /* the skyline arrays are work space only */
  free(left); free(above);

  return 0;
}

/*
 * Convert from coordinate storage to compressed row.
 *
 * ico/jco/val   row numbers, column numbers and values of the nnz
 *               entries; they are reordered in place.
 * ptr           out: array of local_size+1 base-0 row pointers, obtained
 *               through ALLOCATE and owned by the caller.
 * first         global number of the first local row.
 *
 * The entries are first passed to qsijr; the pointer construction below
 * relies on ico being in nondecreasing order afterwards (presumably that
 * is what qsijr establishes -- see qs.h).  Each row is then handed to qsir
 * together with its values.  Returns 0, or the error code passed on by
 * ERR_RETURN.
 */
int coo_to_crs(int *ico,int **ptr,int *jco,double *val,
               int first,int local_size,int nnz)
{
  int k,filled,ierr;

  ierr = qsijr(jco,ico,val,nnz); ERR_RETURN(ierr);
  ALLOCATE(*ptr,local_size+1,int,"pointers");

  /* filled is the last local row whose start pointer has been set */
  filled = 0; (*ptr)[filled] = 0;
  for (k=0; k<nnz; k++) {
    const int entry_row = ico[k];
    /* every row up to the row of this entry starts here */
    while (entry_row>first+filled)
      (*ptr)[++filled] = k;
  }
  /* the remaining rows, and the closing pointer, sit at the very end */
  while (first+local_size>first+filled)
    (*ptr)[++filled] = nnz;

  for (k=0; k<local_size; k++) {
    const int lo = (*ptr)[k], hi = (*ptr)[k+1];
    ierr = qsir(jco+lo,val+lo,hi-lo); ERR_RETURN(ierr);
  }
  return 0;
}
