#include <stdlib.h>
#include <stdio.h>
#include "analysis.h"
#include "error_returns.h"
/*#include "petsc.h"*/

/*
 * Find the diagonal blocks of a sparse matrix when only the entries that
 * lie strictly closer than `band` to the diagonal are taken into account.
 *
 * The matrix is read in compressed-row form: the entries of row r are
 * idx[ptr[r]] .. idx[ptr[r+1]-1] (column numbers) with matching values
 * in val[].  Explicitly stored zeros are ignored.
 *
 *   size     number of rows
 *            (NOTE(review): column numbers are used to index arrays of
 *            length `size`, so a square matrix is assumed -- confirm)
 *   band     only entries with |row-col| < band are considered
 *   nsplits  out: number of blocks found
 *   splits   out: array of *nsplits+1 ints; entry b is the first row of
 *            block b and the final entry is `size`.  Obtained through
 *            ALLOCATE and handed to the caller, which releases it
 *            (this file uses free() for that).
 *
 * Returns 0 on the normal path; what happens on allocation failure is
 * determined by the ALLOCATE macro.
 */
static int find_block_structure_in_band
(int *ptr,int *idx,double *val,int size,int band,int *nsplits,int **splits)
{
  int *left,*above,nb,*blocks, row;
  
  /* find skyline structure:
     left[i]  = largest distance from the diagonal to a counted entry
                in row i, left of the diagonal;
     above[j] = largest distance from the diagonal to a counted entry
                in column j, above the diagonal */
  ALLOCATE(left,size,int,"leftmost"); ALLOCATE(above,size,int,"topmost");
  for (row=0; row<size; row++) {left[row] = 0; above[row] = 0;}
  for (row=0; row<size; row++) {
    int col;
    for (col=ptr[row]; col<ptr[row+1]; col++) {
      int i=row,j=idx[col]; double v=val[col];
      if (v==0.) continue;
      if (j<i && j>i-band && i-j>left[i]) left[i] = i-j;
      /* distance above the diagonal is j-i; comparing with i-j (which is
         negative here) would never record anything */
      if (j>i && j<i+band && j-i>above[j]) above[j] = j-i;
    }
  }
  /* overestimate number of blocks. this can be improved */
  nb = 1;
  for (row=0; row<size; row++) if (left[row]==0 && above[row]==0) nb++;
  ALLOCATE(blocks,nb+1,int,"block structure");
  nb = 0;
  /* a row starts a block if it has no counted coupling to earlier
     rows/columns, and none of the next band-1 rows reaches back
     past it either */
  for (row=0; row<size; row++)
    if (left[row]==0 && above[row]==0) {
      int k,found=1;
      for (k=1; k<band && row+k<size; k++)
        if (left[row+k]>k || above[row+k]>k) {found=0; break;}
      if (found) blocks[nb++] = row;
    }
  blocks[nb] = size; /* sentinel: one past the last row */
  free(left); free(above);
  *nsplits = nb; *splits = blocks;

  return 0;
}

/*
 * Pick a splitting of a sparse matrix into diagonal blocks.
 *
 * The matrix is given in compressed-row form (ptr/idx/val, `size` rows)
 * and must start at ptr[0]==0.  Every diagonal distance |row-col| that
 * occurs in the sparsity pattern is tried as a cut-off band, in
 * increasing order; for each, find_block_structure_in_band() reports the
 * blocks obtained.  The search stops at the first band that gives fewer
 * blocks than there are processes in `comm`; of the bands before that,
 * the last one that lowered the block count is kept.
 *
 *   r_nb      out: number of blocks chosen
 *   r_blocks  out: array of *r_nb+1 block start rows, the last entry
 *             being `size`; ownership passes to the caller (this file
 *             releases such arrays with free()).  NULL if no band
 *             qualified.
 *
 * Progress is written with plain printf, i.e. by each calling process.
 * Returns 0 on the normal path; error paths go through the project's
 * ERR_REPORT / ERR_RETURN macros.
 */
int find_block_structure
(MPI_Comm comm,int *ptr,int *idx,double *val,int size,
 int *r_nb,int **r_blocks)
{
  int ntids,*bands,*cbands,nbands,nsplits=0,*splits=NULL,opt=0, row,ierr;

  if (ptr[0]!=0)
    ERR_REPORT("finding block structure needs whole matrix on one processor");

  MPI_Comm_size(comm,&ntids);

  /* find all nonzero bands in wasteful storage */
  ALLOCATE(bands,size,int,"bands");
  nbands = 0; for (row=0; row<size; row++) bands[row] = 0;
  for (row=0; row<size; row++) {
    int icol;
    for (icol=ptr[row]; icol<ptr[row+1]; icol++) {
      int band = abs(row-idx[icol]);
      /* count every distance exactly once, so that nbands is the number
         of marked entries the compression below will copy */
      if (!bands[band]) {bands[band] = 1; nbands++;}
    }
  }

  /* compress the array of bands */
  ALLOCATE(cbands,nbands,int,"compact bands");
  nbands = 0;
  for (row=0; row<size; row++) 
    if (bands[row]) cbands[nbands++] = row;
  free(bands);

  /* for each nonzero band, see how many blocks it gives;
     preserve the last band that gives more blocks than processors,
     but take the smallest of all bands that give equal numbers */
  for (row=0; row<nbands; row++) {
    int ntmp,*splitstmp;
    ierr = find_block_structure_in_band
      (ptr,idx,val,size,cbands[row],&ntmp,&splitstmp); ERR_RETURN(ierr);
    if (ntmp<nsplits)
      printf("found %d blocks at %d\n",ntmp,cbands[row]);
    /* too few blocks: discard this candidate and stop searching */
    if (ntmp<ntids) {free(splitstmp); break;}
    if (ntmp<nsplits || nsplits==0) {
      free(splits);
      nsplits = ntmp; splits = splitstmp; opt = row;
    } else {
      /* not better than what we already hold */
      free(splitstmp);
    }
  }
  /* cbands[opt] is only meaningful if some band was accepted */
  if (splits)
    printf("going to split %d blocks on band %d.\n",
           nsplits,cbands[opt]);
  free(cbands);

  *r_nb = nsplits; *r_blocks = splits;

  if (!splits) {
    /* nothing qualified (e.g. fewer rows than processes, or an empty
       pattern); the outputs are already set to 0 / NULL */
    ERR_REPORT("no band gives at least as many blocks as processors");
  }

  printf("Blocks found %d:",nsplits);
  if (splits)
    for (row=0; row<=nsplits; row++) 
      printf(" %d",splits[row]);
  printf("\n");

  return 0;
}
