/* TRACING: when nonzero, compiles in the LambdaOneEst diagnostic and the
   kept/discarded connection counters (K, D) in MakeStrongMatrix */
#define TRACING 0
/* RUGE: when nonzero, the coloured/uncoloured split is taken from a fixed
   pattern on an isize x jsize grid (FakeIndset) instead of the randomised
   independent set search (FindIndSet) */
#define RUGE 0
/*
   Defines a parallel multilevel preconditioner 
*/
#include "vec.h"
#include "src/vec/impls/mpi/pvecimpl.h"
#include "src/vec/utils/vpipe.h"
#include "parpre_pc.h"
#include "src/pc/pcparallel.h"
#include "src/pc/pcextra.h"
#include "src/sles/slesimpl.h"
#include "src/pc/pcimpl.h"
#include "options.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/is/isimpl.h"

/* NOTE(review): neither constant is referenced in the part of the file
   reviewed here; presumably an allocation increment and an upper bound on
   the number of levels -- confirm against the rest of the file */
#define CHUNCKSIZE   100
#define MAXLEVELS 100

/* Prototypes of routines that are defined outside this file */
extern int PCParallelInitCommStruct(PC pc);
extern int MatMultAXBY_AIJ
(Scalar a, Mat aijin,Vec xx, Scalar b, Vec yy,Vec zz);
extern int MatMatMult_MPIAIJ(Mat A,Mat B,Mat *C);
extern int MatDiagonalScale_MPIAIJ(Mat A,Vec ll,Vec rr);
extern int MatMatSubtract_AIJ(Mat a, Mat b, Mat *c);
extern int MatMaxRowLen_MPIAIJ(Mat A, int *rowlen);
extern int MatMaxRowOffDiagElement_MPIAIJ(Mat A,Vec e);
extern int MatMaxColOffDiagElement_MPIAIJ(Mat A,Vec e);

/*
   Per-level data of the multilevel preconditioner.  Levels are chained
   through next_level.
*/
struct OneLevel {
  int level;        /* level number, as passed to InitOneLevel */
  int size1;        /* local number of points in the first index set */
  int size2;        /* local number of points in the second index set */
  int rstart;       /* first locally owned row of mat */
  int rend;         /* one past the last locally owned row of mat */
  int local_size;   /* rend-rstart */
  int global_size;  /* sum of local_size over comm */
#if RUGE
  int isize;        /* grid dimensions used by FakeIndset */
  int jsize;
#endif
  Mat mat;          /* matrix of this level */
  Vec diag;         /* diagonal */
  Vec cdiag;        /* id with lumping of small elements */
  Vec cdiag1;       /* id but 1-block only */
  MPI_Comm comm;    /* communicator of mat */
  IS indices;       /* index set handed to InitOneLevel */
  IS indices1;      /* created in SplitOffOneLevel for the first set */
  Mat g11;          /* global versions of same */
  Mat g12;
  Mat g21;
  Mat g22;
  Vec u;            /* work vectors */
  Vec v;
  Vec w;
  Vec u1;
  Vec v1;
  Vec u2;
  Vec v2;
  Vec g1;
  Vec g2;
  Vec h1;
  Vec h2;
  SLES a11_solve;   /* solver on the 11 block, see Setup11Solve */
  SLES a22_solve;   /* solver on the 22 block, see SetupLastLevel */
  SLES pre_smooth;  /* smoothers, see SetSmoothers; 0 if not created */
  SLES post_smooth;
  VecScatter get_clr;
  VecScatter put_clr;
  VecScatter get_rest;
  VecScatter put_rest;
  AMLSolveScheme solve_scheme;
  AMLCoarseGridChoice grid_choice;
  AMLFillMethod fill_method;
  struct OneLevel *next_level;
};

typedef struct OneLevel MC_OneLevel_struct;

/* typedef enum {AMLFillNone, AMLFillDiag, AMLFillStrong, AMLFillFull } AMLFillMethod;*/
/* typedef enum {AMLSolveMG, AMLSolvePolynomial, AMLSolveILU} AMLSolveScheme;*/
/* typedef enum {AMLCoarseGridDependent, AMLCoarseGridIndependent} AMLCoarseGridChoice; */
/*typedef enum {AMLSmoothNone=0, AMLPreSmooth=1, AMLPostSmooth=2, AMLPrePostSmooth=3} AMLSmootherChoice;*/

/*
   Private data of the multilevel preconditioner, reached through pc->data.
   The option fields are filled in by the AMLSet... routines below.
*/
typedef struct {
  PCPstruct par_info;
  MC_OneLevel_struct *level_stuff;
  AMLFillMethod fill_method;          /* AMLSetFillMethod */
  AMLSolveScheme solve_scheme;        /* AMLSetSolutionScheme */
  AMLCoarseGridChoice grid_choice;    /* AMLSetCoarseGrid(In)Dependent */
  AMLSmootherChoice smoother_choice;  /* AMLSetSmootherChoice */
  SLES pre_smoother;                  /* PCMultiLevelGetPreSmoother */
  SLES post_smoother;                 /* PCMultiLevelGetPostSmoother */
  SLES last_solve;                    /* PCMultiLevelGetLastLevelSolver */
  int my_high_level;
  int the_high_level;
  int cutoff;                         /* AMLSetCutoffSize */
  int it11;                           /* AMLSet11JacobiIterations */
} PC_MCol_struct;

/****************************************************************
 * Auxiliary routines, may belong in some other file            *
 ****************************************************************/
#undef __FUNC__
#define __FUNC__ "ISGetGlobalContent"
/*
   ISGetGlobalContent - gather the indices of an index set from all
   processors of its communicator into one index set.

   Input:  is  - index set; ISGetSize() is used as this processor's count
   Output: gis - new index set on the same communicator, holding the
                 indices of all processors in processor order

   Must be called by every processor of is->comm (uses MPI_Allgather).
   Returns 0, or an error code through CHKERRQ/CHKPTRQ.
*/
static int ISGetGlobalContent(IS is,IS *gis)
{
  MPI_Comm comm = is->comm;
  IS gathered;
  int nproc,proc,ierr;
  int n_mine,n_all;
  int *counts,*offsets;
  int *mine,*all;

  MPI_Comm_size(comm,&nproc);

  /* let every processor know how many indices each one contributes */
  counts = (int *) PetscMalloc(nproc*sizeof(int)); CHKPTRQ(counts);
  ierr = ISGetSize(is,&n_mine); CHKERRQ(ierr);
  MPI_Allgather((void *)&n_mine,1,MPI_INT,(void *)counts,1,MPI_INT,comm);

  /* running sums give the displacements; the last one is the total */
  offsets = (int *) PetscMalloc((nproc+1)*sizeof(int)); CHKPTRQ(offsets);
  offsets[0] = 0;
  for (proc=1; proc<=nproc; proc++) {
    offsets[proc] = offsets[proc-1]+counts[proc-1];
  }
  n_all = offsets[nproc];

  /* collect all indices; one extra slot so the request is never empty */
  all = (int *) PetscMalloc( (n_all+1)*sizeof(int) ); CHKPTRQ(all);
  ierr = ISGetIndices(is,&mine); CHKERRQ(ierr);
  MPI_Allgatherv((void *)mine,n_mine,MPI_INT,
		 (void *)all,counts,offsets,MPI_INT,comm);
  ierr = ISRestoreIndices(is,&mine); CHKERRQ(ierr);

  /* wrap the gathered list in an index set and hand it back */
  ierr = ISCreateGeneral(comm,n_all,all,&gathered); CHKERRQ(ierr);
  *gis = gathered;

  /* the work arrays are no longer needed */
  PetscFree(counts);
  PetscFree(offsets);
  PetscFree(all);

  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatrixAij2Mpi"
/*
   MatrixAij2Mpi - copy the rows of a matrix into a new MPIAIJ matrix on
   the given communicator.

   Input:  part - passed as the second size argument of MatCreateMPIAIJ
                  (presumably the local column count -- confirm)
           in   - matrix whose local rows are copied
           comm - communicator of the new matrix
   Output: out  - the assembled MPIAIJ matrix

   Local row i of "in" becomes row rstart+i of the result; column indices
   are copied as MatGetRow returns them.
*/
static int MatrixAij2Mpi(int part,Mat in,MPI_Comm comm,Mat *out)
{
  Mat mpi_mat;
  int nrows,ncols_in,first,last,i,ierr;

  ierr = MatGetLocalSize(in,&nrows,&ncols_in); CHKERRQ(ierr);
  ierr = MatCreateMPIAIJ(comm,nrows,part,PETSC_DECIDE,PETSC_DECIDE,
			 0,0,0,0,&mpi_mat); CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(mpi_mat,&first,&last); CHKERRQ(ierr);

  for (i=0; i<nrows; i++) {
    int grow = first+i;
    int len,*idx;
    Scalar *val;

    ierr = MatGetRow(in,i,&len,&idx,&val); CHKERRQ(ierr);
    ierr = MatSetValues(mpi_mat,1,&grow,len,idx,val,INSERT_VALUES);
    CHKERRQ(ierr);
    ierr = MatRestoreRow(in,i,&len,&idx,&val); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mpi_mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpi_mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

  *out = mpi_mat;
  return 0;
}

/****************************************************************
 * User Interface                                               *
 ****************************************************************/

#undef __FUNC__
#define __FUNC__ "AMLSetFillMethod"
/*
   AMLSetFillMethod - store the fill method in the preconditioner data.

   Input: pc          - preconditioner; must be of type PCMultiLevel
          fill_method - value to store

   Returns 0, or an error if pc has a different type.
*/
int AMLSetFillMethod(PC pc,AMLFillMethod fill_method)
{
  PC_MCol_struct *mc;
  PCType type;
  int ierr;

  ierr = PCGetType(pc,&type,PETSC_NULL); CHKERRQ(ierr);
  if (type != PCMultiLevel) {
    SETERRQ(1,0,"Fill method can only be set for AML");
  }
  mc = (PC_MCol_struct *) pc->data;
  mc->fill_method = fill_method;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "AMLSetSolutionScheme"
/*
   AMLSetSolutionScheme - store the solve scheme in the preconditioner data.

   Input: pc           - preconditioner; must be of type PCMultiLevel
          solve_scheme - value to store

   Returns 0, or an error if pc has a different type.
*/
int AMLSetSolutionScheme(PC pc,AMLSolveScheme solve_scheme)
{
  PC_MCol_struct *mc;
  PCType type;
  int ierr;

  ierr = PCGetType(pc,&type,PETSC_NULL); CHKERRQ(ierr);
  if (type != PCMultiLevel) {
    SETERRQ(1,0,"Solve scheme can only be set for AML");
  }
  mc = (PC_MCol_struct *) pc->data;
  mc->solve_scheme = solve_scheme;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "AMLSetCoarseGridDependent"
/*
   AMLSetCoarseGridDependent - set the grid choice of the preconditioner
   to AMLCoarseGridDependent.

   Input: pc - preconditioner; must be of type PCMultiLevel

   Returns 0, or an error if pc has a different type.
*/
int AMLSetCoarseGridDependent(PC pc)
{
  PC_MCol_struct *mc;
  PCType type;
  int ierr;

  ierr = PCGetType(pc,&type,PETSC_NULL); CHKERRQ(ierr);
  if (type != PCMultiLevel) {
    SETERRQ(1,0,"Grid choice can only be set for AML");
  }
  mc = (PC_MCol_struct *) pc->data;
  mc->grid_choice = AMLCoarseGridDependent;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "AMLSetCoarseGridIndependent"
/*
   AMLSetCoarseGridIndependent - set the grid choice of the preconditioner
   to AMLCoarseGridIndependent.

   Input: pc - preconditioner; must be of type PCMultiLevel

   Returns 0, or an error if pc has a different type.
*/
int AMLSetCoarseGridIndependent(PC pc)
{
  PC_MCol_struct *mc;
  PCType type;
  int ierr;

  ierr = PCGetType(pc,&type,PETSC_NULL); CHKERRQ(ierr);
  if (type != PCMultiLevel) {
    SETERRQ(1,0,"Grid choice can only be set for AML");
  }
  mc = (PC_MCol_struct *) pc->data;
  mc->grid_choice = AMLCoarseGridIndependent;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "AMLSetCutoffSize"
/*
   AMLSetCutoffSize - store the cutoff size in the preconditioner data.

   Input: pc  - preconditioner; must be of type PCMultiLevel
          siz - value stored in the cutoff field

   Returns 0, or an error if pc has a different type.
*/
int AMLSetCutoffSize(PC pc,int siz)
{
  PC_MCol_struct *mc;
  PCType type;
  int ierr;

  ierr = PCGetType(pc,&type,PETSC_NULL); CHKERRQ(ierr);
  if (type != PCMultiLevel) {
    SETERRQ(1,0,"Cutoff sizes can only be set for AML");
  }
  mc = (PC_MCol_struct *) pc->data;
  mc->cutoff = siz;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "AMLSet11JacobiIterations"
/*
   AMLSet11JacobiIterations - store the iteration count for the 11 block
   in the preconditioner data.

   Input: pc - preconditioner; must be of type PCMultiLevel
          it - value stored in the it11 field

   Returns 0, or an error if pc has a different type.
*/
int AMLSet11JacobiIterations(PC pc,int it)
{
  PC_MCol_struct *mc;
  PCType type;
  int ierr;

  ierr = PCGetType(pc,&type,PETSC_NULL); CHKERRQ(ierr);
  if (type != PCMultiLevel) {
    SETERRQ(1,0,"11Block iterations can only be set for AML");
  }
  mc = (PC_MCol_struct *) pc->data;
  mc->it11 = it;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "AMLSetSmootherChoice"
/*
   AMLSetSmootherChoice - store the smoother choice in the preconditioner
   data.

   Input: pc       - preconditioner; must be of type PCMultiLevel
          smoother - value to store

   Returns 0, or an error if pc has a different type.
*/
int AMLSetSmootherChoice(PC pc,AMLSmootherChoice smoother)
{
  PC_MCol_struct *mc;
  PCType type;
  int ierr;

  ierr = PCGetType(pc,&type,PETSC_NULL); CHKERRQ(ierr);
  if (type != PCMultiLevel) {
    SETERRQ(1,0,"Smoothers can only be set for AML");
  }
  mc = (PC_MCol_struct *) pc->data;
  mc->smoother_choice = smoother;

  return 0;
}

#undef __FUNC__
#define __FUNC__ "PCMultiLevelGetPreSmoother"
/*
   PCMultiLevelGetPreSmoother - return the PC of the pre-smoother solver.

   Input:  pc       - preconditioner; its data is read as PC_MCol_struct
                      without a type check
   Output: local_pc - PC obtained from the pre_smoother SLES
*/
int PCMultiLevelGetPreSmoother(PC pc,PC *local_pc)
{
  SLES smoother = ((PC_MCol_struct *) pc->data)->pre_smoother;
  int ierr = SLESGetPC(smoother,local_pc);

  CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "PCMultiLevelGetPostSmoother"
/*
   PCMultiLevelGetPostSmoother - return the PC of the post-smoother solver.

   Input:  pc       - preconditioner; its data is read as PC_MCol_struct
                      without a type check
   Output: local_pc - PC obtained from the post_smoother SLES
*/
int PCMultiLevelGetPostSmoother(PC pc,PC *local_pc)
{
  SLES smoother = ((PC_MCol_struct *) pc->data)->post_smoother;
  int ierr = SLESGetPC(smoother,local_pc);

  CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "PCMultiLevelGetLastLevelSolver"
/*
   PCMultiLevelGetLastLevelSolver - return the PC of the last-level solver.

   Input:  pc       - preconditioner; its data is read as PC_MCol_struct
                      without a type check
   Output: local_pc - PC obtained from the last_solve SLES
*/
int PCMultiLevelGetLastLevelSolver(PC pc,PC *local_pc)
{
  SLES solver = ((PC_MCol_struct *) pc->data)->last_solve;
  int ierr = SLESGetPC(solver,local_pc);

  CHKERRQ(ierr);
  return 0;
}

/****************************************************************
 * Inits and such
 ****************************************************************/

#undef __FUNC__
#define __FUNC__ "InitOneLevel"
/*
   InitOneLevel - fill in the basic fields of a level structure.

   Input: this_level  - structure to initialise
          lvl         - level number
          mat         - matrix of this level
          idx         - index set stored as this_level->indices
          old_isize,
          old_jsize   - (RUGE only) grid dimensions of the previous level
          solve_scheme, grid_choice, fill_method - options copied into the
                        level

   Records the ownership range and the local/global sizes of mat, clears the
   smoother handles, and creates the vectors u, v, w and diag with the local
   row count of mat.  With RUGE the grid is halved alternately in the i
   direction (even lvl) and the j direction (odd lvl).
*/
static int InitOneLevel
(MC_OneLevel_struct *this_level,int lvl,Mat mat,IS idx,
#if RUGE
 int old_isize,int old_jsize,
#endif
 AMLSolveScheme solve_scheme,AMLCoarseGridChoice grid_choice,
 AMLFillMethod fill_method)
{
  int nrows_loc,ncols_loc,ierr;

  /* what we are given */
  this_level->level = lvl;
  this_level->indices = idx;
  this_level->mat = mat;
  this_level->comm = mat->comm;

  /* sizes derived from the row distribution of the matrix */
  ierr = MatGetOwnershipRange(mat,&this_level->rstart,&this_level->rend);
  CHKERRQ(ierr);
  this_level->local_size = this_level->rend - this_level->rstart;
  MPI_Allreduce((void*)&this_level->local_size,
		(void*)&this_level->global_size,
		1,MPI_INT,MPI_SUM,this_level->comm);
#if RUGE
  {
    int ni,nj;

    if (lvl==0) {
      ni = (int)sqrt(this_level->global_size);
      nj = this_level->global_size/ni;
    } else if (lvl%2==0) {
      ni = old_isize/2; nj = old_jsize;
    } else {
      ni = old_isize; nj = old_jsize/2;
    }
    if (ni*nj!=this_level->global_size) SETERRQ(1,0,"Grid not divisible");
printf("Grid is %dx%d\n",ni,nj);
    this_level->isize = ni;
    this_level->jsize = nj;
  }
#endif

  /* options, and no smoothers yet */
  this_level->pre_smooth = 0;
  this_level->post_smooth = 0;
  this_level->solve_scheme = solve_scheme;
  this_level->grid_choice = grid_choice;
  this_level->fill_method = fill_method;

  /* work vectors, distributed like the rows of the matrix */
  ierr = MatGetLocalSize(mat,&nrows_loc,&ncols_loc); CHKERRQ(ierr);
  ierr = VecCreateMPI(mat->comm,nrows_loc,PETSC_DECIDE,&this_level->u);
  CHKERRQ(ierr);
  ierr = VecDuplicate(this_level->u,&this_level->v); CHKERRQ(ierr);
  ierr = VecDuplicate(this_level->u,&this_level->w); CHKERRQ(ierr);
/* !!! */
  ierr = VecDuplicate(this_level->u,&this_level->diag); CHKERRQ(ierr);

  return 0;
}

#if TRACING
#undef __FUNC__
#define __FUNC__ "LambdaOneEst"
/*
   LambdaOneEst - print the Rayleigh quotient (x,Ax)/(x,x) of mat for the
   vector x of all ones (only compiled when TRACING is set).

   Input: mat        - matrix to probe
          this_level - supplies the work vectors u, v, w (overwritten)

   Returns 0, or an error code through CHKERRQ.
*/
static int LambdaOneEst(Mat mat,MC_OneLevel_struct *this_level)
{
  Scalar one=1.0, xax,xx;
  int ierr;

  /* u = 1, v = A u */
  ierr = VecSet(&one,this_level->u); CHKERRQ(ierr);
  ierr = MatMult(mat,this_level->u,this_level->v); CHKERRQ(ierr);
  /* w = u .* (A u), v = u .* u; their sums are the two inner products */
  ierr = VecPointwiseMult(this_level->u,this_level->v,this_level->w);
  CHKERRQ(ierr);
  ierr = VecPointwiseMult(this_level->u,this_level->u,this_level->v);
  CHKERRQ(ierr);
  ierr = VecSum(this_level->w,&xax); CHKERRQ(ierr);
  ierr = VecSum(this_level->v,&xx); CHKERRQ(ierr);
  printf("lambda_1 approx: %e\n",xax/xx);

  return 0;
}
#endif
/****************************************************************
 * Colouring and finding independent sets                       *
 ****************************************************************/

#if !RUGE
#undef __FUNC__
#define __FUNC__ "FreeClr"
/*
   FreeClr - return the smallest integer >= 1 that does not occur among
   the first nn entries of n.

   Input: nn - number of entries to inspect (0 gives 1)
          n  - colours already in use; not modified
*/
static int FreeClr(int nn,int *n)
{
  int candidate = 1;
  int k = 0;

  while (k<nn) {
    if (n[k]==candidate) {
      /* taken: try the next colour and rescan from the start */
      candidate++;
      k = 0;
    } else {
      k++;
    }
  }
  return candidate;
}

#undef __FUNC__
#define __FUNC__ "LookAtThisRow"
/*
   LookAtThisRow - inspect the neighbours listed in one matrix row.

   Input:  this_var   - index to skip (the variable itself); pass -1 to
                        skip nothing
           this_rand  - random value of the variable under consideration
           idx, nidx  - column indices of the row and their number
           ran, clr   - random values and colours, indexed by column index
   In/out: colour_now - set to 0 as soon as a neighbour with a larger
                        random value turns out to be uncoloured (colour 0)
           n, neigh   - colours of the neighbours with a larger random
                        value are appended to neigh, *n is the fill count

   Always returns 0.
*/
static int LookAtThisRow
(int this_var, Scalar this_rand,int *idx,int nidx, int *colour_now,
 Scalar *ran,Scalar *clr,int *n,int *neigh)
{
  int k;

  for (k=0; k<nidx; k++) {
    int nb = idx[k];

    /* only neighbours that outrank this variable matter */
    if (nb==this_var || !(ran[nb]>this_rand)) continue;

    if (clr[nb]==0.) {
      /* a higher ranked neighbour is still undecided: wait */
      *colour_now = 0;
      return 0;
    }
    neigh[*n] = (int)clr[nb];
    (*n)++;
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "LookAtUncolouredVar"
/*
   LookAtUncolouredVar - decide whether a variable can be coloured in this
   pass and, if so, with which colour.

   Input:  this_var   - row number, used for both A and B
           A, B       - the two matrices whose rows list the neighbours
           clr_array, ran_array   - colours / random values indexed by the
                                    column numbers of A
           clrb_array, ranb_array - same, indexed by the column numbers of B
           neighb     - scratch array for neighbour colours
   In/out: colour_now - cleared when a neighbour with a larger random value
                        is still uncoloured
   Output: clr        - smallest colour not used by the collected
                        neighbours (computed even when colour_now is cleared)
*/
static int LookAtUncolouredVar
(int this_var,Mat A,Mat B, int *colour_now,
 Scalar *clr_array,Scalar *clrb_array,Scalar *ran_array,Scalar *ranb_array,
 int *neighb,int *clr)
{
  Scalar my_rand = ran_array[this_var];
  int lenA,lenB,*colsA,*colsB,ierr;
  int n_seen = 0;

  /* neighbours through A */
  ierr = MatGetRow(A,this_var,&lenA,&colsA,0); CHKERRQ(ierr);
  ierr = LookAtThisRow(this_var,my_rand,colsA,lenA,colour_now,
		       ran_array,clr_array,&n_seen,neighb);
  CHKERRQ(ierr);
  ierr = MatRestoreRow(A,this_var,&lenA,&colsA,0); CHKERRQ(ierr);

  /* neighbours through B, unless the answer is already "not now" */
  if (*colour_now) {
    ierr = MatGetRow(B,this_var,&lenB,&colsB,0); CHKERRQ(ierr);
    ierr = LookAtThisRow(-1,my_rand,colsB,lenB,colour_now,
			 ranb_array,clrb_array,&n_seen,neighb);
    CHKERRQ(ierr);
    ierr = MatRestoreRow(B,this_var,&lenB,&colsB,0); CHKERRQ(ierr);
  }

  *clr = FreeClr(n_seen,neighb);

  return 0;
}

#undef __FUNC__
#define __FUNC__ "FindIndSet"
/*
   FindIndSet - colour the variables of an MPIAIJ matrix using random
   priorities, and return the variables of colour 1 and all the others as
   two index sets.

   Input:  mat       - MPIAIJ matrix; its blocks A and B, its scatter
                       context Mvctx and its border vector lvec are used
           randoms   - one random value per local variable
           clrs      - work vector that receives the colours (overwritten);
                       mat's lvec receives the colours of the border
                       variables (overwritten)
   Output: clrd_vars - global numbers of the local variables with colour 1
           rest_vars - global numbers of all other local variables

   A variable is coloured in a pass once every neighbour with a larger
   random value has a colour; it gets the smallest colour not used by
   those neighbours.  Passes are repeated, with an exchange of border
   colours in between, until no processor has uncoloured variables left.

   Must be called by all processors of mat->comm.
*/
static int FindIndSet
(Mat mat,Vec randoms,Vec clrs,IS *clrd_vars,IS *rest_vars) 
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar *ran_array,*ranb_array, *clr_array,*clrb_array, zero=0.0;
  Vec ran_bord,clr_bord = aij->lvec;
  int *neighb, rstart,rend,local_size,var,coloured, ierr;

  ierr = VecGetOwnershipRange(randoms,&rstart,&rend); CHKERRQ(ierr);
  local_size = rend-rstart;
  ierr = VecDuplicate(aij->lvec,&ran_bord); CHKERRQ(ierr);
  ierr = VecGetArray(randoms,&ran_array); CHKERRQ(ierr);
  ierr = VecGetArray(ran_bord,&ranb_array); CHKERRQ(ierr);
  ierr = VecGetArray(clrs,&clr_array); CHKERRQ(ierr);
  ierr = VecGetArray(clr_bord,&clrb_array); CHKERRQ(ierr);
  /* colour 0 means "not coloured yet" */
  ierr = VecSet(&zero,clrs); CHKERRQ(ierr);
  ierr = VecSet(&zero,clr_bord); CHKERRQ(ierr);
  {
    /* scratch space for the neighbour colours of one variable */
    int rl;
    ierr = MatMaxRowLen_MPIAIJ(mat,&rl); CHKERRQ(ierr);
    neighb = (int *) PetscMalloc(rl*sizeof(int)); CHKPTRQ(neighb);
  }

  /* the random values of the border variables never change: get them once */
  ierr = VecScatterBegin
    (randoms,ran_bord,INSERT_VALUES,SCATTER_FORWARD/*ALL*/,aij->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd
    (randoms,ran_bord,INSERT_VALUES,SCATTER_FORWARD/*ALL*/,aij->Mvctx); CHKERRQ(ierr);

  /*>>>> Loop until completely coloured <<<<*/
  coloured = 0;
  for (;;) {
    int l_rem,g_rem;

    for (var=0; var<local_size; var++) {
      if (!clr_array[var]) {
	int colour_now = 1,clr;
	ierr = LookAtUncolouredVar
	  (var,aij->A,aij->B,&colour_now,clr_array,clrb_array,
	   ran_array,ranb_array,neighb,&clr); CHKERRQ(ierr);
	if (colour_now) {
	  coloured++; clr_array[var] = (Scalar) clr;
	}
      }
    }

    /* stop only when no processor has anything left */
    l_rem = local_size-coloured;
    MPI_Allreduce(&l_rem,&g_rem,1,MPI_INT,MPI_MAX,mat->comm);
    if (!g_rem) break;
    if (g_rem<0) SETERRQ(1,0,"Cannot happen: negative points to colour");

    /* make the new colours visible to the neighbouring processors */
    ierr = VecScatterBegin
      (clrs,clr_bord,INSERT_VALUES,SCATTER_FORWARD/*ALL*/,aij->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd
      (clrs,clr_bord,INSERT_VALUES,SCATTER_FORWARD/*ALL*/,aij->Mvctx); CHKERRQ(ierr);
  }

  ierr = VecRestoreArray(randoms,&ran_array); CHKERRQ(ierr);
  ierr = VecRestoreArray(ran_bord,&ranb_array); CHKERRQ(ierr);
  ierr = VecDestroy(ran_bord); CHKERRQ(ierr);
  /* the scratch array was previously leaked */
  PetscFree(neighb);
  {
    IndStash points_coloured,points_not_coloured;
    
    ierr = NewIndexStash(&points_coloured); CHKERRQ(ierr);
    ierr = NewIndexStash(&points_not_coloured); CHKERRQ(ierr);

    /* colour 1 is the independent set, everything else is the rest */
    for (var=0; var<local_size; var++)
      if ((int)clr_array[var]==1) {
	ierr = StashIndex(points_coloured,1,&var);
	CHKERRQ(ierr);
      } else {
	ierr = StashIndex(points_not_coloured,1,&var);
	CHKERRQ(ierr);
      }

    /* the stashes hold local numbers; shift them to global numbers */
    {
      int i;
      for (i=0; i<points_coloured->n; i++)
	points_coloured->array[i] += rstart;
    }
    ierr = ISCreateGeneral
      (mat->comm,points_coloured->n,points_coloured->array,clrd_vars);
    CHKERRQ(ierr); ierr = DestroyIndexStash(points_coloured); CHKERRQ(ierr);
    {
      int i;
      for (i=0; i<points_not_coloured->n; i++)
	points_not_coloured->array[i] += rstart;
    }
    ierr = ISCreateGeneral
      (mat->comm,points_not_coloured->n,points_not_coloured->array,rest_vars);
    CHKERRQ(ierr);
    ierr = DestroyIndexStash(points_not_coloured); CHKERRQ(ierr);
  }

  /* give back the colour arrays, which were previously never restored */
  ierr = VecRestoreArray(clrs,&clr_array); CHKERRQ(ierr);
  ierr = VecRestoreArray(clr_bord,&clrb_array); CHKERRQ(ierr);
  
  return 0;
}
#endif

#if RUGE
#undef __FUNC__
#define __FUNC__ "FakeIndset"
/*
   FakeIndset - split the locally owned points of an isize x jsize grid
   into a "coloured" and a "not coloured" set by a fixed pattern
   (RUGE only).

   Input:  this_level - supplies comm, level, isize, jsize, rstart, rend
   Output: clrd_vars  - points selected by the pattern
           rest_vars  - the remaining locally owned points

   Point (i,j) has number i*jsize+j.  On even levels a point is selected
   unless exactly one of i, j is odd; on odd levels it is selected when i
   is even.  Only numbers in [rstart,rend) are considered.
*/
static int FakeIndset(MC_OneLevel_struct *this_level,
		      IS *clrd_vars,IS *rest_vars)
{
  MPI_Comm comm = this_level->comm;
  IndStash picked,left;
  int i,j,ierr;

  ierr = NewIndexStash(&picked); CHKERRQ(ierr);
  ierr = NewIndexStash(&left); CHKERRQ(ierr);

  for (i=0; i<this_level->isize; i++) {
    for (j=0; j<this_level->jsize; j++) {
      int num = i*this_level->jsize+j;
      int pick;

      /* not ours */
      if (num<this_level->rstart || num>=this_level->rend) continue;

      if (this_level->level%2==0) {
	pick = (i%2+j%2!=1);
      } else {
	pick = (i%2==0);
      }
      ierr = StashIndex(pick ? picked : left,1,&num); CHKERRQ(ierr);
    }
  }

  ierr = ISCreateGeneral(comm,picked->n,picked->array,clrd_vars);
  CHKERRQ(ierr);
  ierr = DestroyIndexStash(picked); CHKERRQ(ierr);

  ierr = ISCreateGeneral(comm,left->n,left->array,rest_vars);
  CHKERRQ(ierr);
  ierr = DestroyIndexStash(left); CHKERRQ(ierr);

  return 0;
}
#endif

#undef __FUNC__
#define __FUNC__ "SplitClrAndRest"
/*
   SplitClrAndRest - split the variables of a level into a coloured set
   and the rest.

   Input:  this_level - level data; u and v are used as work vectors
           mat        - matrix whose connections define the split
                        (not used when RUGE is set)
   Output: clr, rest  - the two index sets

   Without RUGE, u is filled with random numbers and FindIndSet does the
   work; with RUGE the split comes from FakeIndset.
*/
static int SplitClrAndRest(MC_OneLevel_struct *this_level,Mat mat,
			   IS *clr,IS *rest)
{
  IS coloured,remaining;
  int ierr;

#if RUGE
  ierr = FakeIndset(this_level,&coloured,&remaining); CHKERRQ(ierr);
#else
  {
    PetscRandom rctx;

    ierr = PetscRandomCreate(MPI_COMM_SELF,RANDOM_DEFAULT,&rctx);
    CHKERRQ(ierr);
    ierr = VecSetRandom(rctx,this_level->u); CHKERRQ(ierr);

    ierr = FindIndSet(mat,this_level->u,this_level->v,
		      &coloured,&remaining);
    CHKERRQ(ierr);

    ierr = PetscRandomDestroy(rctx); CHKERRQ(ierr);
  }
#endif

  *clr = coloured;
  *rest = remaining;

  return 0;
}

#undef __FUNC__
#define __FUNC__ "SplitOffOneLevel"
/*
   SplitOffOneLevel - split the variables of a level into two sets and
   translate both to the numbering stored in this_level->indices.

   Input:  this_level - level data; size1, size2 and indices1 are set here
           mat        - matrix that defines the split
   Output: set1, set2 - the two sets, in the numbering of this level
           set2_g     - set2 mapped through this_level->indices
           global_r   - global number of points outside the coloured set

   set1 is the coloured set if nothing remains or if the grid choice is
   AMLCoarseGridDependent; otherwise the roles are swapped.  set1 mapped
   through this_level->indices is stored in this_level->indices1.

   Must be called by all processors of this_level->comm.
*/
int SplitOffOneLevel
(MC_OneLevel_struct *this_level,Mat mat,
 IS *set1,IS *set2,IS *set2_g,int *global_r)
{
  IS clr,rest; int local_r,local_c,ierr;

  ierr = SplitClrAndRest(this_level,mat,&clr,&rest); CHKERRQ(ierr);
/*printf("Independent set on level %d\n",this_level->level);ISView(clr,0);*/
  ierr = ISGetSize(clr,&local_c); CHKERRQ(ierr);
  ierr = ISGetSize(rest,&local_r); CHKERRQ(ierr);
  /* capture the MPI return code so that the check below means something */
  ierr = MPI_Allreduce(&local_r,global_r,1,MPI_INT,MPI_SUM,this_level->comm);
  CHKERRQ(ierr);
  if (*global_r==0 || this_level->grid_choice==AMLCoarseGridDependent) {
    *set1 = clr; *set2 = rest;
    this_level->size1 = local_c; this_level->size2 = local_r;
  } else {
    *set1 = rest; *set2 = clr;
    this_level->size1 = local_r; this_level->size2 = local_c;
  }
printf("Colour %d has %d points, %d points remaining\n",
       this_level->level,this_level->size1,this_level->size2);

  {/* get the global indices of the next set */
    int s,*i,*ig1,*ig2,*ig,var;

    s = this_level->size2;
    ig2 = (int *) PetscMalloc((s+1)*sizeof(int)); CHKPTRQ(ig2);
    ierr = ISGetIndices(*set2,&i); CHKERRQ(ierr);
    ierr = ISGetIndices(this_level->indices,&ig); CHKERRQ(ierr);
    /* entries of the sets are global row numbers; ig is indexed locally */
    for (var=0; var<s; var++) ig2[var] = ig[i[var]-this_level->rstart];
    ierr = ISRestoreIndices(*set2,&i); CHKERRQ(ierr);
    ierr = ISCreateGeneral(this_level->comm,s,ig2,set2_g); CHKERRQ(ierr);
    /* the index set keeps its own copy (cf. ISGetGlobalContent) */
    PetscFree(ig2);

    s = this_level->size1;
    ig1 = (int *) PetscMalloc((s+1)*sizeof(int)); CHKPTRQ(ig1);
    ierr = ISGetIndices(*set1,&i); CHKERRQ(ierr);
    for (var=0; var<s; var++) ig1[var] = ig[i[var]-this_level->rstart];
    ierr = ISRestoreIndices(*set1,&i); CHKERRQ(ierr);
    ierr = ISRestoreIndices(this_level->indices,&ig); CHKERRQ(ierr);
    ierr = ISCreateGeneral(this_level->comm,s,ig1,&(this_level->indices1));
    CHKERRQ(ierr);
    PetscFree(ig1);
  }
  return 0;
}

/****************************************************************
 * Strong and weak connections                                  *
 ****************************************************************/
#undef __FUNC__
#define __FUNC__ "MakeStrongMatrix"
/*
   MakeStrongMatrix - build a copy of this_level->mat in which weak
   connections are lumped onto the diagonal.

   Input:  this_level    - level data; mat is read, u and v are used as
                           work vectors, and mat's border vector lvec is
                           overwritten
   Output: strong_matrix - new MPIAIJ matrix
           this_level->cdiag - newly created; reciprocal of the diagonal of
                           the new matrix

   An element v in row i, column j is kept if |v| > weight*sqrt(r_i*c_j),
   where r and c are the vectors delivered by
   MatMaxRowOffDiagElement_MPIAIJ and MatMaxColOffDiagElement_MPIAIJ
   (presumably the largest off-diagonal element per row / column --
   confirm).  In the diagonal block diagonal elements are always kept, and
   all elements of rows with at most 5 entries in that block are kept.
   Every other element is added to the diagonal of its row.
*/
static int MakeStrongMatrix(MC_OneLevel_struct *this_level,Mat *strong_matrix)
{
  Mat mat = this_level->mat,strong_mat;
  MPI_Comm comm = this_level->comm;
  int rstart = this_level->rstart,local_size = this_level->local_size;
  int iRow,ierr;
#if TRACING
  /* K counts kept elements, D counts elements lumped onto the diagonal */
  int K=0,D=0;
#endif
  Scalar zero=0.0;
  
#define WEIGH 1
  Mat_MPIAIJ  *Aij = (Mat_MPIAIJ *) mat->data;
  Mat A = Aij->A, B = Aij->B;
  Scalar *rA,*cA,*cB,weight=.45;
  
  /* compare strong connections to max off diag element per row */
  ierr = MatMaxRowOffDiagElement_MPIAIJ(mat,this_level->u); CHKERRQ(ierr);
  ierr = VecSet(&zero,this_level->v); CHKERRQ(ierr);
  ierr = MatMaxColOffDiagElement_MPIAIJ(mat,this_level->v); CHKERRQ(ierr);
  /* the column values of off-processor columns are needed for block B */
  ierr = VecScatterBegin(this_level->v,Aij->lvec,INSERT_VALUES,
			 SCATTER_FORWARD/*ALL*/,Aij->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(this_level->v,Aij->lvec,INSERT_VALUES,
		       SCATTER_FORWARD/*ALL*/,Aij->Mvctx); CHKERRQ(ierr);
  ierr = VecGetArray(this_level->u,&rA); CHKERRQ(ierr);
  ierr = VecGetArray(this_level->v,&cA); CHKERRQ(ierr);
  ierr = VecGetArray(Aij->lvec,&cB); CHKERRQ(ierr);
  
  /* iRow only receives the unused local column count here */
  ierr = MatGetLocalSize(mat,&local_size,&iRow); CHKERRQ(ierr);
  ierr = MatCreateMPIAIJ(comm,local_size,local_size,
			 PETSC_DECIDE,PETSC_DECIDE,4,0,2,0, &strong_mat);
  CHKERRQ(ierr);
  for (iRow=0; iRow<local_size; iRow++) {
    int Row=rstart+iRow,ncols,*cols,iCol,auto_accept=1;
    Scalar *vals,d;
    /* get the row for as far as it's in A */
    ierr = MatGetRow(A,iRow,&ncols,&cols,&vals); CHKERRQ(ierr);
    /* short rows are copied completely */
    if (ncols>5) auto_accept = 0;
    for (iCol=0; iCol<ncols; iCol++) {
      /* columns of A are local numbers; shift by rstart to get global ones */
      int Col=rstart+cols[iCol]; Scalar v=vals[iCol];
      if (WEIGH) d=sqrt(rA[iRow]*cA[cols[iCol]]);
      if (auto_accept | Col==Row | fabs(v)>weight*d) {
#if TRACING
	K++;
#endif
	ierr = MatSetValues
	  (strong_mat,1,&Row,1,&Col,&v,ADD_VALUES); CHKERRQ(ierr);
      } else {
#if TRACING
	D++;
#endif
	/* weak connection: add it to the diagonal instead */
	ierr = MatSetValues
	  (strong_mat,1,&Row,1,&Row,&v,ADD_VALUES); CHKERRQ(ierr);
      }
    }
    ierr = MatRestoreRow(A,iRow,&ncols,&cols,&vals); CHKERRQ(ierr);
    /* get the row for as far as it's in B */
    ierr = MatGetRow(B,iRow,&ncols,&cols,&vals); CHKERRQ(ierr);
    for (iCol=0; iCol<ncols; iCol++) {
      /* garray translates the compressed column numbers of B to global ones */
      int Col=Aij->garray[cols[iCol]]; Scalar v=vals[iCol];
      if (WEIGH) d=sqrt(rA[iRow]*cB[cols[iCol]]);
      if (fabs(v)>weight*d) {
#if TRACING
	K++;
#endif
	ierr = MatSetValues
	  (strong_mat,1,&Row,1,&Col,vals+iCol,ADD_VALUES); CHKERRQ(ierr);
      } else {
#if TRACING
	D++;
#endif
	ierr = MatSetValues
	  (strong_mat,1,&Row,1,&Row,vals+iCol,ADD_VALUES); CHKERRQ(ierr);
      }
    }
    ierr = MatRestoreRow(B,iRow,&ncols,&cols,&vals); CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(this_level->u,&rA); CHKERRQ(ierr);
  ierr = VecRestoreArray(this_level->v,&cA); CHKERRQ(ierr);
  ierr = VecRestoreArray(Aij->lvec,&cB); CHKERRQ(ierr);
  ierr = MatAssemblyBegin(strong_mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(strong_mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *strong_matrix = strong_mat;
/*printf("strong matrix\n"); MatView(strong_mat,0);*/

  /* cdiag <- 1 / diagonal of the strong matrix (includes the lumped part) */
  ierr = VecDuplicate(this_level->diag,&(this_level->cdiag)); CHKERRQ(ierr);
  ierr = MatGetDiagonal(strong_mat,this_level->cdiag);
  CHKERRQ(ierr);
  ierr = VecReciprocal(this_level->cdiag); CHKERRQ(ierr);

#if TRACING
  {int b,a; MatMaxRowLen_MPIAIJ(this_level->mat,&b);
   MatMaxRowLen_MPIAIJ(strong_mat,&a); if(!K)K=1;
   printf("Row lengths original=%d, strong=%d; discard %d=%d pct\n",
	  b,a,D,100*D/(D+K));
 }
#endif
  
  return 0;
}

/****************************************************************
 * Sub solvers
 ****************************************************************/
/* Defined elsewhere; SetupLastLevel uses it to obtain the solver of a level
   from the user's last-level solver (presumably a duplicate -- confirm) */
extern int CopySLES(SLES old,SLES *new);

#undef __FUNC__
#define __FUNC__ "SetSmoothers"
/*
   SetSmoothers - create the pre- and post-smoother solvers of a level.

   Input: smoother_choice - which smoothers are wanted; nothing is done
                            for a zero value
          this_level      - level data; pre_smooth and post_smooth are
                            created on this_level->comm
          pre, post       - not referenced
          prefix          - options prefix; "presmoother_" respectively
                            "postsmoother_" is appended to it

   Both solvers are created with KSP type KSPPREONLY and configured from
   the options database.  The level matrix is attached as operator only to
   the smoothers selected in smoother_choice.  The selection is a bit test;
   this assumes AMLPreSmooth=1, AMLPostSmooth=2, AMLPrePostSmooth=3 as in
   the commented-out typedef near the top of this file.  The former test
   used && and was therefore true for every nonzero choice.
   NOTE(review): code outside this function that solves with a smoother
   which was not selected would now find it without operators -- confirm
   that callers test smoother_choice as well.
*/
static int SetSmoothers
(AMLSmootherChoice smoother_choice,MC_OneLevel_struct *this_level,
 SLES pre,SLES post,char *prefix)
{
  KSP subksp;
  int ierr;

  if (!smoother_choice) return 0;

  /* == create the smoothers == */
  ierr = SLESCreate
    (this_level->comm,&(this_level->pre_smooth)); CHKERRQ(ierr);
  ierr = SLESGetKSP(this_level->pre_smooth,&subksp); CHKERRQ(ierr);
  ierr = KSPSetType(subksp,KSPPREONLY); CHKERRQ(ierr);
  ierr = SLESSetOptionsPrefix
    (this_level->pre_smooth,prefix); CHKERRQ(ierr);
  ierr = SLESAppendOptionsPrefix
    (this_level->pre_smooth,"presmoother_"); CHKERRQ(ierr);
  ierr = SLESSetFromOptions(this_level->pre_smooth); CHKERRQ(ierr);

  ierr = SLESCreate
    (this_level->comm,&(this_level->post_smooth)); CHKERRQ(ierr);
  ierr = SLESGetKSP(this_level->post_smooth,&subksp); CHKERRQ(ierr);
  ierr = KSPSetType(subksp,KSPPREONLY); CHKERRQ(ierr);
  ierr = SLESSetOptionsPrefix
    (this_level->post_smooth,prefix); CHKERRQ(ierr);
  ierr = SLESAppendOptionsPrefix
    (this_level->post_smooth,"postsmoother_"); CHKERRQ(ierr);
  ierr = SLESSetFromOptions(this_level->post_smooth); CHKERRQ(ierr);

  /* == attach the level matrix to the smoothers that were asked for == */
  if (smoother_choice & AMLPreSmooth) {
    ierr = SLESSetOperators
      (this_level->pre_smooth,this_level->mat,this_level->mat,0);
    CHKERRQ(ierr);
  }
  if (smoother_choice & AMLPostSmooth) {
    ierr = SLESSetOperators
      (this_level->post_smooth,this_level->mat,this_level->mat,0);
    CHKERRQ(ierr);
  }

  return 0;
}

#undef __FUNC__
#define __FUNC__ "SetupLastLevel"
/*
   SetupLastLevel - install the solver for the last (deepest) matrix.

   Input: this_level - level data; a22_solve is set, g2 is handed to the
                       preconditioner as its vector
          last_mat   - matrix to solve with; used as operator and as
                       preconditioning matrix
          last_solve - solver passed to CopySLES to obtain a22_solve
*/
static int SetupLastLevel
(MC_OneLevel_struct *this_level,Mat last_mat,SLES last_solve)
{
  PC deep_pc;
  int ierr;

  /* the solver itself */
  ierr = CopySLES(last_solve,&this_level->a22_solve); CHKERRQ(ierr);
  ierr = SLESSetOperators(this_level->a22_solve,last_mat,last_mat,0);
  CHKERRQ(ierr);
/*printf("Deep matrix\n");MatView(last_mat,0);*/

  /* set up its preconditioner right away */
  ierr = SLESGetPC(this_level->a22_solve,&deep_pc); CHKERRQ(ierr);
  ierr = PCSetVector(deep_pc,this_level->g2); CHKERRQ(ierr);
  ierr = PCSetUp(deep_pc); CHKERRQ(ierr);

  return 0;
}

/* Context of the shell preconditioner applied by DirectPCApply */
struct _DirectPC {
  Mat cof;  /* coefficient matrix, used to form residuals */
  Mat xpl;  /* explicit approximate inverse, applied by multiplication */
  Vec res;  /* work vector: residual */
  Vec dif;  /* work vector: correction */
  int it;   /* number of applications of xpl per preconditioner apply */
};
typedef struct _DirectPC* DirectPC;
#undef __FUNC__
#define __FUNC__ "DirectPCApply"
/*
   DirectPCApply - apply routine of the shell preconditioner: a few steps
   of an iteration with an explicitly given approximate inverse.

   Input:  ptr - the DirectPC context
           in  - right hand side
   Output: out - result

   out = xpl*in, followed by it-1 corrections
   out <- out - xpl*(cof*out - in).  The work vectors res and dif of the
   context are overwritten.
*/
static int DirectPCApply(void *ptr,Vec in, Vec out)
{
  DirectPC ctx = (DirectPC) ptr;
  Mat A = ctx->cof;
  Mat M = ctx->xpl;
  Vec r = ctx->res;
  Vec dx = ctx->dif;
  int nsweeps = ctx->it;
  Scalar minus_one = -1.0;
  int sweep,ierr;

  /* first sweep: the initial guess is zero, so the residual is "in" */
  ierr = MatMult(M,in,out); CHKERRQ(ierr);

  /* remaining sweeps */
  for (sweep=1; sweep<nsweeps; sweep++) {
    /* r = A out - in */
    ierr = MatMult(A,out,r); CHKERRQ(ierr);
    ierr = VecAXPY(&minus_one,in,r); CHKERRQ(ierr);
    /* out = out - M r */
    ierr = MatMult(M,r,dx); CHKERRQ(ierr);
    ierr = VecAXPY(&minus_one,dx,out); CHKERRQ(ierr);
  }

  return 0;
}

#undef __FUNC__
#define __FUNC__ "Setup11Solve"
/*
   Setup11Solve - install the solver for the 11 block of a level.

   Input: a11_solve_type - preconditioner type; currently ignored because
                           the branch that would use it is switched off
          this_level     - level data; a11_solve is created, g11 is the
                           operator, g1 the preconditioner's vector
          it11           - iteration count stored in the shell context
          inv            - explicit approximate inverse stored in the shell
                           context; it is not destroyed here

   The preconditioner of a11_solve becomes a shell preconditioner that
   applies DirectPCApply with a freshly allocated context.
*/
static int Setup11Solve
(PCType a11_solve_type,MC_OneLevel_struct *this_level,int it11,Mat inv)
{
  const int use_given_type = 0; /* switch for the disabled alternative */
  PC sub_pc;
  int ierr;

  /* separate solver on F points */
  ierr = PCParallelInstallSubSolve(this_level->comm,&this_level->a11_solve);
  CHKERRQ(ierr);
  ierr = SLESSetOperators(this_level->a11_solve,
			  this_level->g11,this_level->g11,0);
  CHKERRQ(ierr);
  ierr = SLESGetPC(this_level->a11_solve,&sub_pc); CHKERRQ(ierr);

  if (use_given_type) {
    ierr = PCSetType(sub_pc,a11_solve_type); CHKERRQ(ierr);
  } else {
    DirectPC ctx;

    ierr = PCSetType(sub_pc,PCSHELL); CHKERRQ(ierr);
    ctx = (DirectPC) PetscMalloc(sizeof(struct _DirectPC));
    CHKPTRQ(ctx);
    ctx->cof = this_level->g11;
    ctx->xpl = inv;
    ctx->it = it11;
    ierr = VecDuplicate(this_level->g1,&ctx->res); CHKERRQ(ierr);
    ierr = VecDuplicate(this_level->g1,&ctx->dif); CHKERRQ(ierr);
    ierr = PCShellSetApply(sub_pc,&DirectPCApply,(void*)ctx);
    CHKERRQ(ierr);
  }

  ierr = PCSetVector(sub_pc,this_level->g1); CHKERRQ(ierr);
  ierr = PCSetUp(sub_pc); CHKERRQ(ierr);
/*  ierr = MatDestroy(inv); CHKERRQ(ierr);*/
  return 0;
}

/****************************************************************
 * Submatrix handling                                           *
 ****************************************************************/
#undef __FUNC__
#define __FUNC__ "ApxInv"
/*
   ApxInv - build an explicit approximate inverse of the 11 block of a
   level from a truncated series in the scaled off-diagonal part.

   Input:  this_level - level data; g11 and g12 are read, g1, g2 and h1 are
                        used as work vectors (overwritten)
           it11       - number of terms after the identity; values below 1
                        are treated as 1 (they used to loop forever)
   Output: apx        - the new matrix; the caller owns it

   With S the off-diagonal part of -g11, row-scaled by g1, the routine
   forms inv = I + S + ... + S^it11, scales its columns by g1, and finally
   adds a diagonal correction computed from a few SSOR sweeps on g11
   applied to g12 times a constant vector.

   NOTE(review): the diagonal is extracted into g1 but the reciprocal is
   taken of u1.  The comments further down ("g1/u1", "h1/v1") suggest that
   these vectors share storage -- confirm; otherwise g1 holds the diagonal
   itself rather than its inverse.
*/
static int ApxInv(MC_OneLevel_struct *this_level,int it11,Mat *apx)
{
  Mat mat = this_level->g11,off,inv;
  MPI_Comm comm = this_level->comm;
  int rstart,rend,loc,ierr;
  
  ierr = MatGetDiagonal(mat,this_level->g1); CHKERRQ(ierr);
  ierr = VecReciprocal(this_level->u1); CHKERRQ(ierr);

  ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
  loc = rend-rstart;

  /* set up scaled off diagonal part */
  ierr = MatCreateMPIAIJ(comm,loc,loc,PETSC_DECIDE,PETSC_DECIDE,
			 0,0,0,0,&off); CHKERRQ(ierr);
  {
    int iRow;
    for (iRow=0; iRow<loc; iRow++) {
      int Row=iRow+rstart,ncols,*cols,iCol; Scalar *vals;
      ierr = MatGetRow(mat,Row,&ncols,&cols,&vals); CHKERRQ(ierr);
      for (iCol=0; iCol<ncols; iCol++) {
	int Col=cols[iCol]/*+rstart*/; Scalar v = -vals[iCol];
	if (Col!=Row) {
	  ierr = MatSetValues(off,1,&Row,1,&Col,&v,INSERT_VALUES);
	  CHKERRQ(ierr);
	}
      }
      ierr = MatRestoreRow(mat,Row,&ncols,&cols,&vals); CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(off,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(off,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatDiagonalScale_MPIAIJ(off,this_level->g1,0); CHKERRQ(ierr);

  /* iterate to form approximate inverse: inv <- I + off*inv */
  {
    Mat tmp; int it=it11-1,iRow;
    ierr = MatConvert(off,MATSAME,&inv); CHKERRQ(ierr);
    for (;;) {
      for (iRow=0; iRow<loc; iRow++) {
	int Row=iRow+rstart; Scalar one=1.0;
	ierr = MatSetValues(inv,1,&Row,1,&Row,&one,ADD_VALUES); CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(inv,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(inv,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /* "<=" rather than "==" so that it11<1 cannot count down forever */
      if (it<=0) break;
      ierr = MatMatMult_MPIAIJ(off,inv,&tmp); CHKERRQ(ierr);
      ierr = MatDestroy(inv); CHKERRQ(ierr);
      ierr = MatConvert(tmp,MATSAME,&inv); CHKERRQ(ierr);
      ierr = MatDestroy(tmp); CHKERRQ(ierr);
      it--;
    }
  }
  /* the off-diagonal part is not needed any more; it used to be leaked */
  ierr = MatDestroy(off); CHKERRQ(ierr);
  ierr = MatDiagonalScale_MPIAIJ(inv,0,this_level->g1); CHKERRQ(ierr);

  {
    Vec t1; Scalar mone = -1.0;

    ierr = VecDuplicate(this_level->g1,&t1); CHKERRQ(ierr);

    /* t1 <- A_{12} e, with e the vector with all entries -1 */
    ierr = VecSet(&mone,this_level->g2); CHKERRQ(ierr);
    ierr = MatMult(this_level->g12,this_level->g2,t1);
    CHKERRQ(ierr);

    {/* Get the (generalised) row sums of A^{-1} in g1/u1 */
      PC pc;
      ierr = PCCreate(comm,&pc); CHKERRQ(ierr);
      ierr = PCSetType(pc,PCSOR); CHKERRQ(ierr);
      ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP); CHKERRQ(ierr);
      ierr = PCSORSetIterations(pc,3); CHKERRQ(ierr);
      ierr = PCSetOperators(pc,mat,mat,0); CHKERRQ(ierr);
      ierr = PCSetVector(pc,this_level->g1); CHKERRQ(ierr);
      ierr = PCSetUp(pc); CHKERRQ(ierr);
      ierr = PCApply(pc,t1,this_level->g1); CHKERRQ(ierr);
      ierr = PCDestroy(pc); CHKERRQ(ierr);
    }
    {/* get the row sums of the approx inv in h1/v1, and compensate */
      Scalar *v; int iRow;

      /* h1 <- (g1 - inv*t1) ./ t1; entries where t1 is zero are not guarded */
      ierr = MatMult(inv,t1,this_level->h1); CHKERRQ(ierr);
      ierr = VecAXPY(&mone,this_level->h1,this_level->g1); CHKERRQ(ierr);
      ierr = VecPointwiseDivide(this_level->g1,t1,this_level->h1);
      CHKERRQ(ierr);
/*printf("compensation\n");VecView(this_level->h1,0);*/

      /* add the compensation to the diagonal of the approximate inverse */
      ierr = VecGetArray(this_level->h1,&v); CHKERRQ(ierr);
      for (iRow=0; iRow<loc; iRow++) {
	int Row=iRow+rstart;
	ierr = MatSetValues(inv,1,&Row,1,&Row,v+iRow,ADD_VALUES);
	CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(this_level->h1,&v); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(inv,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(inv,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    }
    ierr = VecDestroy(t1); CHKERRQ(ierr);
  }
  
  *apx = inv;

/*printf("matrix to be approximately inverted\n"); MatView(mat,0); MPI_Barrier(comm);*/
/*printf("approximate inverse matrix\n"); MatView(*apx,0); MPI_Barrier(comm);*/
  return 0;
}

#undef __FUNC__
#define __FUNC__ "ComputeFillMatrix"
/*
   ComputeFillMatrix - form the fill (Schur-complement style) product
   g21 * inv * g12 for one level, where inv is the approximate inverse
   produced by ApxInv().

   Input:
     this_level - level whose g12/g21 blocks and fill_method are used
     it11       - passed through unchanged to ApxInv()
   Output:
     fill - the product g21 * (inv * g12)
     inv  - the approximate inverse returned by ApxInv()

   When this_level->fill_method is AMLFillNone nothing is computed and
   neither output is written.  Returns 0, or a nonzero error code via
   CHKERRQ.
*/
static int ComputeFillMatrix
(MC_OneLevel_struct *this_level,int it11,Mat *fill,Mat *inv)
{
  Mat inv_g12;
  int ierr;

  /* no fill requested: leave *fill and *inv untouched */
  if (this_level->fill_method == AMLFillNone) return 0;

  ierr = ApxInv(this_level,it11,inv); CHKERRQ(ierr);

  /* inv_g12 <- inv * g12, then *fill <- g21 * inv_g12 */
  ierr = MatMatMult_MPIAIJ(*inv,this_level->g12,&inv_g12); CHKERRQ(ierr);
  ierr = MatMatMult_MPIAIJ(this_level->g21,inv_g12,fill); CHKERRQ(ierr);

  /* the intermediate product is no longer needed */
  ierr = MatDestroy(inv_g12); CHKERRQ(ierr);

  return 0;
}

#undef __FUNC__
#define __FUNC__ "NextMatFill"
/*
   NextMatFill - subtract (part of) the fill matrix from res_mat.

   Input:
     res_mat - matrix that is updated in place with ADD_VALUES and then
               assembled with MAT_FINAL_ASSEMBLY
     fill    - fill matrix; only the locally owned rows are read
     meth    - selects how much of fill is subtracted:
                 AMLFillDiag   - the diagonal only
                 AMLFillStrong - the diagonal, plus the largest strictly
                                 positive off-diagonal entry of each row,
                                 applied symmetrically and only when its
                                 column index exceeds the row index
                 AMLFillFull   - every stored entry

   Returns 0 on success.  AMLFillNone and unknown methods raise an error
   through SETERRQ.
*/
static int NextMatFill(Mat res_mat,Mat fill,AMLFillMethod meth)
{
  MPI_Comm comm = res_mat->comm;
  int rstart,rend,loc,ierr;

  if (meth == AMLFillNone) SETERRQ(1,0,"NextMatFill: should have been caught");

  ierr = MatGetOwnershipRange(fill,&rstart,&rend); CHKERRQ(ierr);
  loc = rend-rstart;

  if (meth == AMLFillDiag) {
    Vec d; Scalar *v; int iRow;
    /* extract only the diagonal */
    ierr = VecCreateMPI(comm,loc,PETSC_DECIDE,&d); CHKERRQ(ierr);
    ierr = MatGetDiagonal(fill,d); CHKERRQ(ierr);
    ierr = VecGetArray(d,&v); CHKERRQ(ierr);

    for (iRow=0; iRow<loc; iRow++) {
      int Row=rstart+iRow; Scalar val=-v[iRow];
      ierr = MatSetValues(res_mat,1,&Row,1,&Row,&val,ADD_VALUES);
      CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(d,&v); CHKERRQ(ierr);
    ierr = VecDestroy(d); CHKERRQ(ierr);
  } else if (meth == AMLFillStrong) {
    int iRow;
    for (iRow=0; iRow<loc; iRow++) {
      /* mc: position (in cols) of the chosen off-diagonal, -1 if none;
         mv: the negated value of that entry */
      int Row=iRow+rstart,ncols,*cols,iCol,mc=-1;
      Scalar *vals,mv = 0.0;
      ierr = MatGetRow(fill,Row,&ncols,&cols,&vals); CHKERRQ(ierr);
      for (iCol=0; iCol<ncols; iCol++) {
        Scalar val = -vals[iCol];
        if (cols[iCol]==Row) {
          ierr = MatSetValues(res_mat,1,&Row,1,&Row,&val,ADD_VALUES);
          CHKERRQ(ierr);
        } else {
          /* equivalent to vals[iCol] > (largest value seen so far) */
          if (-val > -mv) {mv = val; mc = iCol;}
        }
      }
      if (ncols>0 && mc>-1) {
        /* only handle the pair once, from the row with the lower index */
        if (cols[mc]>Row) {
          ierr = MatSetValues(res_mat,1,&Row,1,cols+mc,&mv,ADD_VALUES);
          CHKERRQ(ierr);
          ierr = MatSetValues(res_mat,1,cols+mc,1,&Row,&mv,ADD_VALUES);
          CHKERRQ(ierr);
        }
      }
      /* restore with the same (global) row number used in MatGetRow */
      ierr = MatRestoreRow(fill,Row,&ncols,&cols,&vals); CHKERRQ(ierr);
    }
  } else if (meth == AMLFillFull) {
    int iRow;
    for (iRow=0; iRow<loc; iRow++) {
      int Row=iRow+rstart,ncols,*cols,iCol; Scalar *vals;
      ierr = MatGetRow(fill,Row,&ncols,&cols,&vals); CHKERRQ(ierr);
      for (iCol=0; iCol<ncols; iCol++) {
        Scalar val = -vals[iCol]; int Col=cols[iCol];
        ierr = MatSetValues(res_mat,1,&Row,1,&Col,&val,ADD_VALUES);
        CHKERRQ(ierr);
      }
      /* restore with the same (global) row number used in MatGetRow */
      ierr = MatRestoreRow(fill,Row,&ncols,&cols,&vals); CHKERRQ(ierr);
    }
  } else SETERRQ(1,0,"NextMatFill: unknown method");
  ierr = MatAssemblyBegin(res_mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(res_mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

  return 0;
}

/* decide whether we are going to have more levels */
#undef __FUNC__
#define __FUNC__ "ThisLevelContinueDecide"
/*
   ThisLevelContinueDecide - decide collectively whether another level
   should be built below this one.

   Input:
     this_level - level whose size2 and communicator are used
                  (size2 is presumably the local size of the 2-block;
                  verify against the code that fills it in)
     cutoff     - if positive, a process whose size2 is below this value
                  contributes 0 to the test sum
   Output:
     more_levels - nonzero if the recursion should continue.  It is
                   forced to 0 as soon as any process was cut off, i.e.
                   when the test sum is smaller than the plain sum of
                   size2 over all processes.

   Returns 0, or an error (via SETERRQ) when the global sum of size2 is
   zero.
*/
static int ThisLevelContinueDecide
(MC_OneLevel_struct *this_level,int *more_levels,int cutoff)
{
  int local_next = this_level->size2;
  int local_test = local_next, more_matrix;

  if ((cutoff>0) && (local_test<cutoff)) local_test = 0;
  MPI_Allreduce((void *)&local_test,(void *)more_levels,
                1,MPI_INT,MPI_SUM,this_level->comm);
  MPI_Allreduce((void *)&local_next,(void *)&more_matrix,
                1,MPI_INT,MPI_SUM,this_level->comm);
  /* write through the pointer: the original assigned to the pointer
     itself, which left the caller's flag unchanged */
  if (*more_levels<more_matrix) *more_levels = 0;
  if (!more_matrix)
    SETERRQ(1,0,"Should have been caught: no 2-block in ExtractBlocks");

  return 0;
}


#undef __FUNC__
#define __FUNC__ "ExtractBlocks"
/*
   ExtractBlocks - build the four blocks g11, g12, g21, g22 of a level.

   Input:
     clr, rest  - index sets selecting the 1-block and 2-block rows
     this_level - level that supplies mat, size1, size2 and receives the
                  blocks in g11, g12, g21, g22
     smat       - matrix from which the off-diagonal blocks g12 and g21
                  are taken; the diagonal blocks g11 and g22 are taken
                  from this_level->mat

   Each block is extracted with MatGetSubMatrices() and converted with
   MatrixAij2Mpi(); the extracted sub matrices and the arrays holding
   them are released again before returning.

   Returns 0, or a nonzero error code via CHKERRQ.
*/
static int ExtractBlocks
(IS clr,IS rest,MC_OneLevel_struct *this_level,Mat smat)
{
  Mat mat = this_level->mat;
  IS grest,gclr,xis[4],yis[4]; Mat *res_mat; MPI_Comm comm = mat->comm;
  int loc=this_level->size1,local_next=this_level->size2,ierr;

  ierr = ISGetGlobalContent(clr,&gclr); CHKERRQ(ierr);
  ierr = ISGetGlobalContent(rest,&grest); CHKERRQ(ierr);

  /* row/column index sets: entries 0,1 give the (1,2) and (2,1) blocks,
     entries 2,3 give the (1,1) and (2,2) blocks */
  xis[0] = clr;   xis[1] = rest; xis[2] = clr;  xis[3] = rest;
  yis[0] = grest; yis[1] = gclr; yis[2] = gclr; yis[3] = grest;

  /* off-diagonal blocks, taken from smat */
  ierr = MatGetSubMatrices(smat,2,xis,yis,MAT_INITIAL_MATRIX,
                           &res_mat); CHKERRQ(ierr);
  ierr = MatrixAij2Mpi(local_next,res_mat[0],comm,&(this_level->g12));
  CHKERRQ(ierr);
  ierr = MatrixAij2Mpi(loc,res_mat[1],comm,&(this_level->g21));
  CHKERRQ(ierr);
  ierr = MatDestroy(res_mat[0]); CHKERRQ(ierr);
  ierr = MatDestroy(res_mat[1]); CHKERRQ(ierr);
  /* release this array before the next call allocates a fresh one */
  PetscFree(res_mat);

  /* diagonal blocks, taken from the level matrix */
  ierr = MatGetSubMatrices(mat,2,xis+2,yis+2,MAT_INITIAL_MATRIX,
                           &res_mat); CHKERRQ(ierr);
  ierr = MatrixAij2Mpi(loc,res_mat[0],comm,&(this_level->g11));
  CHKERRQ(ierr);
  ierr = MatrixAij2Mpi(local_next,res_mat[1],comm,&(this_level->g22));
  CHKERRQ(ierr);
  ierr = MatDestroy(res_mat[0]); CHKERRQ(ierr);
  ierr = MatDestroy(res_mat[1]); CHKERRQ(ierr);
  PetscFree(res_mat);

  ierr = ISDestroy(gclr); CHKERRQ(ierr);
  ierr = ISDestroy(grest); CHKERRQ(ierr);

  return 0;
}

#undef __FUNC__
#define __FUNC__ "SetupSubVectors"
/*
   SetupSubVectors - create the work vectors and scatter contexts of the
   1-block (clr) and 2-block (rest) of a level.

   Input:
     clr, rest  - index sets of the two blocks; their sizes give the
                  local vector lengths
     this_level - level to fill in; its u, cdiag and comm are read

   On return the level holds
     u1,v1,cdiag1 / u2,v2  - sequential vectors of the two block sizes
     g1,h1 / g2,h2         - parallel vectors with the same local sizes
                             whose arrays are replaced by the arrays of
                             u1,v1 / u2,v2 (so g1 aliases u1, h1 aliases
                             v1, g2 aliases u2, h2 aliases v2)
     get_clr,put_clr,get_rest,put_rest - scatters between u and the
                             sequential block vectors
     cdiag1                - the clr part of cdiag, obtained with get_clr

   Returns 0, or a nonzero error code via CHKERRQ.
*/
static int SetupSubVectors(IS clr,IS rest,MC_OneLevel_struct *this_level)
{
  MPI_Comm comm = this_level->comm;
  IS stride_clr,stride_rest;
  Scalar *storage;
  int n_clr,n_rest,ierr;

  ierr = ISGetSize(clr,&n_clr); CHKERRQ(ierr);
  ierr = ISGetSize(rest,&n_rest); CHKERRQ(ierr);

  /* sequential and parallel vectors for both blocks */
  ierr = VecCreateSeq(MPI_COMM_SELF,n_clr,&(this_level->u1));
  CHKERRQ(ierr);
  ierr = VecCreateSeq(MPI_COMM_SELF,n_rest,&(this_level->u2));
  CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,n_clr,PETSC_DECIDE,&(this_level->g1));
  CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,n_rest,PETSC_DECIDE,&(this_level->g2));
  CHKERRQ(ierr);
  ierr = VecDuplicate(this_level->u1,&(this_level->v1)); CHKERRQ(ierr);
  ierr = VecDuplicate(this_level->u2,&(this_level->v2)); CHKERRQ(ierr);
  ierr = VecDuplicate(this_level->g1,&(this_level->h1)); CHKERRQ(ierr);
  ierr = VecDuplicate(this_level->g2,&(this_level->h2)); CHKERRQ(ierr);
  ierr = VecDuplicate(this_level->u1,&(this_level->cdiag1));
  CHKERRQ(ierr);

  /* make each parallel vector share the array of its sequential twin;
     the arrays obtained here are intentionally not restored */
  ierr = VecGetArray(this_level->u1,&storage); CHKERRQ(ierr);
  ierr = VecPlaceArray(this_level->g1,storage); CHKERRQ(ierr);
  ierr = VecGetArray(this_level->u2,&storage); CHKERRQ(ierr);
  ierr = VecPlaceArray(this_level->g2,storage); CHKERRQ(ierr);
  ierr = VecGetArray(this_level->v1,&storage); CHKERRQ(ierr);
  ierr = VecPlaceArray(this_level->h1,storage); CHKERRQ(ierr);
  ierr = VecGetArray(this_level->v2,&storage); CHKERRQ(ierr);
  ierr = VecPlaceArray(this_level->h2,storage); CHKERRQ(ierr);

  /* gather/scatter contexts between the level vector and the blocks */
  ierr = ISCreateStride(MPI_COMM_SELF,n_clr,0,1,&stride_clr);
  CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,n_rest,0,1,&stride_rest);
  CHKERRQ(ierr);
  ierr = VecScatterCreate(this_level->u,clr,this_level->u1,stride_clr,
                          &(this_level->get_clr)); CHKERRQ(ierr);
  ierr = VecScatterCreate(this_level->u1,stride_clr,this_level->u,clr,
                          &(this_level->put_clr)); CHKERRQ(ierr);
  ierr = VecScatterCreate(this_level->u,rest,this_level->u2,stride_rest,
                          &(this_level->get_rest)); CHKERRQ(ierr);
  ierr = VecScatterCreate(this_level->u2,stride_rest,this_level->u,rest,
                          &(this_level->put_rest)); CHKERRQ(ierr);
  ierr = ISDestroy(stride_clr); CHKERRQ(ierr);
  ierr = ISDestroy(stride_rest); CHKERRQ(ierr);

  /* pull the 1-block part of the diagonal vector into cdiag1 */
  ierr = VecScatterBegin(this_level->cdiag,this_level->cdiag1,
                         INSERT_VALUES,SCATTER_FORWARD,this_level->get_clr);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(this_level->cdiag,this_level->cdiag1,
                       INSERT_VALUES,SCATTER_FORWARD,this_level->get_clr);
  CHKERRQ(ierr);

  return 0;
}

#undef __FUNC__
#define __FUNC__ "SetupOneLevel"
/*
   SetupOneLevel - build one level of the multilevel hierarchy and
   recurse to build the levels below it.

   Input:
     mat, idx      - matrix and index set handed to InitOneLevel()
     local_pctype  - preconditioner type handed to Setup11Solve()
     lvl           - level number, 0 for the top level
     isize, jsize  - (RUGE builds only) handed to InitOneLevel()
     prefix        - options prefix handed to SetSmoothers()
     fill_method, solve_scheme, grid_choice, smoothers - method choices
     pre, post     - smoother solvers handed to SetSmoothers()
     last_solve    - solver handed to SetupLastLevel()
     cutoff        - handed to ThisLevelContinueDecide()
     it11          - handed to ComputeFillMatrix() and Setup11Solve()
   Output:
     return_level  - the newly built level; NOT written on early return
     early_return  - set to 1 when SplitOffOneLevel() reports global_r==0
                     on a level below the top one, in which case the
                     function returns right after the split; 0 otherwise

   Steps: allocate and initialise the level, set the smoothers, build
   the strong matrix and split the unknowns, create sub vectors and the
   four blocks, optionally subtract a fill matrix from g22, set up the
   (1,1) solve, and then either recurse on g22 or set up the last level.

   Returns 0, or a nonzero error code via CHKERRQ.
*/
static int SetupOneLevel
(Mat mat,IS idx,PCType local_pctype,int lvl,
#if RUGE
 int isize,int jsize,
#endif
 char *prefix,
 AMLFillMethod fill_method,AMLSolveScheme solve_scheme,
 AMLCoarseGridChoice grid_choice,AMLSmootherChoice smoothers,
 SLES pre,SLES post,SLES last_solve,
 int cutoff,int it11,
 MC_OneLevel_struct **return_level,int *early_return)
{
  MC_OneLevel_struct *this_level; IS set2g;
  /* stays null when no fill matrix (and hence no approximate inverse)
     is computed, so Setup11Solve() never sees an indeterminate handle */
  Mat inv_mat = 0;
  int ierr;

  /* initial setup of the level */
  this_level = (MC_OneLevel_struct *)
    PetscMalloc(sizeof(MC_OneLevel_struct)); CHKPTRQ(this_level);

  ierr = InitOneLevel(this_level,lvl,mat,idx,
#if RUGE
                      isize,jsize,
#endif
                      solve_scheme,grid_choice,fill_method);
  CHKERRQ(ierr);
#if TRACING
  ierr = LambdaOneEst(mat,this_level); CHKERRQ(ierr);
#endif
  ierr = MatGetDiagonal(this_level->mat,this_level->diag); CHKERRQ(ierr);
  ierr = SetSmoothers(smoothers,this_level,pre,post,prefix); CHKERRQ(ierr);

  {
    Mat strong_mat; IS set1,set2; int global_r;

    ierr = MakeStrongMatrix(this_level,&strong_mat); CHKERRQ(ierr);

    /* split this level into F and C */
    ierr = SplitOffOneLevel
      (this_level,strong_mat,&set1,&set2,&set2g,&global_r); CHKERRQ(ierr);
    if (global_r==0 && lvl>0) {
      /* NOTE(review): this_level, strong_mat and the index sets appear
         to be abandoned on this path -- confirm and release them once
         the matching destroy routine is known */
      *early_return = 1; return 0;
    }
    *early_return = 0;

    /* analyse this level, get subblocks and all that */
    ierr = VecReciprocal(this_level->diag); CHKERRQ(ierr);
    ierr = SetupSubVectors(set1,set2,this_level); CHKERRQ(ierr);

    /* sub blocks */
    ierr = ExtractBlocks(set1,set2,this_level,strong_mat); CHKERRQ(ierr);
    ierr = ISDestroy(set1); CHKERRQ(ierr);
    ierr = ISDestroy(set2); CHKERRQ(ierr);
    ierr = MatDestroy(strong_mat); CHKERRQ(ierr);
  }

  {
    Mat next_mat; int more_levels;
    /* setup the next matrix */
    next_mat = this_level->g22;
    if (fill_method != AMLFillNone) {
      Mat fill_mat;
      ierr = ComputeFillMatrix
        (this_level,it11,&fill_mat,&inv_mat); CHKERRQ(ierr);
      ierr = NextMatFill(next_mat,fill_mat,fill_method); CHKERRQ(ierr);
      ierr = MatDestroy(fill_mat); CHKERRQ(ierr);
    }
    ierr = Setup11Solve(local_pctype,this_level,it11,inv_mat); CHKERRQ(ierr);

    /* if there is enough matrix left, go to the next level */
    ierr = ThisLevelContinueDecide(this_level,&more_levels,cutoff);
    CHKERRQ(ierr);
    if (!more_levels) {
      this_level->next_level = 0;
      ierr = SetupLastLevel(this_level,next_mat,last_solve); CHKERRQ(ierr);
    } else {
      int early_ret;
      ierr = SetupOneLevel
        (next_mat,set2g,local_pctype,lvl+1,
#if RUGE
         this_level->isize,this_level->jsize,
#endif
         prefix,
         fill_method,solve_scheme,grid_choice,smoothers,
         pre,post,last_solve,cutoff,it11,
         &(this_level->next_level),&early_ret); CHKERRQ(ierr);
      if (early_ret) {
        /* the recursive call gave up: this level becomes the last one */
        this_level->next_level = 0;
        ierr = SetupLastLevel(this_level,next_mat,last_solve);
        CHKERRQ(ierr);
      }
    }
  }
  *return_level = this_level;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "PCSetup_MLevel"
/*
   PCSetup_MLevel - setup routine of the multilevel preconditioner.

   Creates a stride index set over the locally owned rows of pc->mat,
   looks up the type of the local preconditioner and the options prefix,
   and hands everything, together with the choices stored in the
   preconditioner context, to SetupOneLevel() for level 0.  The resulting
   level chain is stored in the context's level_stuff field.

   Returns 0, or a nonzero error code via CHKERRQ.
*/
int PCSetup_MLevel(PC pc)
{
  PC_MCol_struct *ctx = (PC_MCol_struct *) pc->data;
  Mat A = pc->mat;
  PC sub_pc; PCType sub_type;
  IS owned_rows;
  char *opt_prefix;
  int first,last,early_unused,ierr;

  /* index set of the rows owned by this process */
  ierr = MatGetOwnershipRange(A,&first,&last); CHKERRQ(ierr);
  ierr = ISCreateStride(A->comm,last-first,first,1,&owned_rows);
  CHKERRQ(ierr);

  ierr = PCParallelGetLocalPC(pc,&sub_pc); CHKERRQ(ierr);
  ierr = PCGetType(sub_pc,&sub_type,PETSC_NULL); CHKERRQ(ierr);
  ierr = PCGetOptionsPrefix(pc,&opt_prefix); CHKERRQ(ierr);

  /* build the hierarchy starting at level 0; the early-return flag is
     not inspected for the top level */
  ierr = SetupOneLevel
    (A,owned_rows,sub_type,0,
#if RUGE
     0,0,
#endif
     opt_prefix,
     ctx->fill_method,ctx->solve_scheme,ctx->grid_choice,
     ctx->smoother_choice,
     ctx->pre_smoother,ctx->post_smoother,ctx->last_solve,
     ctx->cutoff,ctx->it11,
     &(ctx->level_stuff),&early_unused); CHKERRQ(ierr);

  return 0;
}

/****************************************************************
 * Solve                                                        *
 ****************************************************************/
    static int SolveMultiLevel(MC_OneLevel_struct*this_level,Vec x,Vec y);

/* Transfer 1-component to 2-component, and solve there. */
#undef __FUNC__
#define __FUNC__ "SolveNextLevels"
/*
   SolveNextLevels - move to the 2-block, solve there, and move back.

   Input:
     this_level - current level
     x          - level-sized input vector; its rest part is scattered
                  into v2
     x_1        - 1-block vector multiplied by g21
   Output:
     y          - level-sized vector; its rest part receives u2
     y_1        - 1-block vector receiving g12 * g2

   The pairs (g2,u2) and (h2,v2) are used interchangeably here; they
   share storage as arranged in SetupSubVectors().  The 2-block system
   is solved recursively when a next level exists, otherwise with
   a22_solve; having neither is an error.

   Returns 0, or a nonzero error code.
*/
static int SolveNextLevels
(MC_OneLevel_struct *this_level,Vec x,Vec y,Vec x_1,Vec y_1)
{
  MC_OneLevel_struct *coarser = this_level->next_level;
  Scalar minus_one = -1.0;
  int ierr;

  /* g2 <- g21 * x_1 */
  ierr = MatMult(this_level->g21,x_1,this_level->g2); CHKERRQ(ierr);

  /* v2 <- rest part of x, then v2 <- v2 - u2 */
  ierr = VecScatterBegin(x,this_level->v2,INSERT_VALUES,SCATTER_FORWARD,
                         this_level->get_rest); CHKERRQ(ierr);
  ierr = VecScatterEnd(x,this_level->v2,INSERT_VALUES,SCATTER_FORWARD,
                       this_level->get_rest); CHKERRQ(ierr);
  ierr = VecAXPY(&minus_one,this_level->u2,this_level->v2); CHKERRQ(ierr);

  /* solve on the 2-block: right hand side h2, solution g2 */
  if (coarser) {
    ierr = SolveMultiLevel(coarser,this_level->h2,this_level->g2);
    CHKERRQ(ierr);
  } else {
    int its;
    if (!this_level->a22_solve)
      SETERRQ(1,0,"Empty deepest level should have been caught");
    ierr = SLESSolve(this_level->a22_solve,
                     this_level->h2,this_level->g2,&its); CHKERRQ(ierr);
  }

  /* rest part of y <- u2 */
  ierr = VecScatterBegin(this_level->u2,y,INSERT_VALUES,SCATTER_FORWARD,
                         this_level->put_rest); CHKERRQ(ierr);
  ierr = VecScatterEnd(this_level->u2,y,INSERT_VALUES,SCATTER_FORWARD,
                       this_level->put_rest); CHKERRQ(ierr);

  /* y_1 <- g12 * g2 */
  ierr = MatMult(this_level->g12,this_level->g2,y_1); CHKERRQ(ierr);

  return 0;
}

/* solve (1,1) block */
#undef __FUNC__
#define __FUNC__ "Solve1BlockFor"
/*
   Solve1BlockFor - apply the (1,1) block solve, y from x.

   With the AMLCoarseGridDependent grid choice the solve is a pointwise
   multiplication by cdiag1; for any other choice a11_solve is used.
   Returns 0, or a nonzero error code via CHKERRQ.
*/
static int Solve1BlockFor(MC_OneLevel_struct *this_level,Vec x,Vec y)
{
  int ierr,its;

  if (this_level->grid_choice != AMLCoarseGridDependent) {
    ierr = SLESSolve(this_level->a11_solve,x,y,&its); CHKERRQ(ierr);
    return 0;
  }

  ierr = VecPointwiseMult(x,this_level->cdiag1,y); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "Solve1BlockBack"
/*
   Solve1BlockBack - apply the (1,1) block solve on the way back, y from x.

   Performs the same operation as Solve1BlockFor(): a pointwise
   multiplication by cdiag1 for the AMLCoarseGridDependent grid choice,
   a solve with a11_solve otherwise.
   Returns 0, or a nonzero error code via CHKERRQ.
*/
static int Solve1BlockBack(MC_OneLevel_struct *this_level,Vec x,Vec y)
{
  int ierr,its;

  if (this_level->grid_choice != AMLCoarseGridDependent) {
    ierr = SLESSolve(this_level->a11_solve,x,y,&its); CHKERRQ(ierr);
    return 0;
  }

  ierr = VecPointwiseMult(x,this_level->cdiag1,y); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "SolveThisLevel"
/*
   SolveThisLevel - one block-factorisation sweep on a level: forward
   (1,1) solve, 2-block solve, backward (1,1) solve.

   Input:
     this_level - level to solve on; it must have either a next level or
                  an a22_solve, otherwise an error is raised
     x          - level-sized input vector
   Output:
     y          - level-sized result vector

   The pairs (g1,u1) and (h1,v1) are used interchangeably here; they
   share storage as arranged in SetupSubVectors().  For the AMLSolveILU
   scheme the forward 1-block result is written into y first and the
   backward correction is added to it; for the other schemes the
   correction is simply inserted.

   Returns 0, or a nonzero error code.
*/
static int SolveThisLevel(MC_OneLevel_struct *this_level,Vec x,Vec y)
{
  int ierr; Scalar mone = -1.0;
  InsertMode update_mode;

  /* test the handles as pointers; casting them to int could truncate a
     non-null pointer to zero where pointers are wider than int */
  if (!this_level->a22_solve && !this_level->next_level)
    SETERRQ(1,0,"This cannot happen: end in 1-block");

  /* solve (1,1) block: u1 <- clr part of x, h1 <- solve(g1) */
  ierr = VecScatterBegin(x,this_level->u1, INSERT_VALUES,SCATTER_FORWARD,
                         this_level->get_clr); CHKERRQ(ierr);
  ierr = VecScatterEnd(x,this_level->u1, INSERT_VALUES,SCATTER_FORWARD,
                       this_level->get_clr); CHKERRQ(ierr);
  ierr = Solve1BlockFor(this_level,this_level->g1,this_level->h1);
  CHKERRQ(ierr);
  if (this_level->solve_scheme==AMLSolveILU) {
    ierr = VecScatterBegin(this_level->v1,y, INSERT_VALUES,SCATTER_FORWARD,
                           this_level->put_clr); CHKERRQ(ierr);
    ierr = VecScatterEnd(this_level->v1,y, INSERT_VALUES,SCATTER_FORWARD,
                         this_level->put_clr); CHKERRQ(ierr);
  }

  /* solve next level(s); g1 receives g12 times the 2-block solution */
  ierr = SolveNextLevels(this_level,x,y,this_level->h1,this_level->g1);
  CHKERRQ(ierr);

  /* write back 1-component: v1 <- -solve(g1), then put it into y */
  ierr = Solve1BlockBack(this_level,this_level->g1,this_level->h1);
  CHKERRQ(ierr);
  ierr = VecScale(&mone,this_level->v1); CHKERRQ(ierr);
  if (this_level->solve_scheme==AMLSolveILU) {
    update_mode = ADD_VALUES;
  } else {
    update_mode = INSERT_VALUES;
  }
  ierr = VecScatterBegin(this_level->v1,y, update_mode,SCATTER_FORWARD,
                         this_level->put_clr); CHKERRQ(ierr);
  ierr = VecScatterEnd(this_level->v1,y, update_mode,SCATTER_FORWARD,
                       this_level->put_clr); CHKERRQ(ierr);
  return 0;
}

/* the main recursive solve routine */
#undef __FUNC__
#define __FUNC__ "SolveMultiLevel"
/*
   SolveMultiLevel - the main recursive solve routine.

   Input:
     this_level - level to solve on
     x          - right hand side
   Output:
     y          - approximate solution

   Without a pre smoother this is just SolveThisLevel(x,y).  With a pre
   smoother (a post smoother is then required as well) the sequence is:
     y <- pre_smooth(x)
     u <- mat*y - x,  v <- SolveThisLevel(u),  y <- y + mfrac*v
     u <- mat*y - x,  v <- post_smooth(u),     y <- y - v
   using the level work vectors u and v.

   Returns 0, or a nonzero error code.
*/
static int SolveMultiLevel(MC_OneLevel_struct *this_level,Vec x,Vec y)
{
  /* NOTE(review): the factor -2 was flagged "VE !!!" by the original
     author; its justification is not visible in this file */
  Scalar mone = -1.0, mfrac = -2.;
  int its,ierr;

  /* test the handle as a pointer; an int cast could truncate it */
  if (this_level->pre_smooth) {
    if (!(this_level->post_smooth)) SETERRQ(1,0,"AMG solve needs post smooth");
    ierr = SLESSolve(this_level->pre_smooth,x,y,&its); CHKERRQ(ierr);

    /* residual-like vector after pre smoothing */
    ierr = MatMult(this_level->mat,y,this_level->u); CHKERRQ(ierr);
    ierr = VecAXPY(&mone,x,this_level->u); CHKERRQ(ierr);

    ierr = SolveThisLevel(this_level,this_level->u,this_level->v);
    CHKERRQ(ierr);

    /* apply the level correction and recompute mat*y - x */
    ierr = VecAXPY(&mfrac,this_level->v,y); CHKERRQ(ierr);
    ierr = MatMult(this_level->mat,y,this_level->u); CHKERRQ(ierr);
    ierr = VecAXPY(&mone,x,this_level->u); CHKERRQ(ierr);

    ierr = SLESSolve
      (this_level->post_smooth,this_level->u,this_level->v,&its);
    CHKERRQ(ierr);

    ierr = VecAXPY(&mone,this_level->v,y); CHKERRQ(ierr);
  } else {
    ierr = SolveThisLevel(this_level,x,y); CHKERRQ(ierr);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "PCApply_MLevel"
/*
   PCApply_MLevel - apply the multilevel preconditioner: y from x.

   Runs SolveMultiLevel() on the level chain stored in the
   preconditioner context (pc->data).
   Returns 0, or a nonzero error code via CHKERRQ.
*/
static int PCApply_MLevel(PC pc,Vec x,Vec y)
{
  MC_OneLevel_struct *top = ((PC_MCol_struct *) pc->data)->level_stuff;
  int ierr;

  ierr = SolveMultiLevel(top,x,y); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "PCDestroy_MLevel"
/*
   PCDestroy_MLevel - destroy routine of the multilevel preconditioner.

   Currently a no-op that always returns 0.
   NOTE(review): nothing allocated in PCCreate_MLevel() or during setup
   is released here -- confirm whether that is intended.
*/
int PCDestroy_MLevel(PetscObject obj)
{
  (void) obj; /* intentionally unused */
  return 0;
}

#undef __FUNC__
#define __FUNC__ "PCDuplicate_MLevel"
/*
   PCDuplicate_MLevel - "duplicate" a multilevel preconditioner.

   No copy is made: *new is set to the handle passed in as old, so both
   refer to the same object.  Always returns 0.
*/
int PCDuplicate_MLevel(PC old,PC *new)
{
  PC same_handle = old;

  *new = same_handle;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "PCCreate_MLevel"
/*
   PCCreate_MLevel - create routine of the multilevel preconditioner.

   Installs the apply/destroy/setup callbacks and the type in pc,
   allocates the preconditioner context and stores it in pc->data.

   Defaults placed in the context:
     fill_method     AMLFillNone
     solve_scheme    AMLSolveILU
     grid_choice     AMLCoarseGridDependent
     smoother_choice AMLSmoothNone
     cutoff          10
     it11            1
     level_stuff     null, until PCSetup_MLevel() builds the levels

   Sub solvers are installed for the local method, the pre and post
   smoothers and the last-level solve, and the local preconditioner is
   set to PCJACOBI.

   Returns 0, or a nonzero error code via CHKPTRQ/CHKERRQ.
*/
int PCCreate_MLevel(PC pc)
{
  PC_MCol_struct *bij;
  int ierr;

  pc->apply     = PCApply_MLevel;
  pc->applyrich = 0;
  pc->destroy   = PCDestroy_MLevel;
  pc->setfrom   = 0;
  pc->printhelp = 0;
  pc->setup     = PCSetup_MLevel;
  pc->type      = PCMultiLevel;

  bij = (PC_MCol_struct *) PetscMalloc( sizeof(PC_MCol_struct) );
  CHKPTRQ(bij);
  /* no levels exist before setup; do not leave the pointer indeterminate */
  bij->level_stuff = 0;
  bij->fill_method = AMLFillNone;
  bij->solve_scheme = AMLSolveILU;
  bij->grid_choice = AMLCoarseGridDependent;
  bij->smoother_choice = AMLSmoothNone;
  bij->cutoff = 10; bij->it11 = 1;

  /* create a subsolver for the 11 blocks, the pre/post smoother */
  ierr = PCParallelInstallSubSolve(pc->comm,&(bij->par_info.local_method));
  CHKERRQ(ierr);
  ierr = PCParallelInstallSubSolve(pc->comm,&(bij->pre_smoother));
  CHKERRQ(ierr);
  ierr = PCParallelInstallSubSolve(pc->comm,&(bij->post_smoother));
  CHKERRQ(ierr);
  ierr = PCParallelInstallSubSolve(pc->comm,&(bij->last_solve));
  CHKERRQ(ierr);

  pc->data      = (void *) bij;

  ierr = PCParallelInitCommStruct(pc); CHKERRQ(ierr);
  {
    PC local_pc;
    ierr = PCParallelGetLocalPC(pc,&local_pc); CHKERRQ(ierr);
    ierr = PCSetType(local_pc,PCJACOBI); CHKERRQ(ierr);
  }

  return 0;
}
