@PROBLEM sparse_iterative_solve
@INCLUDE <math.h>
@INCLUDE <stdio.h>
@INCLUDE <stdlib.h>
@INCLUDE <string.h>
@FUNCTION netsolve_iterative_solve
@DASHI $(MPI_DIR)/include
@DASHI $(NETSOLVE_ROOT)/src/SampleNumericalSoftware/SparseSolvers/sparse
@DASHI $(NETSOLVE_ROOT)/src/SampleNumericalSoftware/SparseSolvers/sparse/aux
@DASHI $(NETSOLVE_ROOT)/src/SampleNumericalSoftware/SparseSolvers/sparse/driver
@INCLUDE "mpi.h"
@INCLUDE "iterative.h"
#@INCLUDE "petsc/netsolve_petsc.h"
#@INCLUDE "petsc/petsc_auxs.h"
@INCLUDE "aztec/netsolve_aztec.h"
@INCLUDE "aztec/aztec_auxs.h"
@INCLUDE "itpack/netsolve_itpack.h"
@INCLUDE "itpack/itpack_auxs.h"
@INCLUDE "parallel_auxs.h"
@INCLUDE "matrix_auxs.h"
@INCLUDE "matrix_distr_auxs.h"
@LIB -lm
@LIB -L$(NETSOLVE_ROOT)/lib/$(NETSOLVE_ARCH)
@LIB -lnetsolve_aux
@LIB -lnetsolve_iterative_driver
@LIB -lnetsolve_iterative_auxs
@LIB -lnetsolve_aztec
@LIB -lnetsolve_itpack
@LIB -lnetsolve_petsc
@LIB -lnetsolve_tester
@LIB $(AZTEC_LIB_LINK)
@LIB $(NETSOLVE_ROOT)/lib/$(NETSOLVE_ARCH)/libitpack.a
@LIB -L$(PETSC_LIB_DIR)
@LIB -lpetscts
@LIB -lpetscsnes
@LIB -lpetscsles
@LIB -lpetscdm
@LIB -lpetscmat
@LIB -lpetscvec
@LIB -lpetsc
@LIB $(LAPACK_LIB_LINK)
@LIB $(BLAS_LIB_LINK)
@LIB -L$(MPI_DIR)/lib
@LIB -lmpich
@LIB -lm

@FUNCTION netsolve_iterative_solve
@LANGUAGE C
@MAJOR ROW
@PATH /PETSc-Aztec-ITPACK/
@COMPLEXITY 3,2
@CUSTOMIZED ITER_SOLVE
@DESCRIPTION 
netsolve iterative solve using petsc and aztec and itpack
@PARALLEL MPI

@INPUT 5
@OBJECT STRING package
PETSC or AZTEC or ITPACK
@OBJECT SPARSEMATRIX D sm
the sparse matrix
@OBJECT VECTOR D rhs_vector
the right-hand-side vector
@OBJECT SCALAR D rtol
relative convergence tolerance for the iterative solver
@OBJECT SCALAR I maxit
maximum number of iterations

@OUTPUT 2
@OBJECT VECTOR D sol_vector
solution vector
@OBJECT SCALAR I iterations
number of iterations taken to reach convergence

@CALLINGSEQUENCE
@ARG I0
@ARG mI1, nI1, mI2
@ARG fI1
@ARG I1
@ARG iI1
@ARG pI1
@ARG I2
@ARG I3
@ARG I4
@ARG O0
@ARG O1

@CODE
extern int netsolve_iterative_solve();
extern int aztec_params();
extern int petsc_params();
extern int itpack_params();

/*
 * Iteratively solve the sparse system A x = b with the package named by
 * @I0@ ("PETSC", "AZTEC" or "ITPACK").  The matrix is given in CRS form
 * (@I1@ = values, @iI1@ = column indices, @pI1@ = row pointers); its rows
 * are block-distributed over the MPI processes, each process solves its
 * slice, and rank 0 assembles the full solution into @O0@ and reports the
 * iteration count through @O1@.
 */

MPI_Comm comm ;
MPI_Comm basecom ;
int i,j, index;
int ierr, conv, *convs=NULL, flag;
int package;                       /* 1 = Aztec, 2 = PETSc, 5 = ITPACK */
int Argc = 1;
char **Args;                       /* dummy argv for solver initialization */
int local_size;
int first;
int mytid, ntids, rank, key, color;
int its, *iterations=NULL;
int total_size, last, nnzero, this_size;
int *ptr, *idx;
double *value_arr, *rhs, *sol;
double** result;
iterative_info_block info=0;
MPI_Status* status;

  /* Fake argc/argv handed to the underlying solver libraries. */
  Args = (char**)calloc(1,sizeof(char*));
  Args[0] = strdup("dummy");

  if(strcmp(@I0@,"ITPACK") == 0){
  /* ITPACK is serial: split MPI_COMM_WORLD so every rank gets its own
     single-process communicator and runs independently. */

    basecom = MPI_COMM_WORLD;
    MPI_Comm_rank(basecom, &rank);
    key = rank+1;
    color = rank+1;

    MPI_Comm_split (basecom, color, key, &comm);   /* make the new com */
  }
  else{
    comm = MPI_COMM_WORLD; 
  }

  MPI_Comm_size(comm,&ntids);
  MPI_Comm_rank(comm,&mytid);

  /* Select the backend and allocate its package-specific info block. */
  if(strcmp(@I0@,"AZTEC") == 0){
    package = 1;
    ierr = aztec_allocate_info_block(comm,&info);
  }
  else if(strcmp(@I0@,"PETSC") == 0){
    package = 2;
    ierr = petsc_allocate_info_block(comm,&info);
  }
  else if(strcmp(@I0@,"ITPACK") == 0){
    package = 5;
    ierr = itpack_allocate_info_block(comm,&info);
  }
  else
    return NS_PROT_BAD_VALUES;

  /* Common iteration controls: tolerance (@I3@), max iterations (@I4@). */
  ierr = iterative_set_params(info,*@I3@,*@I4@);

  /* Package-specific defaults (Aztec needs none beyond the common ones). */
  if(strcmp(@I0@,"AZTEC") == 0){
  }
  else if(strcmp(@I0@,"PETSC") == 0){
    ierr = petsc_params(info,1.e-6,1.e+5,NULL);
  }
  else if(strcmp(@I0@,"ITPACK") == 0){
    itpack_params
          ((itpack_info_block)info,NetsolveChebychev,NetsolveSSOR);
  }

  /* Normalize a 1-based CRS matrix to the 0-based indexing used below. */
  if(@pI1@[0] == 1)
    ierr = crs_mat_tobase0(@I1@, @iI1@, @pI1@, *@mI1@);

  /* Provisional output buffers so every rank returns valid pointers;
     rank 0 frees and re-allocates them at full size further down. */
  first = 0;
  local_size = (*@mI1@ > 1) ? *@mI1@ : 1;
  @O0@ = (double*)malloc(local_size*sizeof(double));
  @O1@ = (int*)malloc(sizeof(int));

  /* Rank 0 knows the global problem size (length of the rhs); share it. */
  if(mytid == 0){
    total_size = *@mI2@;
  }
  MPI_Bcast(&total_size,1,MPI_INT,0,comm);

  /* Block-partition the rows: this rank owns rows [first, last). */
  ierr = divide(comm,total_size,&first,&last);
  local_size = last - first;
  ierr = crs_nnzeros(@pI1@,total_size,0,&nnzero);
  ptr = (int *) malloc((local_size+1)*sizeof(int));
  idx = (int *) malloc((nnzero+1)*sizeof(int));
  value_arr = (double *) malloc((nnzero+1)*sizeof(double));
  rhs = (double *) malloc((local_size)*sizeof(double));
  sol = (double *) malloc((local_size)*sizeof(double));

  /* Copy this rank's slice of the values and column indices ... */
  for(i = (@pI1@[first]-@pI1@[0]);i<(@pI1@[last]-@pI1@[0]);i++){
    value_arr[i-(@pI1@[first]-@pI1@[0])] = @I1@[i];
    idx[i-(@pI1@[first]-@pI1@[0])] = @iI1@[i];
  }

  /* ... and its row pointers (rebased to start at 0) and rhs entries. */
  for(i=first;i< last;i++){
    ptr[i-first] = @pI1@[i] - @pI1@[first];
    rhs[i-first] = @I2@[i];
  }
  ptr[last-first] = @pI1@[last] - @pI1@[first];

  /* Run the selected iterative solver on the local slice. */
  ierr = netsolve_iterative_solve
        (package,Argc,Args,comm, first,local_size,0,
	 value_arr, idx, ptr,  rhs, sol, info);

  ierr = iterative_get_return_params(info,&conv,&its);

  if(mytid == 0){
    /* Replace the provisional buffers with full-size output storage
       (freeing first avoids leaking the provisional allocations). */
    free(@O0@);
    free(@O1@);
    @O0@ = (double*)malloc(total_size*sizeof(double));
    @O1@ = (int*)malloc(sizeof(int));
    result = (double**)malloc(sizeof(double*)*ntids);
    status = (MPI_Status*)malloc(sizeof(MPI_Status)*ntids);
    for(i=0;i<ntids;i++)
      result[i] = (double*)malloc(sizeof(double)*total_size);
    iterations = (int*)malloc(sizeof(int)*ntids);
    convs = (int*)malloc(sizeof(int)*ntids);
    *@O1@ = 0;
    *@mO0@ = total_size;

    /* receive the results from all the other processes */
    for(i=1;i<ntids;i++)
      MPI_Recv(result[i], total_size, MPI_DOUBLE, i, 7, comm, status+i);

    /* Assemble the global solution: rank 0's own slice first, ... */
    for(index=0;index<local_size;index++){
      @O0@[index] = sol[index];
    }

    /* ... then each remote slice, sized from its receive status. */
    for(i=1;i<ntids;i++){
      MPI_Get_count(status+i, MPI_DOUBLE, &this_size);
      for(j=0;j<this_size;j++){
        @O0@[index] = result[i][j];
        index++;
      }
   }
  }
  else{
    MPI_Send(sol, local_size, MPI_DOUBLE, 0, 7, comm);
  }

  /* Gather per-rank iteration counts and convergence flags on rank 0
     (receive buffers are significant at the root only). */
  MPI_Gather(&its,1, MPI_INT, iterations, 1, MPI_INT, 0, comm);
  MPI_Gather(&conv,1, MPI_INT, convs, 1, MPI_INT, 0, comm);

  if(mytid == 0){
    /* NOTE(review): success is declared if ANY rank reports convergence;
       confirm whether ALL ranks should be required instead. */
    flag = 0;
    for(i=0;i<ntids;i++){
      if(convs[i]) flag = 1;
    }
    if (flag) {
      *@O1@ = iterations[0];
      printf("Convergence in %d iterations\\n",*@O1@);
    } else {
      printf("No convergence in the specified number of iterations.\\n");
    }
  }

  /* Release scratch storage.  @O0@/@O1@ are outputs owned by the caller;
     Args is deliberately not freed in case the solver initialization
     retained a reference to the argv array. */
  free(ptr);
  free(idx);
  free(value_arr);
  free(rhs);
  free(sol);
  if(mytid == 0){
    for(i=0;i<ntids;i++)
      free(result[i]);
    free(result);
    free(status);
    free(iterations);
    free(convs);
  }

@END_CODE
