#include <stdio.h>
#include "petsc.h"
#include "vec.h"
#include "parpre.h"
#include "ksp.h"
#include "sys.h"
#include "pc.h"
#include "options.h"
#include "./examples.h"
#include <math.h>

#undef __FUNC__
#define __FUNC__ "cg"
/*
   cg - Preconditioned conjugate gradient iteration for A*sol = rhs,
        starting from the zero vector.

   Input parameters:
     comm  - communicator used for the global reductions and for the
             vector wrappers
     A     - matrix, applied through MatMult
     B     - preconditioner, applied through PCApply
     rhs   - local part of the right-hand side (lsize entries, not modified)
     lsize - number of entries of sol/rhs stored on this process

   Output parameters:
     sol   - local part of the computed solution (lsize entries);
             overwritten, the initial contents are ignored
     its   - value of the iteration counter when the loop was left.  When
             the loop ends through its own condition this is one more than
             the number of completed iterations.

   Returns 0 on success, otherwise the first nonzero error code reported
   by a PETSc or MPI call.

   Notes:
     The loop runs while sqrt(rr/rr_0) > 1.e-5 and it < 500, where rr is
     the squared 2-norm of the residual measured at the start of the
     previous iteration and rr_0 its value in the first iteration.

     Sign convention: the work vector r holds A*x - rhs, i.e. the negative
     of the usual residual.  The search direction p therefore also carries
     a flipped sign, which is why the solution update subtracts alpha*p.
*/
int cg(MPI_Comm comm,Mat A, PC B,double *sol, double *rhs, int lsize,
       int *its)
{
  double *z,*r,*p,*q;
  double zero=0.0,one=1.0;
  double rtol = 1.e-5;                  /* relative residual tolerance */
  int    maxit = 500;                   /* loop runs while it < maxit */
  double rho = one, rr = one, rr_0 = one,rho_old,pq,beta,alpha;
  int it,i;
  Vec r_vec = 0, z_vec = 0, p_vec = 0,q_vec = 0;
  int ierr;
#ifdef USE_PETSC_BOPT_g
  double t,tt;
#endif

  PetscFunctionBegin;

  /* Work arrays.  One extra entry is requested so that a process with
   * lsize == 0 never asks for a zero-byte allocation. */
  z = (double *) PetscMalloc((lsize+1)*sizeof(double)); CHKPTRQ(z);
  r = (double *) PetscMalloc((lsize+1)*sizeof(double)); CHKPTRQ(r);
  p = (double *) PetscMalloc((lsize+1)*sizeof(double)); CHKPTRQ(p);
  q = (double *) PetscMalloc((lsize+1)*sizeof(double)); CHKPTRQ(q);

  /* Wrap the plain arrays in Vec objects so they can be handed to
   * PCApply and MatMult; the vectors share storage with the arrays. */
  ierr = VecCreateMPIWithArray(comm,lsize,PETSC_DECIDE,r,&r_vec);
  CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(comm,lsize,PETSC_DECIDE,z,&z_vec);
  CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(comm,lsize,PETSC_DECIDE,p,&p_vec);
  CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(comm,lsize,PETSC_DECIDE,q,&q_vec);
  CHKERRQ(ierr);

  /* Init: x0 = 0, so r = A*x0 - rhs = -rhs (see sign convention above) */
  for (i=0; i<lsize; i++) {
    sol[i] = zero;
    r[i]   = -rhs[i];
  }

#ifdef USE_PETSC_BOPT_g
  ierr = PetscGetTime(&t); CHKERRQ(ierr);
#endif
  /* Loop */
  for (it=1; (sqrt(rr/rr_0)>rtol) && (it<maxit); it++) {
    double l_rho,l_pq,l_rr;

    /* solve Mz = r */
    ierr = PCApply(B,r_vec,z_vec); CHKERRQ(ierr);

    /* rho = r^t z and rr = r^t r: local sums first, then global sums */
    rho_old = rho; l_rho = zero; l_rr = zero;
    for (i=0; i<lsize; i++) {
      l_rho = l_rho + r[i]*z[i];
      l_rr  = l_rr + r[i]*r[i];
    }
    ierr = MPI_Allreduce(&l_rho,&rho,1,MPI_DOUBLE,MPI_SUM,comm); CHKERRQ(ierr);
    ierr = MPI_Allreduce(&l_rr,&rr,1,MPI_DOUBLE,MPI_SUM,comm); CHKERRQ(ierr);
    PetscPrintf(comm,"[ iteration %d ] rMr = %e, |r| = %e\n",it,rho,sqrt(rr));

    /* An exactly zero residual (e.g. zero right-hand side) means sol is
     * already the answer; going on would compute alpha = 0/0.  rr comes
     * out of a global reduction, so every process takes this exit
     * together. */
    if (rr == zero) break;

    if (it==1) {
      /* remember the initial residual norm; p=z */
      rr_0 = rr;
      for (i=0; i<lsize; i++) p[i] = z[i];
    } else {
      /* p=z+beta p */
      beta = rho/rho_old;
      for (i=0; i<lsize; i++) p[i] = z[i]+beta*p[i];
    }

    /* q = A*p */
    ierr = MatMult(A,p_vec,q_vec); CHKERRQ(ierr);

    /* alpha = rho / p^t q */
    l_pq = zero;
    for (i=0; i<lsize; i++) l_pq = l_pq + p[i]*q[i];
    ierr = MPI_Allreduce(&l_pq,&pq,1,MPI_DOUBLE,MPI_SUM,comm); CHKERRQ(ierr);
    alpha = rho / pq;

    /* x = x - alpha p, written straight into the caller's array
     * (minus sign because p carries the flipped sign of r) */
    for (i=0; i<lsize; i++) sol[i] = sol[i]-alpha*p[i];

    /* r = r - alpha q */
    for (i=0; i<lsize; i++) r[i] = r[i]-alpha*q[i];
  }
  *its = it;
#ifdef USE_PETSC_BOPT_g
  ierr = PetscGetTime(&tt); CHKERRQ(ierr);
  t = tt-t;
  PetscPrintf(comm,"Total iteration time: %e\n",t);
#endif

  /* Destroy the wrappers first, then release the arrays they referred to */
  ierr = VecDestroy(r_vec); CHKERRQ(ierr);
  ierr = VecDestroy(z_vec); CHKERRQ(ierr);
  ierr = VecDestroy(p_vec); CHKERRQ(ierr);
  ierr = VecDestroy(q_vec); CHKERRQ(ierr);

  PetscFree(z);
  PetscFree(r);
  PetscFree(p);
  PetscFree(q);

  PetscFunctionReturn(0);
}
