#include <stdio.h>
#include "petsc.h"
#include "vec.h"
#include "parpre.h"
#include "ksp.h"
#include "sys.h"
#include "pc.h"
#include "options.h"
#include "./examples.h"
#include <math.h>

#undef __FUNC__
#define __FUNC__ "cg"
/*
 * cg - preconditioned conjugate gradient iteration for A*sol = rhs.
 *
 * Input:
 *   comm   communicator over which the vectors are distributed and the
 *          inner products are reduced
 *   A      system matrix, applied with MatMult
 *   B      preconditioner, applied with PCApply (solves M z = r)
 *   rhs    local part of the right-hand side, lsize entries
 *   lsize  number of locally owned vector entries
 *
 * Output:
 *   sol    local part of the computed solution, lsize entries; the
 *          initial guess is zero, any incoming contents are overwritten
 *
 * The iteration runs while rho/rho_0 > 1e-7 and it < 100, where
 * rho = r^t z is summed over all processes and rho_0 is its value in
 * the first iteration.  Process 0 prints rho every iteration and the
 * total iteration time at the end.
 *
 * Returns 0 on normal completion.  PETSc errors are handled through
 * CHKERRQ, which is expected to return the error code to the caller.
 */
int cg(MPI_Comm comm,Mat A, PC B,double *sol, double *rhs, int lsize)
{
  double *z,*r,*p,*q;
  double zero=0.0,one=1.0;
  double tol = 1.0e-7;   /* relative reduction of r^t z required */
  int    max_it = 100;   /* loop runs while it < max_it */
  double rho = one, rho_0 = one,l_rho,rho_old,pq,l_pq,beta,alpha;
  int it,mytid,i;
  Vec r_vec = 0, z_vec = 0, p_vec = 0,q_vec = 0;
  int ierr;
  double t;

  MPI_Comm_rank(comm,&mytid);

  /* these are the arrays that you presumably created in your
   * application */
  z = (double *) PetscMalloc(lsize*sizeof(double));
  r = (double *) PetscMalloc(lsize*sizeof(double));
  p = (double *) PetscMalloc(lsize*sizeof(double));
  q = (double *) PetscMalloc(lsize*sizeof(double));

  /* here is how you turn those arrays into something
   * that ParPre understands; the vectors share storage with the
   * arrays, so writing r[i] updates r_vec and so on */
  ierr = VecCreateMPIWithArray(comm,lsize,PETSC_DECIDE,r,&r_vec);
  CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(comm,lsize,PETSC_DECIDE,z,&z_vec);
  CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(comm,lsize,PETSC_DECIDE,p,&p_vec);
  CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(comm,lsize,PETSC_DECIDE,q,&q_vec);
  CHKERRQ(ierr);

  /* Init: r = b-A*x0, with x0=0 makes r=b */
  for (i=0; i<lsize; i++) {
    sol[i] = zero;
    r[i]=rhs[i];
  }

  t = PetscGetTime();
  /* Loop */
  for (it=1; (rho/rho_0>tol) && (it<max_it); it++) {

    /* solve Mz = r */
    ierr = PCApply(B,r_vec,z_vec); CHKERRQ(ierr);

    /* rho = r^t z */
    rho_old = rho; l_rho = zero;
    for (i=0; i<lsize; i++) l_rho = l_rho + r[i]*z[i];
    MPI_Allreduce(&l_rho,&rho,1,MPI_DOUBLE,MPI_SUM,comm);
    if (!mytid) printf("[it=%d] rMr: %e\n",it,rho);

    /* rho == 0 would make alpha, beta and the convergence ratio 0/0.
     * rho is the result of the all-reduce on every process, so all
     * processes take this exit together. */
    if (rho == zero) break;

    if (it==1) {
      /* p=z */
      rho_0 = rho;
      for (i=0; i<lsize; i++) p[i] = z[i];
    } else {
      /* p=z+beta p */
      beta = rho/rho_old;
      for (i=0; i<lsize; i++) p[i] = z[i]+beta*p[i];
    }

    /* q = A*p */
    ierr = MatMult(A,p_vec,q_vec); CHKERRQ(ierr);

    /* alpha = rho / p^t q;*/
    l_pq = zero;
    for (i=0; i<lsize; i++) l_pq = l_pq + p[i]*q[i];
    MPI_Allreduce(&l_pq,&pq,1,MPI_DOUBLE,MPI_SUM,comm);
    /* breakdown: no step length can be computed (pq is also global) */
    if (pq == zero) break;
    alpha = rho / pq;

    /* x = x+alpha p, accumulated directly in the caller's array */
    for (i=0; i<lsize; i++) sol[i]=sol[i]+alpha*p[i];

    /* r = r-alpha q */
    for (i=0; i<lsize; i++) r[i]=r[i]-alpha*q[i];

  }
  t = PetscGetTime()-t;
  if (!mytid) printf("Total iteration time: %e\n",t);

  /* destroy the vector wrappers first, then the arrays they wrapped */
  ierr = VecDestroy(r_vec); CHKERRQ(ierr);
  ierr = VecDestroy(z_vec); CHKERRQ(ierr);
  ierr = VecDestroy(p_vec); CHKERRQ(ierr);
  ierr = VecDestroy(q_vec); CHKERRQ(ierr);

  PetscFree(z);
  PetscFree(r);
  PetscFree(p);
  PetscFree(q);

  return 0;
}
