#include "BSprivate.h"

/*+ BSpar_symmlq - SYMMLQ; reverse communication

    Input Parameters:
.   num_cols - the length of the vector on this processor
.   rhs - the rhs on this processor
.   x - the solution vector on this processor
.   resid - the residual vector on this processor
.   z - the work vector on this processor
.   v - the work vector on this processor
.   w - the work vector on this processor
.   r2 - the work vector on this processor
.   beta thru conv - symmlq values that aren't of much interest
.   cur_step - the current cg step
.   cur_phase - the current cg phase
.   procinfo - the usual processor stuff

    Output Parameters:
.   many of the input parameters are changed, it is just the usual
    SYMMLQ algorithm

    Returns:
    The action of the calling program depends on the return value
    SYMM_MATVECXR: multiply A*x and put the results in resid
    SYMM_MATVECVZ: multiply A*v and put the results in z
    SYMM_MSOLVE1: multiply A(-1)*resid and put the results in z
    SYMM_MSOLVE2: multiply A(-1)*r2 and put the results in z
    SYMM_MSOLVEB: multiply A(-1)*rhs and put the results in z
    SYMM_ERROR: error during SYMMLQ
    SYMM_DONE: SYMMLQ is finished, convergence has occurred

    Notes:
    The first time that BSpar_bcg is called, cur_step and
    cur_phase should be 0 and conv should be FALSE.

 +*/
int BSpar_symmlq(num_cols,rhs,x,resid,z,v,w,r2,beta,alpha,obeta,gbar,dbar,rhs1,rhs2,
				bstep,snprod,beta1,tnorm,ynorm2,bestnm,cgpt,conv,
				cur_step,cur_phase,procinfo) 
int	num_cols;
FLOAT	*rhs;
FLOAT	*x;
FLOAT	*resid;
FLOAT	*z;
FLOAT	*v;
FLOAT	*w;
FLOAT	*r2;
FLOAT	*beta;
FLOAT	*alpha;
FLOAT	*obeta;
FLOAT	*gbar;
FLOAT	*dbar;
FLOAT	*rhs1;
FLOAT	*rhs2;
FLOAT	*bstep;
FLOAT	*snprod;
FLOAT	*beta1;
FLOAT	*tnorm;
FLOAT	*ynorm2;
FLOAT	*bestnm;
int	*cgpt;
int	conv;
int	*cur_step;
int	*cur_phase;
BSprocinfo *procinfo;
{
	int	i;
	FLOAT	t1, t2;
	FLOAT	s;
	FLOAT	gamma, cs, sn, delta, epsln;
	FLOAT	ynorm, anorm, epsa, gpert, diag;
	FLOAT	elqnrm, qrnorm, cgnorm;
	FLOAT	zbar;
	FLOAT	eps;
	FLOAT	DLAMCH();
	char EPS = 'E';

	eps = DLAMCH(&EPS);
	switch((*cur_phase)) {
		case 0: {
			/* compute A*x=r */
			(*cur_phase) = 1;
			(*cur_step) = 0;
			return(SYMM_MATVECXR);
		}
		case 1: {
			/* compute r and then z = M(-1)*r */
			for (i=0;i<num_cols;i++) {
				resid[i] = rhs[i] - resid[i];
			}
			(*cur_phase) = 2;
			return(SYMM_MSOLVE1);
		}
		case 2: {
			/* compute beta1 and then z = A*v */
			(*beta) = BSpar_ip(num_cols,z,resid,procinfo); CHKERR(0);
			if ((*beta) < 0.0) return(SYMM_ERROR);
			(*beta) = sqrt((*beta));
			(*beta1) = (*beta);
			s = 1.0/(*beta);
			for (i=0;i<num_cols;i++) {
				v[i] = s*z[i];
			}
			(*cur_phase) = 3;
			return(SYMM_MATVECVZ);
		}
		case 3: {
			/* compute alpha, z, r2, and then z = M(-1)*r2 */
			(*alpha) = BSpar_ip(num_cols,v,z,procinfo); CHKERR(0);
			s = -(*alpha)/(*beta);
			for (i=0;i<num_cols;i++) {
				z[i] += s*resid[i];
			}
			t1 = BSpar_ip(num_cols,v,z,procinfo); CHKERR(0);
			t2 = BSpar_ip(num_cols,v,v,procinfo); CHKERR(0);
			s = -t1/t2;
			for (i=0;i<num_cols;i++) {
				z[i] += s*v[i];
			}
			for (i=0;i<num_cols;i++) {
				r2[i] = z[i];
			}
			(*cur_phase) = 4;
			return(SYMM_MSOLVE2);
		}
		case 4: {
			/* compute beta */
			(*obeta) = (*beta);
			(*beta) = BSpar_ip(num_cols,z,r2,procinfo); CHKERR(0);
			if ((*beta) < 0.0) return(SYMM_ERROR);
			(*beta) = sqrt((*beta));

			/* initialize accumulators */
			(*gbar) = (*alpha);
			(*dbar) = (*beta);
			(*bstep) = 0.0;
			(*snprod) = 1.0;
			(*rhs1) = (*beta1);
			(*rhs2) = 0.0;
			(*tnorm) = 0.0;
			(*ynorm2) = 0.0;
			for (i=0;i<num_cols;i++) {
				w[i] = 0.0;
			}

			/* estimate norms */
			(*tnorm) += (*alpha)*(*alpha) + 2.0*(*beta)*(*beta);
			anorm = sqrt(*tnorm);
			ynorm = sqrt(*ynorm2);
			epsa = anorm*eps;
			gpert = (*gbar) + epsa;
			if ((*gbar) < 0.0) gpert = (*gbar) - epsa;
			diag = fabs(gpert);
			elqnrm = sqrt((*rhs1)*(*rhs1)+(*rhs2)*(*rhs2));
			qrnorm = (*snprod)*(*beta1);
			cgnorm = qrnorm * (*beta)/diag;
			if (cgnorm < elqnrm) {
				(*cgpt) = TRUE;
				(*bestnm) = cgnorm;
			} else {
				(*cgpt) = FALSE;
				(*bestnm) = elqnrm;
			}

			/* compute v and then z = A*v */
			s = 1.0/(*beta);
			for (i=0;i<num_cols;i++) {
				v[i] = s*z[i];
			}
			(*cur_phase) = 6;
			return(SYMM_MATVECVZ);
		}
		case 5: {
			/* compute beta */
			(*obeta) = (*beta);
			(*beta) = BSpar_ip(num_cols,z,r2,procinfo); CHKERR(0);
			if ((*beta) < 0.0) return(SYMM_ERROR);
			(*beta) = sqrt((*beta));

			/* get x */
			gamma = sqrt((*gbar)*(*gbar)+(*obeta)*(*obeta));
			cs = (*gbar)/gamma;
			sn = (*obeta)/gamma;
			delta = cs*(*dbar) + sn*(*alpha);
			(*gbar) = sn*(*dbar) - cs*(*alpha);
			epsln = sn*(*beta);
			(*dbar) = -cs*(*beta);
			s = (*rhs1)/gamma;
			t1 = s*cs;
			t2 = s*sn;
			for (i=0;i<num_cols;i++) {
				x[i] = (w[i]*t1 + v[i]*t2) + x[i];
				w[i] = w[i]*sn - v[i]*cs;
			}
			(*rhs1) = (*rhs2) - delta*s;
			(*rhs2) = -epsln*s;
			(*bstep) = (*snprod)*cs*s + (*bstep);
			(*snprod) = (*snprod)*sn;
			(*ynorm2) += s*s;

			/* estimate norms */
			(*tnorm) += (*alpha)*(*alpha) + 2.0*(*beta)*(*beta);
			anorm = sqrt(*tnorm);
			ynorm = sqrt(*ynorm2);
			epsa = anorm*eps;
			gpert = (*gbar) + epsa;
			if ((*gbar) < 0.0) gpert = (*gbar) - epsa;
			diag = fabs(gpert);
			elqnrm = sqrt((*rhs1)*(*rhs1)+(*rhs2)*(*rhs2));
			qrnorm = (*snprod)*(*beta1);
			cgnorm = qrnorm * (*beta)/diag;
			if (cgnorm < elqnrm) {
				(*cgpt) = TRUE;
				(*bestnm) = cgnorm;
			} else {
				(*cgpt) = FALSE;
				(*bestnm) = elqnrm;
			}

			/* compute v and then z = A*v */
			s = 1.0/(*beta);
			for (i=0;i<num_cols;i++) {
				v[i] = s*z[i];
			}
			(*cur_phase) = 6;
			(*cur_step)++;
			return(SYMM_MATVECVZ);
		}
		case 6: {
			if (conv) {
				/* do next to last finishing step and then z = A*rhs */
				if (cgpt) {
					zbar = (*rhs1)/(*gbar);
					(*bstep) = (*snprod)*zbar + (*bstep);
					ynorm = sqrt((*ynorm2) + zbar*zbar);
					for (i=0;i<num_cols;i++) {
						x[i] += zbar*w[i];
					}
				}
				(*bstep) = (*bstep) / (*beta1);
				for (i=0;i<num_cols;i++) {
					z[i] = rhs[i];
				}
				(*cur_phase) = 7;
				return(SYMM_MSOLVEB);
			} else {
				/* orthogonalize z and then z = A*r2 */
				s = -(*beta)/(*obeta);
				for (i=0;i<num_cols;i++) {
					z[i] += s*resid[i];
				}
				(*alpha) = BSpar_ip(num_cols,v,z,procinfo); CHKERR(0);
				s = -(*alpha)/(*beta);
				for (i=0;i<num_cols;i++) {
					z[i] += s*r2[i];
				}
				for (i=0;i<num_cols;i++) {
					resid[i] = r2[i];
				}
				for (i=0;i<num_cols;i++) {
					r2[i] = z[i];
				}
				(*cur_phase) = 5;
				return(SYMM_MSOLVE2);
			}
		}
		case 7: {
				for (i=0;i<num_cols;i++) {
					x[i] += (*bstep)*z[i];
				}
				return(SYMM_DONE);
		}
	}
}
