#include <stdio.h>
/*---------------------------------------------------------------*/
/*     	CAPSS: A Cartesian Parallel Sparse Solver                */
/*     	Beta Release                                             */
/*      Author: Padma Raghavan                                   */
/*---------------------------------------------------------------*/
#include	"cnd_stats.h"

/*
 *  dist_fsolve
 *
 *  Column-oriented lower triangular forward solve.
 *  Uses fan-in communication without compute-ahead.
 *
 *  For every global index k in [0, n):
 *    - start a partial sum from contrib_b[have_contrib[k]] when
 *      have_contrib[k] != -1 (the entry is then reset to -1), else from 0;
 *    - subtract b[i] * col[i][k - mycols[i]] for each local column i that
 *      has already been divided by its diagonal;
 *    - if map[k] == me, receive nprocs-1 partial sums (message type
 *      k + msg_type_offset), add everything into the next local entry of b
 *      and, when k < fact_n, divide that entry by the first element of
 *      its column;
 *    - otherwise send the partial sum to processor map[k] and count one
 *      message in stats[so_d_c].
 *
 *  start_proc and ncols are accepted but not used by this routine.
 */
dist_fsolve (me, nprocs, start_proc,
	fact_n, n, ncols, map, mycols, col, b,
	have_contrib, contrib_b, msg_type_offset)
int		me, nprocs, start_proc, fact_n, n, ncols;
int		*map, *mycols, *have_contrib;
int		msg_type_offset;
double		**col, *b, *contrib_b;
{
	extern double	stats[];

	int	nbytes;		/* message length in bytes (one double)      */
	int	n_solved;	/* local columns already scaled by diagonal  */
	int	n_local;	/* local entries of b consumed so far        */
	int	gcol, c, src, slot, tag;
	double	partial, incoming;

	nbytes   = sizeof(double);
	n_solved = 0;
	n_local  = 0;

	for (gcol = 0; gcol < n; gcol++) {
		/* seed the partial sum with any pending contribution */
		slot = have_contrib[gcol];
		if (slot == -1) {
			partial = 0;
		} else {
			partial = contrib_b[slot];
			have_contrib[gcol] = -1;
		}

		/* local updates from columns whose unknown is known */
		for (c = 0; c < n_solved; c++)
			partial -= b[c] * col[c][gcol - mycols[c]];

		tag = gcol + msg_type_offset;
		if (map[gcol] != me) {
			/* not the owner: ship the partial sum to the owner */
			send0(&partial, nbytes, tag, map[gcol]);
			stats[so_d_c] += 1;
			continue;
		}

		/* owner: fan in one partial sum from each other processor */
		for (src = 0; src < nprocs - 1; src++) {
			recv0(&incoming, nbytes, tag);
			partial += incoming;
		}
		b[n_local] += partial;
		if (gcol < fact_n) {
			b[n_local] /= col[n_local][0];
			n_solved++;
		}
		n_local++;
	}

}/*dist_fsolve*/

/*
 *  dist_mat_vec_and_reduce
 *
 *  Column-oriented lower triangular forward solve by mat vec with inverse.
 *  Uses fan-in communication without compute-ahead.
 *
 *  Works on the file-external work vectors colk and colj.  Steps:
 *    1. Load colk[0..n) with this processor's pending contributions
 *       (have_contrib / contrib_b, entries are reset to -1) plus its own
 *       entries of b, then combine across processors with p_subset_sum.
 *    2. Copy the combined values back into the local b.
 *    3. For the first `limit` local columns (those with global index
 *       < fact_n) accumulate b[j] * col[j] into colk[mycols[j]..fact_n),
 *       combine, and copy back into b[0..limit).
 *    4. For the remaining rows [fact_n, n) subtract b[j] * (tail of
 *       col[j]) from the right-hand side, combine, and copy back into
 *       b[limit..ncols).
 *    5. Add 2 * ceil(log2(nprocs)) * n to stats[so_d_c].
 *
 *  NOTE(review): p_subset_sum, dfill and daxpy1_ are defined elsewhere;
 *  the descriptions above assume an element-wise sum over processors, a
 *  constant fill, and y += a*x respectively -- confirm against their
 *  definitions.
 */
dist_mat_vec_and_reduce (me, nprocs, 	start_proc, 
	fact_n,	 n, ncols, map, mycols, col, b,
	have_contrib, contrib_b, msg_type_offset)

int 		me,	nprocs,	start_proc,	fact_n,
		n, 	ncols, *map, *mycols , *have_contrib, msg_type_offset;
double 		**col, *b, *contrib_b ;
{
	int  m, inc, i, j, k, limit, my_pos, next_msg_type;
	
	double t, zero, *r , *q;
	extern	 double *colk, *colj;
	extern	double stats[];

	/*
	 * limit = number of local columns whose global index is < fact_n.
	 * It is only assigned inside the loop when k reaches fact_n-1, so it
	 * must start at 0: otherwise fact_n == 0 leaves it indeterminate and
	 * every loop below that is bounded by it has undefined behaviour.
	 */
	limit = 0;
	for (k = j = 0 ; k < n ; k++) {
		if (have_contrib[k] != -1) {
			colk[k] = contrib_b[have_contrib[k]];
			have_contrib[k] = -1;
		}
		else	colk[k] = 0.0;
		if (map[k] == me) {
			colk[k] += b[j];
			j++;
		}
		if (k == (fact_n-1)) limit = j;
	}
	my_pos = me - start_proc;
	next_msg_type = msg_type_offset;

				/* gather b */
	p_subset_sum(me, my_pos, colj, colk, n, nprocs, start_proc,
			next_msg_type);

	/* each p_subset_sum call is given its own block of 2*nprocs types */
	next_msg_type += 2*nprocs;
	for (j = 0 ; j < ncols ; j++) {
		b[j] = colk[mycols[j]];
	}
	inc = 1;
	zero = 0.0;
	m = fact_n;
	dfill(&m, &zero, colk, &inc) ;

				/* local sparse mat vec */
	for (j = 0 ; j < limit ; j++) {
		m = fact_n - mycols[j];
		r = colk + mycols[j];
		q = col[j];
		t = b[j];
		daxpy1_(&m, &t, q, &inc, r);
	}
	p_subset_sum(me, my_pos, colj, colk, fact_n, nprocs, 
			start_proc, next_msg_type);
	next_msg_type += 2*nprocs;

				/* gather new b till fact_n */
	for (j = 0 ; j < limit ; j++) {
		b[j] = colk[mycols[j]];
	}

	/* rebuild the trailing part colk[fact_n..n) from the local b */
	m = n - fact_n;
	r = colk + fact_n;
	dfill(&m, &zero, r, &inc) ;
	for (j = limit ; j < ncols ; j++) {
		colk[mycols[j]] = b[j]; 
	}
		
				/* now reduce */
	m = n - fact_n;
	for (j = 0 ; j < limit ; j++) {
		/* skip the first fact_n - mycols[j] entries of column j */
		q = col[j] + fact_n - mycols[j];
		t = - b[j];
		daxpy1_(&m, &t, q, &inc, r);
	}

				/* gather new b  fact_n - n */
	p_subset_sum(me, my_pos, colj+fact_n, r, m, nprocs, 
			start_proc, next_msg_type);
	for (j = limit ; j < ncols ; j++) {
		b[j] = colk[mycols[j]];
	}

	/* i = smallest value with (1 << i) >= nprocs */
	for (i = 0; ((1 << i) < nprocs); )
		i++;
	stats[so_d_c] += 2*i *n;

}/*dist_mat_vec_and_reduce*/


/*
 *  dist_b_reduce
 *
 *  For each of the `size` local rows j, subtracts from b[j] the dot
 *  product of later_b[start_col..end_col] with the matching stretch of
 *  row[j].  Row j's storage begins at global index myrows[j], so the
 *  stretch starts at row[j] + (start_col - myrows[j]).
 *
 *  me, map, l_struc_curr and l_struc_curr_size are accepted but not used
 *  (map is declared as a plain int in this routine).
 */
dist_b_reduce(	me, 	map,	myrows,
		size,	start_col,	end_col,	l_struc_curr,
		l_struc_curr_size,	row,	later_b,	b)

int		me, map, *myrows, size;
int		start_col, end_col, *l_struc_curr, l_struc_curr_size;
double		**row, *later_b, *b;
{
		extern double	ddot_();

		int	unit_stride = 1;
		int	span = end_col - start_col + 1;
		double	*known = later_b + start_col;
		double	*coef;
		int	r;

		r = 0;
		while (r < size) {
			coef = row[r] + start_col - myrows[r];
			b[r] -= ddot_(&span, known, &unit_stride,
					coef, &unit_stride);
			r++;
		}

}/*end*/
/*
 *  row_ubs
 *
 *  Row-oriented upper triangular backward solve.
 *  Uses fan-out communication without send-ahead.
 *
 *  Walks the global indices k = n-1 .. 0.  The owner of k (map[k] == me)
 *  divides its current last local entry of b by the first element of that
 *  row and sends the result to each of the other nprocs-1 processors,
 *  starting with map[k-1] (or start_proc when k == 0) and wrapping around
 *  the range [start_proc, start_proc + nprocs - 1].  Every other processor
 *  receives the value.  All processors then update their remaining local
 *  entries of b (skipped for k == 0) and store the value in stack_b[k].
 */
row_ubs ( me, nprocs, start_proc,
	n, nrows, map, myrows, row, b, stack_b, msg_type_offset )
int	me, nprocs, start_proc, n, nrows;
int	*map, *myrows, msg_type_offset;
double	**row, *b, *stack_b;
{
	extern double	stats[];

	int	nbytes = sizeof(double);
	int	last_proc = start_proc + nprocs - 1;
	int	top = nrows - 1;	/* last unsolved local row */
	int	gcol, r, dest, n_sent, tag;
	double	xk;

	for (gcol = n - 1; gcol >= 0; gcol--) {
		tag = gcol + msg_type_offset;

		if (map[gcol] != me) {
			recv0(&xk, nbytes, tag);
		} else {
			b[top] /= row[top][0];
			xk = b[top];
			top--;

			stats[so_d_c] += nprocs-1;

			/* fan out, beginning with the owner of the next index */
			dest = (gcol > 0) ? map[gcol-1] : start_proc;
			n_sent = 0;
			while (n_sent < nprocs - 1) {
				if (dest != me) {
					send0(&xk, nbytes, tag, dest);
					n_sent++;
				}
				dest = (dest < last_proc) ? (dest + 1) : start_proc;
			}
		}

		if (gcol > 0) {
			for (r = top; r >= 0; r--)
				b[r] -= xk * row[r][gcol - myrows[r]];
		}
		stack_b[gcol] = xk;
	}

}

/*
 *  dist_mat_vec
 *
 *  Row-oriented upper triangular backward solve using a mat vec product
 *  with the inverse.
 *
 *  Scatters the local b into the external work vector colk, combines it
 *  across processors, then for each local row i stores
 *  ddot_(row[i], colk[myrows[i]..n)) into both b[i] and
 *  stack_b[myrows[i]].  stack_b is combined across processors,
 *  2 * ceil(log2(nprocs)) * n is added to stats[so_d_c], and stack_b is
 *  handed to check_spd.
 *
 *  NOTE(review): assumes dfill is a constant fill and p_subset_sum an
 *  element-wise sum over processors using colj as scratch -- confirm.
 *  map is accepted but not used.
 */
dist_mat_vec ( me, nprocs, start_proc,
	n, nrows, map, myrows, row, b, stack_b, msg_type_offset )
int	me, nprocs, start_proc, n, nrows;
int	*map, *myrows, msg_type_offset;
double	**row, *b, *stack_b;
{
	extern double	*colk, *colj;
	extern double	stats[];
	extern double	ddot_();

	double	zero = 0.0;
	int	unit_stride = 1;
	int	len = n;
	int	my_pos = me - start_proc;
	int	tag = msg_type_offset;
	int	r, first, log2p;

	/* assemble the full right-hand side in colk */
	dfill(&len, &zero, colk, &unit_stride);
	for (r = 0; r < nrows; r++)
		colk[myrows[r]] = b[r];
	p_subset_sum(me, my_pos, colj, colk, len, nprocs,
			start_proc, tag);
	tag += 2*nprocs;

	dfill(&len, &zero, stack_b, &unit_stride);

	/* one dot product per local row */
	for (r = 0; r < nrows; r++) {
		first = myrows[r];
		len = n - first;
		b[r] = ddot_(&len, row[r], &unit_stride,
				colk + first, &unit_stride);
		stack_b[first] = b[r];
	}
	p_subset_sum(me, my_pos, colj, stack_b, n, nprocs,
			start_proc, tag);

	/* log2p = smallest value with (1 << log2p) >= nprocs */
	log2p = 0;
	while ((1 << log2p) < nprocs)
		log2p++;
	stats[so_d_c] += 2*log2p *n;
	check_spd(stack_b,n);

}/*end dist mat vec */

/*
 *  local_fsolve
 *
 *		Forward Solve
 *
 *  For each of the first fact_size columns j: divide b[j] by the first
 *  element of col[j], then call daxpy1_ with scalar -b[j], the
 *  size-j-1 entries of col[j] after the first, and b[j+1..size).
 *
 *  NOTE(review): daxpy1_ is defined elsewhere; presumably y += a*x.
 */
local_fsolve(size,	 fact_size, 	b, 	col)

int		size, fact_size;
double		*b, **col;
{
		int	unit_stride = 1;
		int	piv, n_below;
		double	neg_x;
		double	*pivcol;

		piv = 0;
		while (piv < fact_size) {
			pivcol = col[piv];
			b[piv] /= pivcol[0];

			n_below = size - piv - 1;
			neg_x = -b[piv];
			daxpy1_(&n_below, &neg_x, pivcol + 1,
					&unit_stride, b + piv + 1);
			piv++;
		}

}/*end local_fsolve*/

/*
 *  local_bsolve
 *
 *		Backward Solve
 *
 *  Runs k from size-1 down to 0.  b[k] is divided by the first element of
 *  row[k]; for k > 0 the dot product of b[k..size) with the size-k
 *  entries of row[k-1] after its first is then subtracted from b[k-1].
 *  Finally b is handed to check_spd.
 *
 *  fact_size is accepted but not used.
 */
local_bsolve(size,	 fact_size, 	b, 	row)

int		size, fact_size;
double		*b, **row;
{
		extern double	ddot_();

		int	unit_stride = 1;
		int	k, n_tail;

		/* rows size-1 .. 1: solve, then fold into the row above */
		k = size - 1;
		while (k > 0) {
			b[k] /= row[k][0];
			n_tail = size - k;
			b[k-1] -= ddot_(&n_tail, b + k, &unit_stride,
					row[k-1] + 1, &unit_stride);
			k--;
		}
		/* row 0 has nothing above it; skipped entirely when size <= 0 */
		if (k == 0)
			b[0] /= row[0][0];

	check_spd(b,size);

}/*end local_bsolve*/
/*
 *  local_b_reduce
 *
 *  For each row j in [0, size), subtracts from b[j] the dot product of
 *  real_b[start_col..end_col] with the matching stretch of row[j].
 *  Row j's storage begins at index j, so the stretch starts at
 *  row[j] + (start_col - j).
 *
 *  l_struc_curr and l_struc_curr_size are accepted but not used.
 */
local_b_reduce(	size,	start_col,	end_col,	l_struc_curr,
		l_struc_curr_size,	row,	real_b,	b)

int		size, start_col, end_col;
int		*l_struc_curr, l_struc_curr_size;
double		**row, *real_b, *b;
{
		extern double	ddot_();

		int	unit_stride = 1;
		int	span = end_col - start_col + 1;
		double	*known = real_b + start_col;
		double	*coef;
		int	r;

		r = 0;
		while (r < size) {
			coef = row[r] + start_col - r;
			b[r] -= ddot_(&span, known, &unit_stride,
					coef, &unit_stride);
			r++;
		}

}/*end*/
/*
 *  row_ubs_ring
 *
 *  Row-oriented upper triangular backward solve.
 *  Uses ring fan-out communication without send-ahead.
 *
 *  Walks the global indices k = n-1 .. 0.  The owner of k (map[k] == me)
 *  divides its current last local entry of b by the first element of that
 *  row and sends the result to map[k-1] (or, for k == 0, to its
 *  predecessor in the ring).  Every other processor receives the value
 *  and forwards it to its own predecessor unless that predecessor is the
 *  owner of k or the processor itself.  All processors then update their
 *  remaining local entries of b (skipped for k == 0) and store the value
 *  in stack_b[k].  Each processor adds 1 to stats[so_d_c] per index.
 *
 *  The predecessor of `me` is me-1, wrapping to start_proc + nprocs - 1.
 *
 *  NOTE(review): when map[k-1] == me the owner's send is addressed to
 *  itself -- confirm the column mapping rules this out when nprocs > 1.
 */
row_ubs_ring ( me, nprocs, start_proc,
	n, nrows, map, myrows, row, b, stack_b, msg_type_offset )
int	me, nprocs, start_proc, n, nrows;
int	*map, *myrows, msg_type_offset;
double	**row, *b, *stack_b;
{
	extern double	stats[];

	int	nbytes = sizeof(double);
	int	last_proc = start_proc + nprocs - 1;
	int	prev_proc = ((me - 1) < start_proc) ? last_proc : (me - 1);
	int	top = nrows - 1;	/* last unsolved local row */
	int	gcol, r, dest, tag;
	double	xk;

	for (gcol = n - 1; gcol >= 0; gcol--) {
		tag = gcol + msg_type_offset;

		if (map[gcol] == me) {
			b[top] /= row[top][0];
			xk = b[top];
			top--;

			stats[so_d_c] += 1;

			/* start the ring at the owner of the next index */
			dest = (gcol > 0) ? map[gcol-1] : prev_proc;
			send0(&xk, nbytes, tag, dest);
		} else {
			recv0(&xk, nbytes, tag);

			/* pass it on unless the ring has come back round */
			if (prev_proc != map[gcol] && prev_proc != me)
				send0(&xk, nbytes, tag, prev_proc);

			stats[so_d_c] += 1;
		}

		if (gcol > 0) {
			for (r = top; r >= 0; r--)
				b[r] -= xk * row[r][gcol - myrows[r]];
		}
		stack_b[gcol] = xk;
	}
}
