/*---------------------------------------------------------------*/
/*     	CAPSS: A Cartesian Parallel Sparse Solver                */
/*     	Beta Release                                             */
/*      Author: Padma Raghavan                                   */
/*---------------------------------------------------------------*/
#include	"n_fact.h"
/*
 * MANY_RHS: constant tested by n_fact() when P > 1.  When non-zero,
 * n_fact() passes dist_factor_invert_spd to d_factor_spd and the
 * dist_f_solve_using_inv / dist_b_solve_using_inv pair to d_solve_spd;
 * when zero (the value set here) it passes dist_factor_spd and the
 * dist_f_solve / dist_b_solve pair instead.
 */
#define		  MANY_RHS		0
n_fact(fname, str)
char	*fname, *str;
{
		extern	double clock0(), stats[];
		double	last_clock;
		extern int dist_factor_spd(), dist_factor_invert_spd(),
			dist_b_solve(), dist_f_solve(),
			dist_b_solve_using_inv(), dist_f_solve_using_inv();
		



		if (me ==0)
		printf("\n...doing numeric factor and solve\n"); 
		n_fact_initialize(	(max_b_size),
					(max_mat_size_in_d_phase),
					(max_stack_columns), 
					(done_factor_columns));

		sync0();


                rec_factor      (local_phase_root,
                                        tree_child,     tree_sibling,
                                        tree_parent,    tree_chains,
                                        chain_index,
                                        &done_factor_columns,
                                        index_list);
		if (P >1) {
		
			if( MANY_RHS) {
				d_factor_spd	
					(local_phase_root,
					dist_factor_invert_spd );
			} 
			else  {
				d_factor_spd	
					(local_phase_root,
					dist_factor_spd );
			}
		}

                n_solve_initialize(max_b_size,
                                     max_stack_columns,
					max_factor_columns);

                rec_solve       (local_phase_root,
                                        tree_child,     tree_sibling,
                                        tree_parent,    tree_chains,
                                        chain_index,
                                        &done_solution_elements,
                                        index_list);
		
		if (P >1) {
		
			if( MANY_RHS) {
				d_solve_spd    
					(local_phase_root,
					dist_f_solve_using_inv,
					dist_b_solve_using_inv );
			} 
			else  {
				d_solve_spd    
					(local_phase_root,
					dist_f_solve,
					dist_b_solve);
			}
		}


		last_clock =  clock0();
		l_b_solve (	(local_phase_root),	
				tree_child,
				tree_parent,
				tree_chains,
				tree_sibling,
				factor_index_list,
				chain_index, 
				index_list);
		stats[so_l_t] += clock0() - last_clock;
                stats[n_t] = stats[n_d_t] + stats[n_l_t];
                stats[n_d_o] /= Million;
                stats[n_l_o] /= Million;
                stats[n_o] = stats[n_d_o] + stats[n_l_o];
                stats[n_d_c] /=Thousand; 
                stats[n_c] = stats[n_d_c] + stats[n_l_c];
                stats[n_r] = stats[n_o]/stats[n_t];
                stats[n_d_r] = stats[n_d_o]/stats[n_d_t];
                stats[n_l_r] = stats[n_l_o]/stats[n_l_t];
                stats[so_t] = stats[so_d_t] + stats[so_l_t];
                stats[so_d_o] /=Million;
                stats[so_l_o] /=Million;
                stats[so_o] = stats[so_d_o] + stats[so_l_o];
                stats[so_d_c] /= Thousand;
                stats[so_c] = stats[so_d_c] + stats[so_l_c];
                stats[so_r] = stats[so_o]/stats[so_t];


		

		if (me ==0)
		printf("\n...printing statistics in %s.stats\n",fname); 
		gather_stats (fname,str);

		if (me ==0)
			printf("\n...writing solution to file %s.x\n",fname);
		print_b(fname,
			local_phase_root,
			tree_size,
			tree_chains,
			chain_index,
			tree_start_procs,
			b_tree_chains);
			

}

/*
 * l_b_solve: process tree node `current`, then recurse into each of its
 * children (tree_child[current] followed by the tree_sibling links,
 * terminated by -1).
 *
 * For the node itself the routine calls set_up_b_in_stack,
 * local_b_reduce and local_bsolve, copies the result from factor_b to
 * stack_b and from there into b_tree_chains at chain_index[current],
 * and adds 2*n*t - t*t to stats[so_l_o], where n is
 * factor_struc_sizes[current] and t is the chain width
 * chain_index[current+1] - chain_index[current].
 *
 * On return stack_ptr is reset to index_list[current].
 */
l_b_solve (
		current,
		tree_child,
		tree_parent,
		tree_chains,
		tree_sibling,
		factor_index_list,
		chain_index,
		index_list)

int		current,
		*tree_child,
		*tree_parent,
		*tree_chains,
		*tree_sibling,
		*factor_index_list,
		*chain_index,
		*index_list;
{
	extern	double	stats[];
	int	kid;		/* child currently being visited */
	int	width;		/* number of entries in this node's chain */
	int	f_off;		/* offset of this node in factor_nonz/factor_b */
	int	s_off;		/* offset of this node in stack_b */
	double	rows, cols;

	set_up_b_in_stack(current, tree_parent[current], index_list);

	/*
	 * Read the offsets only after set_up_b_in_stack has run: it is
	 * handed index_list and may update it -- TODO confirm.
	 */
	width = chain_index[current+1] - chain_index[current];
	f_off = factor_index_list[current];
	s_off = index_list[current];

	local_b_reduce(width,
			width,
			factor_struc_sizes[current] - 1,
			factor_struc[current],
			factor_struc_sizes[current],
			factor_nonz + f_off,
			stack_b + s_off,
			factor_b + f_off);

	local_bsolve(width,
			width,
			factor_b + f_off,
			factor_nonz + f_off);

	/* factor_b -> stack_b -> b_tree_chains */
	copy_to(stack_b + s_off, factor_b + f_off, width);
	copy_to(b_tree_chains + chain_index[current],
			stack_b + s_off,
			width);

	/* operation count for this node */
	rows = factor_struc_sizes[current];
	cols = width;
	stats[so_l_o] += 2*rows*cols - cols*cols;

	kid = tree_child[current];
	while (kid != -1) {
		l_b_solve(kid,
				tree_child,
				tree_parent,
				tree_chains,
				tree_sibling,
				factor_index_list,
				chain_index,
				index_list);
		kid = tree_sibling[kid];
	}

	/* free up */
	stack_ptr = index_list[current];

}/*end l_b_solve*/

/*
 * check_ones: debugging aid.  Prints a_index, a_size and a_struc with
 * print_vec, then, for every column i, subtracts from b the stored
 * entries of that column: the first entry (at a_index[i]) from b[i],
 * and each of the remaining a_size[i]-1 entries from both b[i] and
 * b[a_struc[j]].  Finally prints b, one value per line.
 * b is modified in place.
 */
check_ones()
{
	extern	int	N, *a_struc, *a_index, *a_size;
	extern	float	*a_nonz, *b;
	int	col, pos, first, past_end;

	print_vec(stdout, a_index, 0, N, "a_index");
	print_vec(stdout, a_size, 0, N, "a_size");
	print_vec(stdout, a_struc, 0, N, "a_struc");

	for (col = 0; col < N; col++) {
		first = a_index[col];
		past_end = first + a_size[col];

		/* first stored entry of the column */
		b[col] -= a_nonz[first];

		/* remaining entries touch two rows of b each */
		for (pos = first + 1; pos < past_end; pos++) {
			b[a_struc[pos]] -= a_nonz[pos];
			b[col] -= a_nonz[pos];
		}
	}

	for (col = 0; col < N; col++)
		printf("%10.2f\n", b[col]);
}
/*
 * print_small_mat: debugging aid.  Expands the stored matrix into a
 * dense N x N array -- the first entry of column i goes on the
 * diagonal, every further entry j is mirrored to both [i][a_struc[j]]
 * and [a_struc[j]][i] -- and prints it row by row, each row followed
 * by "= " and b[i].
 *
 * The dense array is a fixed 50 x 50 local, so the routine refuses to
 * run (message on stderr, nothing printed) when N is larger than that.
 * Returns 0.
 */
print_small_mat()
{
	extern	int	N, *a_struc, *a_size, *a_index;
	extern	float	*a_nonz, *b;
	enum { SMALL_MAT_MAX = 50 };
	float	small_mat[SMALL_MAT_MAX][SMALL_MAT_MAX];
	int	i, j, k;

	/* indexing small_mat with N > 50 would run off the array */
	if (N > SMALL_MAT_MAX) {
		fprintf(stderr,
			"print_small_mat: N = %d exceeds limit of %d\n",
			N, (int) SMALL_MAT_MAX);
		return 0;
	}

	for (i = 0; i < N; i++)
		for (j = 0; j < N; j++)
			small_mat[i][j] = 0.0;

	for (i = 0; i < N; i++) {
		small_mat[i][i] = a_nonz[a_index[i]];
		/* k counts the entries after the first one; j indexes them */
		for (k = a_size[i] - 1, j = a_index[i] + 1; k > 0; k--, j++) {
			small_mat[i][a_struc[j]] =
			small_mat[a_struc[j]][i] =
			a_nonz[j];
		}
	}

	for (i = 0; i < N; i++) {
		for (j = 0; j < N; j++)
			printf("%5.1f", small_mat[i][j]);

		printf("= %5.1f\n\n", b[i]);
	}
	return 0;
}

/*
 * n_fact_dump: debugging aid.  For every position i in tree_chains
 * whose tree_local_column[i] is not -1, prints one "a" line per stored
 * entry of that column (tree_chains[i], a_struc[l], a_nonz[l]) followed
 * by one "b" line (tree_chains[i], b[k]).
 *
 * Does not return: the program is terminated with exit status 0 once
 * the dump is complete.
 */
n_fact_dump()
{

	extern	int	N, *a_struc, *a_index, *a_size, *tree_chains,
			size_tree_chains, *tree_local_column;
	extern	float	*a_nonz, *b;

	int	i, j, k, l, m;

	for (i = 0; i < size_tree_chains; i++) {
		j = tree_chains[i];
		k = tree_local_column[i];
		if (k == -1)
			continue;	/* no column recorded for this entry */

		for (l = a_index[k], m = a_size[k]; m > 0; m--, l++)
			printf("a %5d %5d %6.2f\n",
				j, a_struc[l], a_nonz[l]);

		printf("b %5d %6.2f\n",
			j, b[k]);
	}
	/* exit() needs a status argument; the dump itself succeeded */
	exit(0);
}
/*
 * print_b: write the solution to the file "<fname>.x".
 *
 * The P processes take turns, separated by sync0() calls: on its turn
 * the process with me == i opens the file ("w" when me == 0, "a"
 * otherwise), writes its lines and closes it again.  Each line holds
 * o_n_tree_chains[k] and b_tree_chains[k].  A process writes
 *   - the chains of nodes 0 .. local_phase_root, and
 *   - for each ancestor of local_phase_root (following tree_parent
 *     until -1), that ancestor's chain if tree_start_procs[m] == me,
 *     m being the ancestor's distance counted from 0.
 *
 *	fname		base name of the output file
 *	chain_index	chain_index[j] .. chain_index[j+1]-1 are the
 *			positions belonging to node j
 *	tree_size, tree_chains	accepted but not used here
 *
 * exit_err("print_b", fopen_err) is called when the file cannot be
 * opened or when "<fname>.x" does not fit in the local name buffer.
 */
print_b(fname,local_phase_root,
	tree_size,
	tree_chains,
	chain_index,
	tree_start_procs,
	b_tree_chains)
char	*fname;
int	local_phase_root, tree_size, *tree_chains,
	*chain_index, 	*tree_start_procs;
double	*b_tree_chains;
{

	int	i, j, k, l, m;
	int	name_len;
	char	x_file[80];

	/* bounded formatting: a long fname must not overrun x_file */
	name_len = snprintf(x_file, sizeof x_file, "%s.x", fname);
	if (name_len < 0 || name_len >= (int) sizeof x_file)
		exit_err("print_b", fopen_err);

	for (i = 0; i < P; i++) {
		sync0();

		if (i != me)
			continue;	/* not this process's turn */

		/* the first writer truncates the file, the rest append */
		if (me == 0) {
			if ((fp = fopen(x_file, "w")) == NULL)
				exit_err("print_b", fopen_err);
		} else {
			if ((fp = fopen(x_file, "a")) == NULL)
				exit_err("print_b", fopen_err);
		}

		/* chains of nodes 0 .. local_phase_root */
		for (j = 0; j <= local_phase_root; j++) {
			for (k = chain_index[j], l = chain_index[j+1];
					k < l; k++) {
				fprintf(fp, "%8d%15.7f\n",
					o_n_tree_chains[k],
					b_tree_chains[k]);
			}
		}

		/* ancestors of local_phase_root assigned to this process */
		for (m = 0, j = tree_parent[local_phase_root];
				j != -1; j = tree_parent[j], m++) {

			if (tree_start_procs[m] != me)
				continue;

			for (k = chain_index[j], l = chain_index[j+1];
					k < l; k++) {
				fprintf(fp, "%8d%15.7f\n",
					o_n_tree_chains[k],
					b_tree_chains[k]);
			}
		}
		fclose(fp);
	}/*for*/
	sync0();
}/*end print_b*/
