#include "BSprivate.h"

/*@ BSscale_diag - Symmetrically scale the matrix by a diagonal matrix

    Input Parameters:
.   A - a sparse matrix
.   sc_diag - the diagonal matrix (stored as a vector) to scale A by
.   procinfo - the usual processor stuff

    Output Parameters:
.   A - a sparse matrix scaled by a diagonal matrix

    Returns:
    void

 @*/
void BSscale_diag(A,sc_diag,procinfo)
BSpar_mat *A;
FLOAT	*sc_diag;
BSprocinfo *procinfo;
{
	int	count, size;
	BSbb_d *diag_bb;
	int	*addrs;
	FLOAT *answs;
	int	cl_ind, in_ind;
	BSinode *inodes;
	BScl_2_inode *cliques;
	int	i, j;
	FLOAT	*p_diag;
	FLOAT	rdiag, cdiag;
	int	*inv_perm;
	int	loc_ind;
	FLOAT *matrix, *nz;
	int	*row_num;

	/* set scale_diag for A */
	A->scale_diag = sc_diag;

	/* find the request size */
	/* by counting the # of rows and subtracting the number of local rows */
	inodes = A->inodes->list;
	count = 0;
	for (in_ind=0;in_ind<A->inodes->length;in_ind++) {
		count+=inodes[in_ind].num_cols;
	}
	count -= A->num_rows;

	/* allocate and build query */
	MY_MALLOC(addrs,(int *),sizeof(int)*count,1);
	count = 0;
	cliques = A->clique2inode;
	for (cl_ind=0;cl_ind<cliques->num_cols;cl_ind++) {
		if (cliques->proc[cl_ind] != procinfo->my_id) {
			for (in_ind=cliques->inode_index[cl_ind];
				in_ind<cliques->inode_index[cl_ind+1];in_ind++) {
				for (i=0;i<inodes[in_ind].num_cols;i++) {
					addrs[count] = inodes[in_ind].o_gcol_num[i];
					count++;
				}
			}
		}
	}

	/* get an inversely permuted copy of the diagonal */
	MY_MALLOC(p_diag,(FLOAT *),sizeof(FLOAT)*A->num_rows,2);
	BSperm_dvec(sc_diag,p_diag,A->inv_perm); CHKERR(0);
	for (i=0;i<A->num_rows;i++) p_diag[i] = sqrt(p_diag[i]);

	/* set up bulletin board and do query */
	diag_bb = BSinit_bb_d(A->num_rows,A->map); CHKERR(0);
	BSpost_noaddr_bb_d(diag_bb,A->num_rows,p_diag); CHKERR(0);
	MY_MALLOC(answs,(FLOAT *),sizeof(FLOAT)*count,3);
	BSquery_nomatch_bb_d(diag_bb,count,addrs,answs,procinfo); CHKERR(0);

	BSfree_bb_d(diag_bb); CHKERR(0);
	MY_FREE(addrs);

	/* now, do the scaling */
	inv_perm = A->inv_perm->perm;
	count = 0;
	for (cl_ind=0;cl_ind<cliques->num_cols;cl_ind++) {
		if (cliques->proc[cl_ind] == procinfo->my_id) {
			/* first, do dense block */
			loc_ind = cliques->d_mats[cl_ind].local_ind;
			size = cliques->d_mats[cl_ind].size;
			matrix = cliques->d_mats[cl_ind].matrix;
			for (i=0;i<size;i++) {
				cdiag = p_diag[inv_perm[loc_ind+i]];
				for (j=i;j<size;j++) {
					rdiag = p_diag[inv_perm[loc_ind+j]];
					matrix[(i*size)+j] /= (cdiag*rdiag);
				}
			}
			for (in_ind=cliques->inode_index[cl_ind];
				in_ind<cliques->inode_index[cl_ind+1];in_ind++) {
				row_num = inodes[in_ind].row_num;
				nz = inodes[in_ind].nz;
				size = inodes[in_ind].length;
				for (i=0;i<inodes[in_ind].num_cols;i++) {
					rdiag = p_diag[inv_perm[loc_ind]];
					loc_ind++;
					for (j=0;j<size;j++) {
						cdiag = p_diag[inv_perm[row_num[j]]];
						nz[(i*size)+j] /= (rdiag*cdiag);
					}
				}
			}
		} else {
			for (in_ind=cliques->inode_index[cl_ind];
				in_ind<cliques->inode_index[cl_ind+1];in_ind++) {
				row_num = inodes[in_ind].row_num;
				nz = inodes[in_ind].nz;
				size = inodes[in_ind].length;
				for (i=0;i<inodes[in_ind].num_cols;i++) {
					rdiag = answs[count];
					count++;
					for (j=0;j<size;j++) {
						cdiag = p_diag[inv_perm[row_num[j]]];
						nz[(i*size)+j] /= (rdiag*cdiag);
					}
				}
			}
		}
	}

	MY_FREE(answs);
	MY_FREE(p_diag);
}
