CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC C C C The BLACS calls are modified/replaced to use MPI message-passing C C primitives. C C C C Purushotham Bangalore (puri@cs.msstate.edu) C C C CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC C SUBROUTINE PDUTSV( DIAG, N, NB, A, LDA, B, WORK, COMM ) INCLUDE "mpif.h" INTEGER N, NB, LDA CHARACTER*1 DIAG INTEGER COMM( 3 ) DOUBLE PRECISION A(LDA, *), B( * ), WORK( * ) * * Parallel double precision Upper Triangular Solve * * Purpose * ======= * * PDUTSV computes the solution of U x = b * * Arguments * ========= * * DIAG (input) character*1 * indicates whether on not unit diagonal * ('Unit' or 'Nonunit') * * n (input) integer * dimension of matrix u. * * nb (input) integer * block size * * a (input) double precision array, dimension (lda,n) * upper triangular portion of a contains the uppertriangular * matrix. * * lda (input) integer * the leading dimension of the array a. lda >= max(1,n). * * b (input/output) double precision array, dimension n * On entry, this node's contribution to the * right hand side of the linear system. * On exit, this node's contribution to the solution vector x. * * work double precision array, dimension MYNROW * array used to collect partial results * * info (output) integer * = 0: successful exit * < 0: if info = -k, the k-th argument had an illegal value * > 0: if info = k, u(k,k) is exactly zero, no computation is * preformed. * * ======================================================================= * * VERSION 2.0 DATED Aug. 1992 * R. VAN DE GEIJN * * This code resulted from a joint collaboration between * Robert van de Geijn * and the group for 'Research in Parallel Computational * Electromagnetics' at the Jet Propulsion Laboratory. Futher * information can be found in the paper, * * T. Cwik, R. van de Geijn, and J. Patterson, * "Application of Massively Parallel Computation to * Integral Equation Models of Electromagnetic Scattering * (Invited Paper)," J. Opt. Soc. Am. A, Vol. 11, No. 4, * April 1994. * * Please email your comments to the authors at * * rvdg@cs.utexas.edu (Robert van de Geijn) * cwik@yosemite.jpl.nasa.gov (Tom Cwik) * jep@yosemite.jpl.nasa.gov (Jean Patterson) * * Use of this code must properly reference that paper. * * The code is free, and comes with no implicit or explicit * guarantees. * * * All rights reserved * * TO BE DONE: plug in ring broadcast instead of SD/RC pairs * * .. parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. local scalars .. * * nprow row dimension of node grid * npcol column dimension of node grid * * myrow my row index * mycol my column index * * icurrow index of node row that holds current diagonal block * icurcol index of node column that holds current diagonal block * INTEGER NPROW, NPCOL, MYROW, MYCOL, ICURROW, ICURCOL * * i loop index * istart, ifin start and end of local part of subvector (row index) * jstart start of local part of current matrix block (column index) * isize size of local part of subvector * jb size of current diagonal block * mynrow number of rows assigned to this node * idummy dummy parameter * nbdim number of blocks * INTEGER I, ISTART, IFIN, JSTART, ISIZE, JB, MYNROW, $ IDUMMY, NBDIM, II, J * * neighbor indices * INTEGER INORTH, ISOUTH, JWEST, JEAST INTEGER ROWCOMM, COLCOMM * .. * .. intrinsic functions .. * INTRINSIC MOD, MIN * .. * .. * .. external procedures .. EXTERNAL DAXPY, DCOPY, PLAMCH2 $ DGEMV, DTRSV, IMYPART * .. * .. start of executable code .. * get machine parameters * CALL PLAMCH2( NPROW, NPCOL, MYROW, MYCOL, COMM ) ROWCOMM = COMM( 2 ) COLCOMM = COMM( 3 ) * * get neighbor indices * INORTH = MOD( MYROW-1+NPROW, NPROW ) ISOUTH = MOD( MYROW+1, NPROW ) JWEST = MOD( MYCOL-1+NPCOL, NPCOL ) JEAST = MOD( MYCOL+1, NPCOL ) * * initialize indices of node holds current diagonal block * NBDIM = N/NB IF (NBDIM*NB .NE. N) NBDIM = NBDIM+1 ICURROW = MOD( NBDIM-1, NPROW ) ICURCOL = MOD( NBDIM-1, NPCOL ) * * mynrow = number of rows assigned to this node * CALL IMYPART( 1, N, NB, I, MYNROW, MYROW, NPROW ) * * start of main loop * DO 100 I = (NBDIM-1)*NB+1, 1, -NB IF (ICURCOL .EQ. MYCOL) THEN IF (I .LT. (NBDIM-1)*NB+1) THEN * * compute where this node's part of the subvector starts * and ends * CALL IMYPART( MAX( 1, I-(NPCOL-2)*NB), MIN( I+NB-1, N ), $ NB, ISTART, IFIN, MYROW, NPROW ) ISIZE = IFIN-ISTART+1 * * receive update information and add to local information * CALL DGERV2D( ISIZE, 1, WORK, ISIZE, $ JEAST, ROWCOMM, JEAST ) * * add to local result * CALL DAXPY ( ISIZE, ONE, WORK, 1, B( ISTART ), 1 ) ENDIF JB = MIN( NB, N-I+1 ) IF (ICURROW .EQ. MYROW) THEN * * solve triangular system * CALL IMYPART( I, MIN( I+NB-1, N) , NB, ISTART, IFIN, $ MYROW, NPROW ) ISIZE = IFIN-ISTART+1 CALL IMYPART( I, I, NB, JSTART, IDUMMY, MYCOL, NPCOL ) CALL DTRSV ( 'UPPERTRIANGULAR', 'NOTRANSPOSE', DIAG, $ JB, A( ISTART, JSTART ), LDA, B( ISTART ), 1 ) * * pass result around ring * IF (NPROW .GT. 1 .AND. I-NB .GE. 1) THEN CALL DGESD2D( JB, 1, B( ISTART ), JB, $ INORTH, COLCOMM, MYROW ) ENDIF CALL DCOPY ( JB, B( ISTART ), 1, WORK, 1 ) ELSE IF (I-NB .GE. 1) THEN CALL DGERV2D( JB, 1, WORK, JB, $ ISOUTH, COLCOMM, ISOUTH ) IF (INORTH .NE. ICURROW) THEN CALL DGESD2D( JB, 1, WORK, JB, $ INORTH, COLCOMM, MYROW ) ENDIF ENDIF ENDIF IF (I-NB .GE. 1) THEN * * update subvector * CALL IMYPART( MAX( 1, I-(NPCOL-1)*NB ) , I-1, $ NB, ISTART, IFIN, MYROW, NPROW ) ISIZE = IFIN-ISTART+1 CALL IMYPART( I, I, NB, JSTART, IDUMMY, MYCOL, NPCOL ) CALL DGEMV ( 'NOTRANSPOSE', ISIZE, JB, -ONE, $ A( ISTART, JSTART ), LDA, WORK, 1, ONE, $ B( ISTART ), 1 ) * * pass subvector to left * CALL DGESD2D( ISIZE, 1, B(ISTART), ISIZE, $ JWEST, ROWCOMM, MYCOL ) * * update rest of local result * IF (ISTART-1 .GT. 0) $ CALL DGEMV ( 'NOTRANSPOSE', ISTART-1, JB, -ONE, $ A( 1, JSTART ), LDA, WORK, 1, ONE, $ B( 1 ), 1 ) ENDIF ENDIF ICURROW = MOD( ICURROW-1+NPROW, NPROW ) ICURCOL = MOD( ICURCOL-1+NPCOL, NPCOL ) 100 CONTINUE * * zero all parts of x except those responding to the * diagonal elements of a * ICURROW = 0 ICURCOL = 0 II = 1 DO 200 I=1, N, NB IF (ICURROW .EQ. MYROW) THEN IF (ICURCOL .NE. MYCOL) THEN DO 150 J=1, MIN(NB, N-I+1) B(II+J-1) = (0.0D00,0.0D00) 150 CONTINUE ENDIF II = II+NB ENDIF ICURROW = MOD( ICURROW+1, NPROW) ICURCOL = MOD( ICURCOL+1, NPCOL) 200 CONTINUE RETURN END .