      SUBROUTINE PDSUMMA( TRANSA, TRANSB, M, N, K, MB, NB, KB, ALPHA, A,
     $                    LDA, B, LDB, BETA, C, LDC, IMROW, IMCOL, WORK,
     $                    IWORK )
*
*     .. Scalar Aguments ..
      CHARACTER*1        TRANSA, TRANSB
      INTEGER            IMROW, IMCOL, K, KB, LDA, LDB, LDC, M, MB, N,
     $                   NB
      DOUBLE PRECISION   ALPHA, BETA
*     ..
*     .. Array Arguments ..
      INTEGER            IWORK( * )
      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( LDC, * ),
     $                   WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PDSUMMA performs one of the matrix-matrix operations
*
*     C := alpha*op( A )*op( B ) + beta*C,
*
*  where  op( X ) is one of
*
*     op( X ) = X   or   op( X ) = X',
*
*  where alpha and beta are scalars, and A, B and C are distributed 
*  matrices, with op( A ) an M by K distributed matrix, op( B ) a K by N
*  distributed matrix and C an M by N ditributed matrix.
*
*  Arguments
*  =========
*
*  TRANSA - (input) CHARACTER*1
*           TRANSA specifies the form of op( A ) to be used in the
*           matrix multiplication as follows:
*
*              TRANSA = 'N' or 'n',  op( A ) = A.
*
*              TRANSA = 'T' or 't',  op( A ) = A'.
*
*              TRANSA = 'C' or 'c',  op( A ) = A'.
*
*  TRANSB - (input) CHARACTER*1
*           TRANSB specifies the form of op( B ) to be used in the
*           matrix multiplication as follows:
*
*              TRANSB = 'N' or 'n',  op( B ) = B.
*
*              TRANSB = 'T' or 't',  op( B ) = B'.
*
*              TRANSB = 'C' or 'c',  op( B ) = B'.
*
*  M      - (input) INTEGER
*           On entry, M specifies the number of rows of the distributed 
*           matrix op( A ) and of the distributed matrix C.  M >= 0.
*
*  N      - (input) INTEGER
*           On entry, N specifies the number of columns of the distribu-
*           ted matrix op( B ) and the number of columns of the distri-
*           buted matrix C. N >= 0.
*
*  K      - (input) INTEGER
*           On entry, K specifies the number of columns of the distribu-
*           matrix op( A ) and the number of rows of the distributed 
*           matrix op( B ). K >= 0.
*
*  MB     - (input) INTEGER
*           On entry, MB specifies the row block size of the distributed
*           matrix op( A ) and of the distributed matrix C.  MB >= 1.
*
*  NB     - (input) INTEGER
*           On entry, NB specifies the column block size of the distribu-
*           matrix op( B ) and of the distributed matrix C.  NB >= 1.
*
*  KB     - (input) INTEGER
*           On entry, KB specifies the column block size of the distribu-
*           ted matrix op( A ) and the row block size of the distributed
*           matrix op( B ).  KB >= 1.
*
*  ALPHA  - (input) DOUBLE PRECISION
*           On entry, ALPHA specifies the scalar alpha.
*
*  A      - (input) DOUBLE PRECISION array of DIMENSION ( LDA, Lx ),
*           where Lx is Kq when TRANSA = 'N' or 'n', and is Mq
*           otherwise. Before entry with  TRANSA = 'N' or 'n', the
*           leading  Mp by Kq part of the array A must contain the local
*           pieces of the distributed matrix A, otherwise the leading
*           Kp by Mq part of the array A must contain the local pieces
*           of the distributed matrix A.  Mp, Kq, Kp and Mq are local
*           variables (see below description of local parameters).
*
*  LDA    - (input) INTEGER
*           On entry, LDA specifies the first dimension of A as declared
*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
*           LDA must be at least  max( 1, Mp ), otherwise LDA must be at
*           least  max( 1, Kp ).
*
*  B      - (input) DOUBLE PRECISION array of DIMENSION ( LDB, Lx ),
*           where Lx is Nq when TRANSB = 'N' or 'n', and is Kp
*           otherwise. Before entry with  TRANSB = 'N' or 'n', the
*           leading  Kp by Nq part of the array B must contain the local
*           pieces of the distributed matrix B, otherwise the leading
*           Np by Kq part of the array B must contain the local pieces
*           of the distributed matrix B.  Kp, Nq, Np and Kq are local
*           variables (see below description of local parameters).
*
*  LDB    - (input) INTEGER
*           On entry, LDB specifies the first dimension of B as declared
*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
*           LDB must be at least  max( 1, Kp ), otherwise LDB must be at
*           least  max( 1, Np ).
*
*  BETA   - (input) DOUBLE PRECISION
*           BETA specifies the scalar beta.  When BETA is supplied as
*           zero then C need not be set on input.
*
*  C      - (input/output) DOUBLE PRECISION array of DIMENSION (LDC, Nq)
*           Before entry, the leading  Mp by Nq  part of the array  C
*           must contain the distributed matrix  C,  except when  beta
*           is zero, in which case C need not be set on entry.
*           On exit, the array  C  is overwritten by the  Mp by Nq
*           matrix ( alpha*op( A )*op( B ) + beta*C ).
*
*  LDC    - (input) INTEGER
*           On entry, LDC specifies the first dimension of C as declared
*           in  the  calling  (sub)  program.   LDC  must  be  at  least
*           max( 1, Mp ).
*
*  IMROW  - (input) INTEGER
*           On entry, IMROW specifies a row of the process template,
*           which holds the first block of the matrices.
*           0 <= IMROW < NPROW.
*
*  IMCOL  - (input) INTEGER
*           On entry, IMCOL specifies a column of the process template,
*           which holds the first block of the matrices.
*           0 <= IMCOL < NPCOL.
*
*  WORK   - (workspace) DOUBLE PRECISION array of size >= LWORK.
*           See below description of local parameters.
*
*  IWORK  - (workspace) INTEGER array of size >= LIWORK.
*           See below description of local parameters.
*
*  Description of local parameters
*  ===============================
*
*  LCM   =  the lowest common multiple of P and Q
*  Mp = NUMROC( M, MB, MYROW, IMROW, NPROW )
*  Mq = NUMROC( M, MB, MYCOL, IMCOL, NPCOL )
*  Np = NUMROC( N, NB, MYROW, IMROW, NPROW )
*  Nq = NUMROC( N, NB, MYCOL, IMCOL, NPCOL )
*
*  (1) TRANSA = 'N', and TRANSB = 'N'
*      LIWORK = 0, WORK = KB x ( Mp + Nq ).
*
*  (2) TRANSA = 'T', and TRANSB = 'N'
*      LIWORK = 0, WORK = MB x ( Kp + Nq ). 
*
*  (3) TRANSA = 'N', and TRANSB = 'T'
*      LIWORK = 0, WORK = NB x ( Kq + Mp ).
*
*  (4) TRANSA = 'T', and TRANSB = 'T'
*      LIWORK = 3 x MAX(P, Q), 
*      LWORK  = Np x Mq + MAX( KB x ( Mq + Np ), 2 x Ceil( Ceil(N,NB),
*               LCM ) x NB x Ceil( Ceil(M,MB), LCM ) x MB ).
*
* ======================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ONE,          ZERO
      PARAMETER          ( ONE  = 1.0D+0, ZERO = 0.0D+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            NOTA, NOTB
      INTEGER            ICTXT, ICURCOL, ICURROW, II, INFO, IPC, IPW,
     $                   IPW1, IPW2, JJ, KBB, KK, KP, KQ, LDW, MBB, MM,
     $                   MP, MQ, MYCOL, MYROW, NBB, NN, NP, NPROW,
     $                   NPCOL, NROWA, NROWB, NQ
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DGEBS2D, DGEBR2D, DGEMM,
     $                   DGSUM2D, DLACPY, DLASET, PDTRANS, PXERBLA
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN, MOD
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            NUMROC
      EXTERNAL           LSAME, NUMROC
*     ..
*     .. Common Blocks ..
      COMMON             /CONTEXT/     ICTXT
*     ..
*     .. Executable Statements ..
*
*     Get grid parameters
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
*     transposed and set  NROWA, and  NROWB  as the number of rows of  A
*     and the  number of  rows  of  B  respectively.
*
      NOTA  = LSAME( TRANSA, 'N' )
      NOTB  = LSAME( TRANSB, 'N' )
*
*     Test the input parameters.
*
      INFO = 0
      IF(      ( .NOT.NOTA                 ).AND.
     $         ( .NOT.LSAME( TRANSA, 'C' ) ).AND.
     $         ( .NOT.LSAME( TRANSA, 'T' ) )      ) THEN
         INFO = 1
      ELSE IF( ( .NOT.NOTB                 ).AND.
     $         ( .NOT.LSAME( TRANSB, 'C' ) ).AND.
     $         ( .NOT.LSAME( TRANSB, 'T' ) )      ) THEN
         INFO = 2
      ELSE IF( M  .LT. 0                          ) THEN
         INFO = 3
      ELSE IF( N  .LT. 0                          ) THEN
         INFO = 4
      ELSE IF( K  .LT. 0                          ) THEN
         INFO = 5
      ELSE IF( MB .LT. 1                          ) THEN
         INFO = 6
      ELSE IF( NB .LT. 1                          ) THEN
         INFO = 7
      ELSE IF( KB .LT. 1                          ) THEN
         INFO = 8
      ELSE IF( IMROW.LT.0 .OR. IMROW.GE.NPROW     ) THEN
         INFO = 17
      ELSE IF( IMCOL.LT.0 .OR. IMCOL.GE.NPCOL     ) THEN
         INFO = 18
      END IF
*
      IF( INFO.EQ.0 ) THEN
         MP = NUMROC( M, MB, MYROW, IMROW, NPROW )
         KP = NUMROC( K, KB, MYROW, IMROW, NPROW )
         NP = NUMROC( N, NB, MYROW, IMROW, NPROW )
         IF( NOTA )THEN
            NROWA = MP
         ELSE
            NROWA = KP
         END IF
         IF( NOTB )THEN
            NROWB = KP
         ELSE
            NROWB = NP
         END IF
*
         IF( LDA.LT.MAX( 1, NROWA ) ) THEN
            INFO = 11
         ELSE IF( LDB.LT.MAX( 1, NROWB ) ) THEN
            INFO = 13
         ELSE IF( LDC.LT.MAX( 1, MP    ) ) THEN
            INFO = 16
         END IF
      END IF
*
      IF( INFO.NE.0 ) THEN
         CALL PXERBLA( ICTXT, 'PDMMA', INFO )
         RETURN
      END IF
*
*     Quick return if possible.
*
      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
     $   RETURN
*
*     And if  alpha.eq.zero.
*
      NQ = NUMROC( N, NB, MYCOL, IMCOL, NPCOL )
      IF( ALPHA.EQ.ZERO ) THEN
         IF( BETA.EQ.ZERO ) THEN
            CALL DLASET( 'Full', MP, NQ, ZERO, ZERO, C, LDC )
         ELSE
            DO 20, JJ = 1, NQ
               DO 10, II = 1, MP
                  C( II, JJ ) = BETA*C( II, JJ )
   10          CONTINUE
   20       CONTINUE
         END IF
         RETURN
      END IF
*
      II = 1
      JJ = 1
      ICURROW = IMROW
      ICURCOL = IMCOL
*
      IF( NOTB ) THEN
*
         IF( NOTA ) THEN
*
*           C := beta*C
*
            IF( BETA.EQ.ZERO ) THEN
               CALL DLASET( 'Full', MP, NQ, ZERO, ZERO, C, LDC )
            ELSE IF( BETA.NE.ONE ) THEN
               DO 40 NN = 1, NQ
                  DO 30 MM = 1, MP
                     C( MM, NN ) = BETA * C( MM, NN )
   30             CONTINUE
   40          CONTINUE
            END IF
*
            IF( ALPHA.EQ.ZERO )
     $         RETURN
*
*           C := alpha*A*B + C
*
            IPW = KB * NQ + 1
            LDW = MAX( 1, MP )
*
            DO 50 KK = 1, K, KB
               KBB = MIN( K-KK+1, KB )
*
               IF( MYROW.EQ.ICURROW ) THEN
                  CALL DLACPY( 'Full', KBB, NQ, B( II, 1 ), LDB, WORK,
     $                         KBB )
                  CALL DGEBS2D( ICTXT, 'Columnwise', 'I-ring', KBB, NQ,
     $                          WORK, KBB )
               ELSE
                  CALL DGEBR2D( ICTXT, 'Columnwise', 'I-ring', KBB, NQ,
     $                          WORK, KBB, ICURROW, MYCOL )
               END IF
*
               IF( MYCOL.EQ.ICURCOL ) THEN
                  CALL DLACPY( 'Full', MP, KBB, A( 1, JJ ), LDA,
     $                         WORK( IPW ), LDW )
                  CALL DGEBS2D( ICTXT, 'Rowwise', 'I-ring', MP, KBB,
     $                          WORK( IPW ), LDW )
               ELSE
                  CALL DGEBR2D( ICTXT, 'Rowwise', 'I-ring', MP, KBB,
     $                          WORK( IPW ), LDW, MYROW, ICURCOL )
               END IF
*
               CALL DGEMM( TRANSA, TRANSB, MP, NQ, KBB, ALPHA,
     $                     WORK( IPW ), LDW, WORK, KBB, ONE, C, LDC )
*
               IF( MYROW.EQ.ICURROW )
     $            II = II + KBB
               IF( MYCOL.EQ.ICURCOL )
     $            JJ = JJ + KBB
*
               ICURROW = MOD( ICURROW+1, NPROW )
               ICURCOL = MOD( ICURCOL+1, NPCOL )
*
   50       CONTINUE
*
         ELSE
*
*           C := alpha*A'*B + beta*C
*
            IPW = MB * KP + 1
            LDW = MAX( 1, KP )
*
            DO 60 MM = 1, M, MB
               MBB = MIN( M-MM+1, MB )
*
               IF( MYCOL.EQ.ICURCOL ) THEN
                  CALL DLACPY( 'Full', KP, MBB, A( 1, JJ ), LDA, WORK,
     $                         LDW )
                  CALL DGEBS2D( ICTXT, 'Rowwise', 'I-ring', KP, MBB,
     $                          WORK, LDW )
               ELSE
                  CALL DGEBR2D( ICTXT, 'Rowwise', 'I-ring', KP, MBB,
     $                          WORK, LDW, MYROW, ICURCOL )
               END IF
*
               IF( MYROW.EQ.ICURROW ) THEN
                  CALL DGEMM( TRANSA, TRANSB, MBB, NQ, KP, ALPHA, WORK,
     $                        LDW, B, LDB, BETA, C( II, 1 ), LDC )
c
c                 When new BLACS released, uncomment the next 2 lines
c                 and remove the other call to DGSUM2D.
c
c                 CALL DGSUM2D( ICTXT, 'Columnwise', 'I-ring', MBB, NQ,
c    $                          C( II, 1 ), LDC, MYCOL, ICURROW )
                  CALL DGSUM2D( ICTXT, 'Columnwise', ' ', MBB, NQ,
     $                          C( II, 1 ), LDC, ICURROW, MYCOL )
               ELSE
                  CALL DGEMM( TRANSA, TRANSB, MBB, NQ, KP, ALPHA, WORK,
     $                        LDW, B, LDB, ZERO, WORK( IPW ), MBB )
c
c                 When new BLACS released, uncomment the next 2 lines
c                 and remove the other call to DGSUM2D.
c
c                 CALL DGSUM2D( ICTXT, 'Columnwise', 'I-ring', MBB, NQ,
c    $                          WORK( IPW ), MBB, MYCOL, ICURROW )
                  CALL DGSUM2D( ICTXT, 'Columnwise', ' ', MBB, NQ,
     $                          WORK( IPW ), MBB, ICURROW, MYCOL )
               END IF
*
               IF( MYROW.EQ.ICURROW )
     $            II = II + MBB
               IF( MYCOL.EQ.ICURCOL )
     $            JJ = JJ + MBB
*
               ICURROW = MOD( ICURROW+1, NPROW )
               ICURCOL = MOD( ICURCOL+1, NPCOL )
*
   60       CONTINUE
*
         END IF
*
      ELSE
*
         IF( NOTA ) THEN
*
*           C := alpha*A*B' + beta*C
*
            KQ = NUMROC( K, KB, MYCOL, IMCOL, NPCOL )
            IPW = NB * KQ + 1
            LDW = MAX( 1, MP )
*
            DO 70 NN = 1, N, NB
               NBB = MIN( N-NN+1, NB )
*
               IF( MYROW.EQ.ICURROW ) THEN
                  CALL DLACPY( 'Full', NBB, KQ, B( II, 1 ), LDB, WORK,
     $                         NBB )
                  CALL DGEBS2D( ICTXT, 'Columnwise', 'I-ring', NBB, KQ,
     $                          WORK, NBB )
               ELSE
                  CALL DGEBR2D( ICTXT, 'Columnwise', 'I-ring', NBB, KQ,
     $                          WORK, NBB, ICURROW, MYCOL )
               END IF
*
               IF( MYCOL.EQ.ICURCOL ) THEN
                  CALL DGEMM( TRANSA, TRANSB, MP, NBB, KQ, ALPHA, A,
     $                        LDA, WORK, NBB, BETA, C( 1, JJ ), LDC )
c
c                 When new BLACS released, uncomment the next 2 lines
c                 and remove the other call to DGSUM2D.
c
c                 CALL DGSUM2D( ICTXT, 'Rowwise', 'I-ring', MP, NBB,
c    $                          C( 1, JJ ), LDC, MYROW, ICURCOL )
                  CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, NBB,
     $                          C( 1, JJ ), LDC, MYROW, ICURCOL )
               ELSE
                  CALL DGEMM( TRANSA, TRANSB, MP, NBB, KQ, ALPHA, A,
     $                        LDA, WORK, NBB, ZERO, WORK( IPW ), LDW )
c
c                 When new BLACS released, uncomment the next 2 lines
c                 and remove the other call to DGSUM2D.
c
c                 CALL DGSUM2D( ICTXT, 'Rowwise', 'I-ring', MP, NBB,
c    $                          WORK( IPW ), LDW, MYROW, ICURCOL )
                  CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, NBB,
     $                          WORK( IPW ), LDW, MYROW, ICURCOL )
               END IF
*
               IF( MYROW.EQ.ICURROW )
     $            II = II + NBB
               IF( MYCOL.EQ.ICURCOL )
     $            JJ = JJ + NBB
*
               ICURROW = MOD( ICURROW+1, NPROW )
               ICURCOL = MOD( ICURCOL+1, NPCOL )
*
   70       CONTINUE
*
         ELSE
*
*           C := alpha*A'*B' + beta*C
*
            MQ   = NUMROC( M, MB, MYCOL, IMCOL, NPCOL )
            IPC  = 1
            LDW  = MAX( 1, NP )
            IPW1 = IPC + NP * MQ
            IPW2 = IPW1 + KB * MQ
*
            CALL DLASET( 'Full', NP, MQ, ZERO, ZERO, WORK( IPC ), LDW )
*
*           W := alpha*B*A
*
            DO 80 KK = 1, K, KB
               KBB = MIN( K-KK+1, KB )
*
               IF( MYROW.EQ.ICURROW ) THEN
                  CALL DLACPY( 'Full', KBB, MQ, A( II, 1 ), LDA,
     $                         WORK( IPW1 ), KBB )
                  CALL DGEBS2D( ICTXT, 'Columnwise', 'I-ring', KBB, MQ,
     $                          WORK( IPW1 ), KBB )
               ELSE
                  CALL DGEBR2D( ICTXT, 'Columnwise', 'I-ring', KBB, MQ,
     $                          WORK( IPW1 ), KBB, ICURROW, MYCOL )
               END IF
*
               IF( MYCOL.EQ.ICURCOL ) THEN
                  CALL DLACPY( 'Full', NP, KBB, B( 1, JJ ), LDB,
     $                         WORK( IPW2 ), LDW )
                  CALL DGEBS2D( ICTXT, 'Rowwise', 'I-ring', NP, KBB,
     $                          WORK( IPW2 ), LDW )
               ELSE
                  CALL DGEBR2D( ICTXT, 'Rowwise', 'I-ring', NP, KBB,
     $                          WORK( IPW2 ), LDW, MYROW, ICURCOL )
               END IF
*
               CALL DGEMM( 'No transpose', 'No transpose', NP, MQ, KBB,
     $                     ALPHA, WORK( IPW2 ), LDW, WORK( IPW1 ), KBB,
     $                     ONE, WORK( IPC ), LDW )
*
               IF( MYROW.EQ.ICURROW )
     $            II = II + KBB
               IF( MYCOL.EQ.ICURCOL )
     $            JJ = JJ + KBB
*
               ICURROW = MOD( ICURROW+1, NPROW )
               ICURCOL = MOD( ICURCOL+1, NPCOL )
*
   80       CONTINUE
*
*           C := beta*C + W'
*
            CALL PDTRANS( 'T', N, M, NB, MB, WORK( IPC ), LDW, BETA,
     $                    C, LDC, IMROW, IMCOL, WORK( IPW1 ), IWORK )
*
         END IF
*
      END IF
*
      RETURN
*
*     End of PDSUMMA
*
      END
