      SUBROUTINE PDTRANS( TRANS, M, N, MB, NB, A, LDA, BETA, C, LDC,
     $                   IMROW, IMCOL, WORK, IWORK )
*
*  -- PUMMA Package routine (version 2.1) --
*     Jaeyoung Choi, Oak Ridge National Laboratory.
*     Jack Dongarra, Univ. of Tennessee, Oak Ridge National Laboratory.
*     David Walker,  Oak Ridge National Laboratory.
*     October 31, 1994.
*
*     ..
*     .. Scalar Arguments ..
      CHARACTER*1        TRANS
      INTEGER            IMCOL, IMROW, M, MB, LDA, LDC, N, NB
      DOUBLE PRECISION   BETA
*     ..
*     .. Array Arguments ..
      INTEGER            IWORK( 3, 0:* )
      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PDTRANS routine is one of the PUMMA package based on block cyclic
*  data distribution on 2-D process configuration.
*
*  It is used for the following matrix transposition,
*
*     Form  C := A' + beta*C
*
*  where beta is a scalar, and A and C are matrices, with A an M by N
*  matrix (globally), and C an N by M matrix (globally).
*
*  Parameters
*  ==========
*
*  TRANS  - (input) CHARACTER*1
*           TRANS specifies whether A is transposed or conjugate
*           transposed.
*
*              TRANS = 'T',   transpose;
*
*              TRANS = 'C',   conjugate transpose.
*
*  M      - (input) INTEGER
*           M specifies the (global) number of rows of the matrix A and
*           the (global) number of rows of the matrix C.  M >= 0.
*
*  N      - (input) INTEGER
*           N specifies the (global) number of columns of the matrix A
*           and columns of the matrix B.  N >= 0.
*
*  MB     - (input) INTEGER
*           MB specifies the row block size of the matrix A and the
*           column block of the matrix C.  MB >= 1.
*
*  NB     - (input) INTEGER
*           NB specifies the column block size of the matrix A and the
*           row block size of the matrix C.  NB >= 1.
*
*  A      - (input) DOUBLE PRECISION array of DIMENSION ( LDA, Nq ).
*           The leading Mp by Nq part of the array A must contain the
*           local matrix  A.  Mp and Nq are local variables
*           (see description of local parameters).
*
*  LDA    - (input) INTEGER
*           The leading dimension of the (local) array A.
*           LDA >= max( 1, Mp ).
*
*  BETA   - (input) DOUBLE PRECISION
*           BETA  specifies the scalar beta.  When BETA is supplied as
*           zero then C need not be set on input.
*
*  C      - (input/ouput) DOUBLE PRECISION array of DIMENSION (LDC, Mq).
*           On entry the leading Np by Mq part of the array C must
*           contain the local matrix C, except when beta is zero,
*           in which case C need not be set on entry.
*           On exit, the array C is overwritten by the Np by Mq matrix
*           (A'+bata*C).  Np and Mq are local variables
*           (see description of local parameters).
*
*  LDC    - (input) INTEGER
*           The leading dimension of the (local) array C.
*           LDC >= max( 1, Np ).
*
*  IMROW  - (input) INTEGER
*           IMROW specifies a row of the process template, which holds
*           the first block of the matrices.  0 <= IMROW < NPROW.
*
*  IMCOL  - (input) INTEGER
*           IMCOL specifies a column of the process template, which
*           holds the first block of the matrices.  0 <= IMCOL < NPCOL.
*
*  WORK   - (workspace) DOUBLE PRECISION array
*           See requirements.
*
*  IWORK  - (workspace) INTEGER array
*           See requirements.
*
*  Local  Parameters
*  =================
*
*  LCM   =  the lowest common multiple of P and Q
*  LCMP  =  LCM/P = number of template rows in LCM block
*  LCMQ  =  LCM/Q = number of template columns in LCM block
*  IGCD   =  the greatest common divisor (GCD) of P and Q
*  MpxNq =  size of (local) matrix A in the process, iam
*  NpxMq =  size of (local) matrix C in the process, iam
*  KMOD  =  Define Group I.D.
*  item  =  temporal integer parameter
*
*    Two buffers for storing A' and T(= subblock of A')
*       WORK       <== A'
*       WORK(IPT)  <== T
*
*    Three interger buffers
*       IWORK(1,k) <== starting point of row subblock of A  to send and
*                      C to receive in K2 loop (rowwise communication)
*       IWORK(2,k) <== starting point of column subblock of A to send in
*                      J1 loop (columnwise communication)
*       IWORK(3,k) <== starting point of column subblock of C to receive
*                      in J1 loop (columnwise communication)
*
*  Requirements (approximate)
*  ==========================
*
*   Size(IWORK) = 3 x MAX(P, Q)
*   Size(WORK)  = 2 x Ceil(Ceil(M,MB),LCM)xMB x Ceil(Ceil(N,NB),LCM)xNB
*
* ======================================================================
*
*     ..
*     .. Parameters ..
      DOUBLE PRECISION   ONE,          ZERO
      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
*     ..
*     .. Local Scalars ..
      INTEGER            ICTXT, IGCD, INFO, IPT, ITEM, J1, K1, K2, KIA
      INTEGER            KIC, KJA, KJC, KMOD1, KMOD2, KPCOL, KPROW
      INTEGER            LBM, LBM0, LBM1, LBM2, LBN, LBN0, LBN1, LBN2
      INTEGER            LCM, LCMP, LCMQ,  LDT, MB0, MB1, MB2, MCOL
      INTEGER            ML, MP, MPROW, MPCOL, MQ, MRCOL, MRROW, MYCOL
      INTEGER            MYROW, NB0, NB1, NB2, NCOL, NL, NP, NPCOL
      INTEGER            NPROW, NQ
      DOUBLE PRECISION   TBETA
*     ..
*     .. External Subroutines ..
      EXTERNAL           PXERBLA, DSCAL, DTR2MX, DTR2BF
      EXTERNAL           DMV2MX, DGESD2D, DGERV2D,
     $                   BLACS_GRIDINFO
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MOD
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL, ILCM, NUMROC
      EXTERNAL           ICEIL, ILCM, LSAME, NUMROC
*     ..
*     .. Scalars in Common ..
      INTEGER            IAZ, JAZ, ITZ, JTZ
*     ..
*     .. Common Blocks ..
      COMMON             /COMMTRB/ IAZ, JAZ, ITZ, JTZ
      COMMON             /CONTEXT/ ICTXT
*     ..
*     .. Executable Statements ..
*
*     Get grid parameters
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Test for the input parameters.
*
      INFO = 0
      IF( ( .NOT. LSAME( TRANS, 'T' ) ) .AND.
     $    ( .NOT. LSAME( TRANS, 'C' ) )           ) THEN
         INFO = 1
      ELSE IF( M  .LT. 0                          ) THEN
         INFO = 2
      ELSE IF( N  .LT. 0                          ) THEN
         INFO = 3
      ELSE IF( MB .LT. 1                          ) THEN
         INFO = 4
      ELSE IF( NB .LT. 1                          ) THEN
         INFO = 5
      ELSE IF( LDA.LT. 1                          ) THEN
         INFO = 7
      ELSE IF( LDC.LT. 1                          ) THEN
         INFO = 10
      ELSE IF( IMROW .LT. 0 .OR. IMROW .GE. NPROW ) THEN
         INFO = 11
      ELSE IF( IMCOL .LT. 0 .OR. IMCOL .GE. NPCOL ) THEN
         INFO = 12
      END IF
*
   10 CONTINUE
      IF( INFO .NE. 0 ) THEN
          CALL PXERBLA( ICTXT, 'PDTRANS', INFO )
          RETURN
      END IF
*
*     Initialize parameters
*
      MPROW = NPROW + MYROW
      MPCOL = NPCOL + MYCOL
      MRROW = MOD( MPROW-IMROW, NPROW )
      MRCOL = MOD( MPCOL-IMCOL, NPCOL )
*
      LCM  = ILCM( NPROW, NPCOL )
      LCMP = LCM / NPROW
      LCMQ = LCM / NPCOL
      IGCD  = NPROW  / LCMQ
*
      MP = NUMROC( M, MB, MRROW, 0, NPROW )
      MQ = NUMROC( M, MB, MRCOL, 0, NPCOL )
      NP = NUMROC( N, NB, MRROW, 0, NPROW )
      NQ = NUMROC( N, NB, MRCOL, 0, NPCOL )
*
      LBM = ICEIL( ICEIL( M, MB ), LCM )
      LBN = ICEIL( ICEIL( N, NB ), LCM )
*
*     Test for the input parameters again with local parameters
*
      IF(      LDA .LT. MP ) THEN
         INFO = 7
      ELSE IF( LDC .LT. NP ) THEN
         INFO = 10
      END IF
      IF( INFO .NE. 0 ) GO TO 10
*
*     Quick return if possible.
*
      IF( M.EQ.0 .OR. N.EQ.0 ) RETURN
*
*     At first, scale C with beta if beta != 0.0 & beta != 1.0
*
      TBETA = BETA
      IF( BETA.NE.ZERO .AND. BETA.NE.ONE ) THEN
         DO 20 J1 = 1, MQ
            CALL DSCAL( NP, BETA, C(1,J1), 1 )
   20    CONTINUE
         TBETA = ONE
      END IF
*
      IAZ = LCMP * MB
      JAZ = LCMQ * NB
      ITZ = LCMP * NB
      JTZ = LCMQ * MB
*
      ML  = LBM*MB
      NL  = LBN*NB
      IPT = ML * NL + 1
      LDT = NL
      KPROW = MRROW + NPROW
      KPCOL = MRCOL + NPCOL
*
*     Initialize Parameters -- Compute the positions of subblocks
*
      DO 30 K1 = 0, NPCOL-1
         NCOL = MOD( KPCOL-K1, NPCOL )
         DO 30 J1 = 0, LCMQ-1
            ITEM = NPCOL*J1 + NCOL
            IF( MOD(ITEM, NPROW).EQ.MRROW ) IWORK(1,NCOL) = ITEM / NPROW
   30 CONTINUE
*
      DO 40 J1 = 0, LCMQ-1
         ITEM = MOD( NPCOL*J1+MRCOL, NPROW )
         IWORK(2, ITEM) = J1
         IWORK(3, ITEM) = J1
         DO 40 K1 = 1, IGCD-1
            IWORK(2, MOD(ITEM+NPROW-K1, NPROW)) = J1
            IWORK(3, MOD(ITEM+K1, NPROW))       = J1
   40 CONTINUE
*
*     Set parameters for efficient copying
*
      LBM0 = LBM
      LBM1 = LBM
      LBM2 = LBM
      LBN0 = LBN
      LBN1 = LBN
      LBN2 = LBN
      MB0  = MB
      MB1  = MB
      MB2  = MB
      NB0  = NB
      NB1  = NB
      NB2  = NB
*
      IF( NPROW.EQ.NPCOL ) THEN
          LBM0 = 1
          LBN0 = 1
          MB0  = MP
          NB0  = NQ
      END IF
      IF( NPROW.EQ.LCM ) THEN
          LBM1 = 1
          LBN2 = 1
          MB1  = MP
          NB2  = NP
      END IF
      IF( NPCOL.EQ.LCM ) THEN
          LBN1 = 1
          LBM2 = 1
          NB1  = NQ
          MB2  = MQ
      END IF
*
*     For each K2 loop (rowwise), Copy A' to WORK & Send it to KTPROC
*                                 then, Receive WORK and Copy WORK to C
*
      KMOD1 = MOD( NPROW+MRCOL-MRROW, IGCD )
      KMOD2 = MOD( IGCD-KMOD1, IGCD )
*
      DO 60 K2 = 0, LCMP-1
*
*        Copy A' to WORK in the appropriate order & Send it
*
         K1   = K2*IGCD + KMOD1
         MCOL = MOD( KPCOL-K1, NPCOL )
         KIA  = IWORK( 1, MCOL ) * MB
         MCOL = MOD( MCOL+IMCOL, NPCOL )
         NCOL = MOD( MRCOL+K2*IGCD+KMOD2, NPCOL )
         KIC  = IWORK( 1, NCOL ) * NB
         NCOL = MOD( NCOL+IMCOL, NPCOL )
*
         DO 50 J1 = 0, LCMQ-1
            KJA = IWORK(2, MOD(MRROW+IGCD*J1, NPROW)) * NB
*
            IF( MYROW.EQ.MOD(MYROW+IGCD*J1+KMOD1,NPROW) .AND.
     $          MYCOL.EQ.MCOL ) THEN
                KJC = IWORK(3, MOD(KPROW-IGCD*J1, NPROW)) * MB
                CALL DTR2MX( A(KIA+1,KJA+1), LDA, TBETA,
     $                       C(KIC+1,KJC+1), LDC, LBM0, LBN0, MB0,
     $                       NB0, MP-KIA, NQ-KJA )
*
            ELSE
                CALL DTR2BF( A(KIA+1,KJA+1), LDA, WORK, LDT,
     $                       LBM1, LBN1, MB1, NB1, MP-KIA, NQ-KJA )
*
                IF( NPROW.EQ.NPCOL .AND. BETA.EQ.ZERO .AND.
     $              LDC.EQ.LDT )THEN
                    CALL DGESD2D( ICTXT, NL, ML, WORK, NL,
     $                            MOD(MYROW+IGCD*J1+KMOD1,NPROW), MCOL )
                    KJC = IWORK(3, MOD(KPROW-IGCD*J1, NPROW)) * MB
                    CALL DGERV2D( ICTXT, NL, ML, C(1,KJC+1), LDC,
     $                       MOD(MPROW-IGCD*J1-KMOD2,NPROW), NCOL )
*
                ELSE
                    CALL DGESD2D( ICTXT, NL, ML, WORK, NL,
     $                            MOD(MYROW+IGCD*J1+KMOD1,NPROW), MCOL )
                    CALL DGERV2D( ICTXT, NL, ML, WORK(IPT), NL,
     $                            MOD(MPROW-IGCD*J1-KMOD2,NPROW), NCOL )
                    KJC = IWORK(3, MOD(KPROW-IGCD*J1, NPROW)) * MB
                    CALL DMV2MX( WORK(IPT), LDT, TBETA, C(KIC+1,KJC+1),
     $                           LDC, LBN2, LBM2, NB2, MB2,
     $                           NP-KIC, MQ-KJC )
                END IF
            END IF
   50    CONTINUE
   60 CONTINUE
*
      RETURN
*
*     End of PDTRANS
*
      END
*
*  =====================================================================
*  Subroutine DTR2MX
*  =====================================================================
*
      SUBROUTINE DTR2MX( A, LDA, BETA, T, LDT, NROW, NCOL,
     $                   MB, NB, ILT, JLT )
*
*  -- PUMMA Package routine (version 2.1) --
*     Jaeyoung Choi, Oak Ridge National Laboratory.
*     Jack Dongarra, Univ. of Tennessee, Oak Ridge National Laboratory.
*     David Walker,  Oak Ridge National Laboratory.
*     October 31, 1994.
*
*     ..
*     .. Scalar Arguments ..
      INTEGER            ILT, JLT, LDA, LDT, MB, NB, NCOL, NROW
      DOUBLE PRECISION   BETA
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), T( LDT, * )
*     ..
*
*  Purpose
*  =======
*
*  T <== A' + beta*T (assume beta = 0.0, or 1.0)
*  T is a scattered 2-D array from a scattered 2-D array A
*
* ======================================================================
*
*     ..
*     .. Parameters ..
      DOUBLE PRECISION   ZERO
      PARAMETER        ( ZERO = 0.0D+0 )
*     ..
*     .. Local Variables ..
      INTEGER            IA, IRM, IT, JA, JJ, JRM, JT, K, KI, KJ, MR
*     ..
*     .. Scalars in Common ..
      INTEGER            IAZ, JAZ, ITZ, JTZ
*     ..
*     .. Common blocks ..
      COMMON /COMMTRB/   IAZ, JAZ, ITZ, JTZ
*     ..
*     .. Executable Statements ..
*
*     T = A'
*
      IA = 0
      JT = 0
*
      IF( BETA.EQ.ZERO ) THEN
          DO 40 KI = 0, NROW-2
             JA = 0
             IT = 0
             DO 20 KJ = 0, NCOL-2
                DO 10 JJ = 1, NB
                   DO 10 K = 1, MB
                      T(IT+JJ,JT+K) = A(IA+K,JA+JJ)
   10           CONTINUE
                JA = JA + JAZ
                IT = IT + ITZ
   20        CONTINUE
*
             JRM = JLT - JA
             IF( JRM.GT.0 ) THEN
                 DO 30 JJ = 1, MIN(NB, JRM)
                    DO 30 K = 1, MB
                       T(IT+JJ,JT+K) = A(IA+K,JA+JJ)
   30            CONTINUE
             END IF
             IA = IA + IAZ
             JT = JT + JTZ
   40     CONTINUE
*
          IRM = ILT - IA
          IF( IRM.GT.0 ) THEN
              JA = 0
              IT = 0
              MR = MIN(IRM, MB)
              DO 60 KJ = 0, NCOL-2
                 DO 50 JJ = 1, NB
                    DO 50 K = 1, MR
                       T(IT+JJ,JT+K) = A(IA+K,JA+JJ)
   50            CONTINUE
                 JA = JA + JAZ
                 IT = IT + ITZ
   60         CONTINUE
*
              JRM = JLT - JA
              IF( JRM.GT.0 ) THEN
                  DO 70 JJ = 1, MIN(NB, JRM)
                     DO 70 K = 1, MR
                        T(IT+JJ,JT+K) = A(IA+K,JA+JJ)
   70             CONTINUE
              END IF
          END IF
*
      ELSE
*
*         T = A' + T
*
          DO 110 KI = 0, NROW-2
             JA = 0
             IT = 0
             DO 90 KJ = 0, NCOL-2
                DO 80 JJ = 1, NB
                   DO 80 K = 1, MB
                      T(IT+JJ,JT+K) = T(IT+JJ,JT+K)+A(IA+K,JA+JJ)
   80           CONTINUE
                JA = JA + JAZ
                IT = IT + ITZ
   90        CONTINUE
*
             JRM = JLT - JA
             IF( JRM.GT.0 ) THEN
                 DO 100 JJ = 1, MIN(NB, JRM)
                    DO 100 K = 1, MB
                       T(IT+JJ,JT+K) = T(IT+JJ,JT+K)+A(IA+K,JA+JJ)
  100            CONTINUE
             END IF
             IA = IA + IAZ
             JT = JT + JTZ
  110     CONTINUE
*
          IRM = ILT - IA
          IF( IRM.GT.0 ) THEN
              JA = 0
              IT = 0
              MR = MIN(IRM, MB)
              DO 130 KJ = 0, NCOL-2
                 DO 120 JJ = 1, NB
                    DO 120 K = 1, MR
                       T(IT+JJ,JT+K) = T(IT+JJ,JT+K)+A(IA+K,JA+JJ)
  120            CONTINUE
                 JA = JA + JAZ
                 IT = IT + ITZ
  130         CONTINUE
*
              JRM = JLT - JA
              IF( JRM.GT.0 ) THEN
                  DO 140 JJ = 1, MIN(NB, JRM)
                     DO 140 K = 1, MR
                        T(IT+JJ,JT+K) = T(IT+JJ,JT+K)+A(IA+K,JA+JJ)
  140             CONTINUE
              END IF
          END IF
      END IF
*
      RETURN
*
*     End of DTR2MX
*
      END
*
*  =====================================================================
*  Subroutine DTR2BF
*  =====================================================================
*
      SUBROUTINE DTR2BF( A, LDA, T, LDT, NROW, NCOL, MB, NB,
     $                   ILT, JLT )
*
*  -- PUMMA Package routine (version 2.1) --
*     Jaeyoung Choi, Oak Ridge National Laboratory.
*     Jack Dongarra, Univ. of Tennessee, Oak Ridge National Laboratory.
*     David Walker,  Oak Ridge National Laboratory.
*     October 31, 1994.
*
*     ..
*     .. Scalar Arguments ..
      INTEGER            ILT, JLT, LDA, LDT, MB, NB, NCOL, NROW
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), T( LDT, * )
*
*  Purpose
*  =======
*
*  T <== A'
*  T is a condensed 2-D buffer from a scattered 2-D array A
*
* ======================================================================
*
*     ..
*     .. Local Variables ..
      INTEGER            IA, IRM, IT, JA, JJ, JRM, JT, K, KI, KJ, MR
*     ..
*     .. Scalars in Common ..
      INTEGER            IAZ, JAZ, ITZ, JTZ
*     ..
*     .. Common blocks ..
      COMMON /COMMTRB/   IAZ, JAZ, ITZ, JTZ
*     ..
*     .. Executable Statements ..
*
      IA = 0
      JT = 0
      DO 40 KI = 0, NROW-2
         JA = 0
         IT = 0
         DO 20 KJ = 0, NCOL-2
            DO 10 JJ = 1, NB
               DO 10 K = 1, MB
                  T(IT+JJ,JT+K) = A(IA+K,JA+JJ)
   10       CONTINUE
            JA = JA + JAZ
            IT = IT + NB
   20    CONTINUE
*
         JRM = JLT - JA
         IF( JRM.GT.0 ) THEN
             DO 30 JJ = 1, MIN(NB,JRM)
                DO 30 K = 1, MB
                   T(IT+JJ,JT+K) = A(IA+K,JA+JJ)
   30        CONTINUE
         END IF
         IA = IA + IAZ
         JT = JT + MB
   40 CONTINUE
*
      IRM = ILT - IA
      IF( IRM.GT.0 ) THEN
          JA = 0
          IT = 0
          MR = MIN(MB, IRM)
          DO 60 KJ = 0, NCOL-2
             DO 50 JJ = 1, NB
                DO 50 K = 1, MR
                   T(IT+JJ,JT+K) = A(IA+K,JA+JJ)
   50        CONTINUE
             JA = JA + JAZ
             IT = IT + NB
   60     CONTINUE
*
          JRM = JLT - JA
          IF( JRM.GT.0 ) THEN
              DO 70 JJ = 1, MIN(NB,JRM)
                 DO 70 K = 1, MR
                    T(IT+JJ,JT+K) = A(IA+K,JA+JJ)
   70         CONTINUE
          END IF
      END IF
*
      RETURN
*
*     End of DTR2BF
*
      END
*
*  =====================================================================
*  Subroutine DMV2MX
*  =====================================================================
*
      SUBROUTINE DMV2MX( T, LDT, BETA, A, LDA, NROW, NCOL, MB, NB,
     $                   ILT, JLT )
*
*  -- PUMMA Package routine (version 2.1) --
*     Jaeyoung Choi, Oak Ridge National Laboratory.
*     Jack Dongarra, Univ. of Tennessee, Oak Ridge National Laboratory.
*     David Walker,  Oak Ridge National Laboratory.
*     October 31, 1994.
*
*     ..
*     .. Scalar Arguments ..
      INTEGER            ILT, JLT, LDA, LDT, MB, NB, NCOL, NROW
      DOUBLE PRECISION   BETA
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   T( LDT, *), A( LDA, * )
*     ..
*
*  Purpose
*  =======
*
*  A <== T + beta*A (assume beta = 0.0, or 1.0)
*  A is a scattered 2-D array from a condensed 2-D buffer T
*
* ======================================================================
*
*     ..
*     .. Parameters ..
      DOUBLE PRECISION   ONE,          ZERO
      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
*     ..
*     .. Local Variables ..
      INTEGER            IA, IRM, IT, JA, JJ, JRM, JT, K, KI, KJ, MR
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN
*     ..
*     .. External Functions ..
      EXTERNAL           DCOPY, DAXPY
*     ..
*     .. Scalars in Common ..
      INTEGER            IAZ, JAZ, ITZ, JTZ
*     ..
*     .. Common blocks ..
      COMMON /COMMTRB/   IAZ, JAZ, ITZ, JTZ
*
      IT = 0
      IA = 0
*
*     A <== T
*
      IF( BETA.EQ.ZERO ) THEN
*
*         If NPROW = 1, use DCOPY
*
          IF( NROW.EQ.1 ) THEN
              JT = 0
              JA = 0
              DO 20 KJ = 0, NCOL-2
                 DO 10 JJ = 1, NB
                    CALL DCOPY( MIN(MB,ILT), T(1,JT+JJ), 1,
     $                          A(1,JA+JJ), 1 )
   10            CONTINUE
                 JT = JT + NB
                 JA = JA + JTZ
   20         CONTINUE
*
              JRM = JLT - JA
              IF( JRM.GT.0 ) THEN
                  DO 30 JJ = 1, MIN(NB, JRM)
                     CALL DCOPY( MIN(MB,ILT), T(1,JT+JJ), 1,
     $                           A(1,JA+JJ), 1 )
   30             CONTINUE
              END IF
*
          ELSE
*
              DO 70 KI = 0, NROW-2
                 JT = 0
                 JA = 0
                 DO 50 KJ = 0, NCOL-2
                    DO 40 JJ = 1, NB
                       DO 40 K = 1, MB
                          A(IA+K,JA+JJ) = T(IT+K,JT+JJ)
   40               CONTINUE
                    JT = JT + NB
                    JA = JA + JTZ
   50            CONTINUE
*
                 JRM = JLT - JA
                 IF( JRM.GT.0 ) THEN
                     DO 60 JJ = 1, MIN(NB, JRM)
                        DO 60 K = 1, MB
                           A(IA+K,JA+JJ) = T(IT+K,JT+JJ)
   60                CONTINUE
                 END IF
                 IT = IT + MB
                 IA = IA + ITZ
   70         CONTINUE
*
              IRM = ILT - IA
              IF( IRM.GT.0 ) THEN
                  JT = 0
                  JA = 0
                  MR = MIN(MB, IRM)
                  DO 90 KJ = 0, NCOL-2
                     DO 80 JJ = 1, NB
                        DO 80 K = 1, MR
                           A(IA+K,JA+JJ) = T(IT+K,JT+JJ)
   80                CONTINUE
                     JT = JT + NB
                     JA = JA + JTZ
   90             CONTINUE
*
                  JRM = JLT - JA
                  IF( JRM.GT.0 ) THEN
                      DO 100 JJ = 1, MIN(NB, JRM)
                         DO 100 K = 1, MR
                            A(IA+K,JA+JJ) = T(IT+K,JT+JJ)
  100                 CONTINUE
                  END IF
              END IF
          END IF
*
*         A <== T + A
*
      ELSE
*
*         If NPROW = 1, use DAXPY
*
          IF( NROW.EQ.1 ) THEN
              JT = 0
              JA = 0
              DO 120 KJ = 0, NCOL-2
                 DO 110 JJ = 1, NB
                    CALL DAXPY( MIN(MB,ILT), ONE, T(1,JT+JJ), 1,
     $                          A(1,JA+JJ), 1 )
  110            CONTINUE
                 JT = JT + NB
                 JA = JA + JTZ
  120         CONTINUE
*
              JRM = JLT - JA
              IF( JRM.GT.0 ) THEN
                  DO 130 JJ = 1, MIN(NB, JRM)
                     CALL DAXPY( MIN(MB,ILT), ONE, T(1,JT+JJ), 1,
     $                           A(1,JA+JJ), 1 )
  130             CONTINUE
              END IF
*
          ELSE
              DO 170 KI = 0, NROW-2
                 JT = 0
                 JA = 0
                 DO 150 KJ = 0, NCOL-2
                    DO 140 JJ = 1, NB
                       DO 140 K = 1, MB
                          A(IA+K,JA+JJ) = A(IA+K,JA+JJ) + T(IT+K,JT+JJ)
  140               CONTINUE
                    JT = JT + NB
                    JA = JA + JTZ
  150            CONTINUE
*
                 JRM = JLT - JA
                 IF( JRM.GT.0 ) THEN
                     DO 160 JJ = 1, MIN(NB, JRM)
                        DO 160 K = 1, MB
                           A(IA+K,JA+JJ) = A(IA+K,JA+JJ) + T(IT+K,JT+JJ)
  160                CONTINUE
                 END IF
                 IT = IT + MB
                 IA = IA + ITZ
  170         CONTINUE
*
              IRM = ILT - IA
              IF( IRM.GT.0 ) THEN
                  JT = 0
                  JA = 0
                  MR = MIN(MB, IRM)
                  DO 190 KJ = 0, NCOL-2
                     DO 180 JJ = 1, NB
                        DO 180 K = 1, MR
                           A(IA+K,JA+JJ) = A(IA+K,JA+JJ) + T(IT+K,JT+JJ)
  180                CONTINUE
                     JT = JT + NB
                     JA = JA + JTZ
  190             CONTINUE
*
                  JRM = JLT - JA
                  IF( JRM.GT.0 ) THEN
                      DO 200 JJ = 1, MIN(NB, JRM)
                         DO 200 K = 1, MR
                            A(IA+K,JA+JJ) = A(IA+K,JA+JJ) +
     $                                      T(IT+K,JT+JJ)
  200                 CONTINUE
                  END IF
              END IF
          END IF
      END IF
*
      RETURN
*
*     End of DMV2MX
*
      END
