      SUBROUTINE PZHPR2KF( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA,
     $                     B, IB, JB, DESCB, BETA, C, IC, JC, DESCC,
     $                     WORK, LWORK )
*
*
*  -- ScaLAPACK auxiliary routine (version 2.0) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     Oct 10, 1996
*
*
* Purpose
* =======
*
* PSPR2KF_  performs one of the hermitian rank 2k operations
*
* sub( C ) := alpha*sub( A )*conjg( sub( B ) )' +
*             conjg( alpha )*sub( B )*conjg( sub( A ) )' +
*             beta*sub( C ),
*
* or
*
* sub( C ) := alpha*conjg( sub( A )' )*sub( B ) +
*             conjg( alpha )*conjg( sub( B )' )*sub( A ) +
*             beta*sub( C ),
* PSPR2KF_  performs one of the symmetric rank 2k operations
*
* sub( C ) := alpha*sub( A )*sub( B )' + alpha*sub( B )*sub( A )' +
*             beta*sub( C ),
*
* or
*
* sub( C ) := alpha*sub( A )'*sub( B ) + alpha*sub( B )'*sub( A ) +
*             beta*sub( C ),
*
* where sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1),
*
*       sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1)  if TRANS = 'N',
*                        A(IA:IA+K-1,JA:JA+N-1)  otherwise,
*
*       sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1)  if TRANS = 'N',
*                        B(IB:IB+K-1,JB:JB+N-1)  otherwise.
*
* where sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1),
*
*       sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1)  if TRANS = 'N',
*                        A(IA:IA+K-1,JA:JA+N-1)  otherwise,
*
*       sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1)  if TRANS = 'N',
*                        B(IB:IB+K-1,JB:JB+N-1)  otherwise.
*
* Matrix A is stored in packed storage.
*
* Alpha and beta are scalars with beta real, sub( C ) is an N-by-N
* distributed matrix and sub( A ) and sub( B ) are N-by-K
* distributed matrices in the first case and a K-by-N distributed
* matrices in the second case.
*
* Notes
* =====
*
* Each global data object is described by an associated description
* vector.  This vector stores the information required to establish
* the mapping between an object element and its corresponding process
* and memory location.
*
* Let A be a generic term for any 2D block cyclicly distributed array.
* Such a global array has an associated description vector descA.
* In the following comments, the character _ should be read as
* "of the global array".
*
* NOTATION        STORED IN      EXPLANATION
* --------------- -------------- --------------------------------------
* DT_A   (global) descA[ DT_ ]   The descriptor type.  In this case,
*                                DT_A = 1.
* CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
*                                the BLACS process grid A is distribu-
*                                ted over. The context itself is glo-
*                                bal, but the handle (the integer
*                                value) may vary.
* M_A    (global) descA[ M_ ]    The number of rows in the global
*                                array A.
* N_A    (global) descA[ N_ ]    The number of columns in the global
*                                array A.
* MB_A   (global) descA[ MB_ ]   The blocking factor used to distribu-
*                                te the rows of the array.
* NB_A   (global) descA[ NB_ ]   The blocking factor used to distribu-
*                                te the columns of the array.
* RSRC_A (global) descA[ RSRC_ ] The process row over which the first
*                                row of the array A is distributed.
* CSRC_A (global) descA[ CSRC_ ] The process column over which the
*                                first column of the array A is
*                                distributed.
* LLD_A  (local)  descA[ LLD_ ]  The leading dimension of the local
*                                array.  LLD_A >= MAX(1,LOCr(M_A)).
*
* Let K be the number of rows or columns of a distributed matrix,
* and assume that its process grid has dimension p x q.
* LOCr( K ) denotes the number of elements of K that a process
* would receive if K were distributed over the p processes of its
* process column.
* Similarly, LOCc( K ) denotes the number of elements of K that a
* process would receive if K were distributed over the q processes of
* its process row.
* The values of LOCr() and LOCc() may be determined via a call to the
* ScaLAPACK tool function, NUMROC:
*         LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*         LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
* An upper bound for these quantities may be computed by:
*         LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*         LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
* according to a square block cyclic decomposition, i.e MB_C = NB_C, if
* N+MOD(IC-1,MB_C) > MB_C or N+MOD(JC-1,NB_C) > NB_C, in which case
* sub( C ) is not just contained into a block and IC-1 (resp. JC-1)
* must be a multiple of MB_C (resp. NB_C).
*
* If TRANS = 'N', then sub( A ), sub( B ) and sub( C ) must be row
* aligned, i.e the row process having the first entries of sub( A )
* must also own the first entries of sub( B ) and sub( C ).
* If sub( C ) is not just contained into a block, IC-1 (resp. IB-1,
* IA-1) must be a multiple of MB_C (resp. MB_B, MB_A) and the column
* block size of sub( C ) must be equal to the row block size of A,
* and the column block sizes of A and B must be equal, i.e NB_C = MB_A
* and NB_A = NB_B.
*
* Otherwise, then sub( A ), sub( B ) and sub( C ) must be column
* aligned, i.e the column process having the first entries of sub( A )
* must also own the first entries of sub( B ) and sub( C ).
* If sub( C ) is not just contained into a block, JC-1 (resp. JB-1,
* JA-1) must be a multiple of NB_C (resp. NB_B, NB_A) and the row
* block size of sub( C ) must be equal to the column block size of A,
* and the row block sizes of A and B must be equal, i.e MB_C = NB_A
* and MB_A = MB_B.
*
* Parameters
* ==========
*
* UPLO    (global input) CHARACTER
*         On  entry,   UPLO  specifies  whether  the  upper  or  lower
*         triangular  part  of the  distributed matrix sub( C ) is to
*         be  referenced  as  follows:
*
*         UPLO = 'U' or 'u' Only the upper triangular part of sub( C )
*                           is to be referenced,
*
*         UPLO = 'L' or 'l' Only the lower triangular part of sub( C )
*                           is to be referenced.
*
* TRANS   (global input) CHARACTER
*         On entry,  TRANS  specifies the operation to be performed as
*         follows:
*
*         TRANS = 'N' or 'n'
*           sub( C ) := alpha*sub( A )*conjg( sub( B )' ) +
*                       conjg( alpha )*sub( B )*conjg( sub( A )' ) +
*                       beta*C,
*
*         TRANS = 'C' or 'c'
*           sub( C ) := alpha*conjg( sub( A )' )*sub( B ) +
*                       conjg( alpha )*conjg( sub( B )' )*sub( A ) +
*                       beta*sub( C ).
*
* N       (global input) INTEGER
*         The order of the distributed matrix sub( C ). N >= 0.
*
* K       (global input) INTEGER
*         On entry with  TRANS = 'N' or 'n', K specifies the number of
*         columns  of the distributed matrices sub( A ) and sub( B ),
*         and on entry with TRANS = 'C' or 'c', K specifies the number
*         of rows of the distributed matrices sub( A ) and sub( B ).
*         K >= 0.
*
* A       (local input) DTYPE
*         an array of dimension (LLD_A, KLa), where KLa is
*         LOCc(JA+K-1) when  TRANS = 'N' or 'n',  and is LOCc(JA+N-1)
*         otherwise.  Before entry with TRANS = 'N' or 'n', this array
*         contains the local pieces of the distributed matrix sub( A ).
*
* IA      (global input) INTEGER
*         The global row index of the submatrix of the distributed
*         matrix A to operate on.
*
* JA      (global input) INTEGER
*         The global column index of the submatrix of the distributed
*         matrix A to operate on.
*
* DESCA   (global and local input) INTEGER array of dimension 8.
*         The array descriptor of the distributed matrix A.
*
* B       (local input) DTYPE
*         an array of dimension (LLD_B, KLb), where KLb is
*         LOCc(JB+K-1) when  TRANS = 'N' or 'n',  and is LOCc(JB+N-1)
*         otherwise.  Before entry with TRANS = 'N' or 'n', this array
*         contains the local pieces of the distributed matrix sub( B ).
*
* IB      (global input) INTEGER
*         The global row index of the submatrix of the distributed
*         matrix B to operate on.
*
* JB      (global input) INTEGER
*         The global column index of the submatrix of the distributed
*         matrix B to operate on.
*
* DESCB   (global and local input) INTEGER array of dimension 8.
*         The array descriptor of the distributed matrix B.
*
* BETA    (global input) FTYPE
*         On entry,  BETA  specifies the scalar beta.
*
* C       (local input/local output) DTYPE
*         an array of dimension (LLD_C, LOCc(JC+N-1)).
*         Before entry with UPLO = 'U' or 'u', this array contains the
*         local pieces of the N-by-N upper triangular part of the
*         Hermitian distributed matrix sub( C ) and its strictly lower
*         triangular part is not referenced. On exit, the upper trian-
*         gular part of sub( C ) is overwritten by the upper triangular
*         part of the updated distributed matrix.
*         Before entry  with  UPLO = 'L' or 'l', this array contains the
*         local pieces of the N-by-N lower triangular part of the
*         Hermitian distributed matrix sub( C ) and its strictly upper
*         triangular part is not referenced. On exit, the lower trian-
*         gular part of sub( C ) is overwritten by the lower triangular
*         part of the updated distributed matrix.
*         Note that the imaginary parts of the diagonal elements of
*         sub( C ) need not be set, they are assumed to be zero, and
*         on exit they are set to zero.
*
* IC      (global input) INTEGER
*         The global row index of the submatrix of the distributed
*         matrix C to operate on.
*
* JC      (global input) INTEGER
*         The global column index of the submatrix of the distributed
*         matrix C to operate on.
*
* DESCC   (global and local input) INTEGER array of dimension 8.
*         The array descriptor of the distributed matrix C.
*
**  =====================================================
*
*     .. Parameters ..
      INTEGER            DLEN_
      PARAMETER          ( DLEN_ = 9 )
      INTEGER            CTXT_, MB_, NB_
      PARAMETER          ( CTXT_ = 2, MB_ = 5, NB_ = 6 )
      INTEGER            RSRC_, CSRC_, LLD_
      PARAMETER          ( RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Scalar Arguments ..
      CHARACTER          TRANS, UPLO
      INTEGER            IA, IB, IC, JA, JB, JC, K, LWORK, N
      DOUBLE PRECISION   BETA
      COMPLEX*16         ALPHA
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCB( * ), DESCC( * )
      COMPLEX*16         A( * ), B( * ), C( * ), WORK( * )
*     ..
*     .. Local Scalars ..
      LOGICAL            ISLOWER, ISMYCOL, ISMYROW, ISTRANSA, ISUPPER,
     $                   NOTRANSA
      INTEGER            ACOL_SIZE, AROW_SIZE, BCOL_SIZE, BROW_SIZE,
     $                   COFF, CONTXT_A, CONTXT_B, CONTXT_C, CPROC,
     $                   CSRC, IAROW, IBROW, ICDIAG, ICEND, ICFIRST,
     $                   ICROW, ICSTART, IDX, IDX_ACOL, IDX_AROW,
     $                   IDX_BCOL, IDX_BROW, IDX_C, IFREE, IIA, IIB,
     $                   IIC, INEED, INFO, IP_ACOL, IP_AROW, IP_BCOL,
     $                   IP_BROW, ISIZE, JACOL, JBCOL, JCCOL, JCDIAG,
     $                   JCFIRST, JEND, JJA, JJB, JJC, JSIZE, JSTART,
     $                   LCINDX, LDC, LDCOL, LDROW, LMM, LNN, LOFFSET,
     $                   LRINDX, LSIZE, M, MB, MB_C, MM, MYPCOL,
     $                   MYPCOL_A, MYPCOL_B, MYPCOL_C, MYPROW, MYPROW_A,
     $                   MYPROW_B, MYPROW_C, NB, NB_C, NN, NPCOL,
     $                   NPCOL_A, NPCOL_B, NPCOL_C, NPROW, NPROW_A,
     $                   NPROW_B, NPROW_C, ROFF, RPROC, RSRC
      DOUBLE PRECISION   DONE
      COMPLEX*16         ALPHA1, ALPHA2, CBETA, ONE, ZERO
*     ..
*     .. Local Arrays ..
      INTEGER            DESC1( DLEN_ ), DESCACOL( DLEN_ ),
     $                   DESCAROW( DLEN_ ), DESCBCOL( DLEN_ ),
     $                   DESCBROW( DLEN_ )
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            INDXFIRST, INDXG2P, NUMROC, NUMROC2
      EXTERNAL           LSAME, INDXFIRST, INDXG2P, NUMROC, NUMROC2
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DESCINITT, DESCSET, INFOG2L,
     $                   PXERBLA, PZLASCALET, PZLASETT, PZTRANC, ZGEMM,
     $                   ZHER2K
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          DBLE, DCMPLX, DCONJG, MAX, MIN, MOD
*     ..
*     .. Executable Statements ..
      DONE = DBLE( 1 )
      ONE = DCMPLX( DBLE( 1 ) )
      ZERO = DCMPLX( DBLE( 0 ) )
      CBETA = BETA
      ISLOWER = LSAME( UPLO, 'L' )
      ISUPPER = LSAME( UPLO, 'U' )
      NOTRANSA = LSAME( TRANS, 'N' )
      ISTRANSA = .NOT.NOTRANSA
      M = N
      IF( ( M.LE.0 ) .OR. ( N.LE.0 ) .OR. ( K.LE.0 ) ) THEN
         INEED = 1
         WORK( 1 ) = DCMPLX( DBLE( INEED ) )
         RETURN
      ENDIF
*
* Determine storage requirement.
*
      CONTXT_C = DESCC( CTXT_ )
      CALL BLACS_GRIDINFO( CONTXT_C, NPROW_C, NPCOL_C, MYPROW_C,
     $                     MYPCOL_C )
      CONTXT_A = DESCA( CTXT_ )
      IF( CONTXT_A.EQ.CONTXT_C ) THEN
         NPROW_A = NPROW_C
         NPCOL_A = NPCOL_C
         MYPROW_A = MYPROW_C
         MYPCOL_A = MYPCOL_C
      ELSE
         CALL BLACS_GRIDINFO( CONTXT_A, NPROW_A, NPCOL_A, MYPROW_A,
     $                        MYPCOL_A )
      ENDIF
      CONTXT_B = DESCB( CTXT_ )
      IF( CONTXT_B.EQ.CONTXT_C ) THEN
         NPROW_B = NPROW_C
         NPCOL_B = NPCOL_C
         MYPROW_B = MYPROW_C
         MYPCOL_B = MYPCOL_C
      ELSE
         CALL BLACS_GRIDINFO( CONTXT_B, NPROW_B, NPCOL_B, MYPROW_B,
     $                        MYPCOL_B )
      ENDIF
      MB_C = DESCC( MB_ )
      NB_C = DESCC( NB_ )
      ROFF = MOD( MB_C+( IC-1 ), MB_C )
      COFF = MOD( NB_C+( JC-1 ), NB_C )
      MM = ( IC+N-1 ) - ( IC-ROFF ) + 1
      NN = ( JC+N-1 ) - ( JC-COFF ) + 1
      ICROW = INDXG2P( IC, MB_C, MYPROW_C, DESCC( RSRC_ ), NPROW_C )
      JCCOL = INDXG2P( JC, NB_C, MYPCOL_C, DESCC( CSRC_ ), NPCOL_C )
      IAROW = INDXG2P( IA, DESCA( MB_ ), MYPROW_A, DESCA( RSRC_ ),
     $        NPROW_A )
      JACOL = INDXG2P( JA, DESCA( NB_ ), MYPCOL_A, DESCA( CSRC_ ),
     $        NPCOL_A )
      IBROW = INDXG2P( IB, DESCB( MB_ ), MYPROW_B, DESCB( RSRC_ ),
     $        NPROW_B )
      JBCOL = INDXG2P( JB, DESCB( NB_ ), MYPCOL_B, DESCB( CSRC_ ),
     $        NPCOL_B )
      RSRC = ICROW
      CSRC = JCCOL
      LMM = NUMROC( MM, MB_C, MYPROW_C, RSRC, NPROW_C )
      LNN = NUMROC( NN, NB_C, MYPCOL_C, CSRC, NPCOL_C )
*
*  if k is one,
*  Arow is a replicated row vector
*  Acol is a replicated column vector
*
*
*   Replicated (block) row vectors.
*
      RSRC = MYPROW_C
      CSRC = JCCOL
      INFO = 0
      LDROW = MAX( 1, K )
      MB = K
      NB = NB_C
      CALL DESCSET( DESCAROW, K, NN, MB, NB, RSRC, CSRC, CONTXT_C,
     $              LDROW )
      CALL DESCSET( DESCBROW, K, NN, MB, NB, RSRC, CSRC, CONTXT_C,
     $              LDROW )
*
*   Replicated (block) column vectors
*
      RSRC = ICROW
      CSRC = MYPCOL_C
      INFO = 0
      LDCOL = MAX( 1, LMM )
      MB = MB_C
      NB = K
      CALL DESCSET( DESCACOL, MM, K, MB, NB, RSRC, CSRC, CONTXT_C,
     $              LDCOL )
      CALL DESCSET( DESCBCOL, MM, K, MB, NB, RSRC, CSRC, CONTXT_C,
     $              LDCOL )
      AROW_SIZE = K*MAX( 1, LNN )
      BROW_SIZE = K*MAX( 1, LNN )
      ACOL_SIZE = MAX( 1, LMM )*K
      BCOL_SIZE = MAX( 1, LMM )*K
      INEED = MAX( 1, AROW_SIZE+BROW_SIZE+ACOL_SIZE+BCOL_SIZE )
      IF( LWORK.LT.INEED ) THEN
         IF( LWORK.NE.-1 ) THEN
            CALL PXERBLA( CONTXT_C, 'PxSPR2KF', 20 )
         ENDIF
         WORK( 1 ) = DCMPLX( DBLE( INEED ) )
         RETURN
      ENDIF
      IFREE = 1
      IP_ACOL = IFREE
      IFREE = IFREE + ACOL_SIZE
      IP_BROW = IFREE
      IFREE = IFREE + BROW_SIZE
      IP_BCOL = IFREE
      IFREE = IFREE + BCOL_SIZE
      IP_AROW = IFREE
      IFREE = IFREE + AROW_SIZE
*
* Perform scaling of C.
*
      IF( CBETA.EQ.ZERO ) THEN
*
*     Zero out entire matrix.
*
         CALL PZLASETT( UPLO, M, N, ZERO, ZERO, C, IC, JC, DESCC )
      ELSE
         IF( CBETA.NE.ONE ) THEN
            CALL PZLASCALET( UPLO, CBETA, M, N, C, IC, JC, DESCC, INFO )
         ENDIF
      ENDIF
*
*   New PBLAS V2 capabilities.
*
      DESCAROW( RSRC_ ) = -1
      DESCBROW( RSRC_ ) = -1
      DESCACOL( CSRC_ ) = -1
      DESCBCOL( CSRC_ ) = -1
      IF( NOTRANSA ) THEN
*
*      Note sub(A) is n by k,  A( ia:ia+n-1, ja:ja+k-1)
*           sub(B) is n by k,  B( ib:ib+n-1, jb:jb+k-1)
*
         CALL PZTRANC( K, N, ONE, A, IA, JA, DESCA, ZERO,
     $                 WORK( IP_AROW ), 1, 1+COFF, DESCAROW )
         CALL PZTRANC( N, K, ONE, WORK( IP_AROW ), 1, 1+COFF, DESCAROW,
     $                 ZERO, WORK( IP_ACOL ), 1+ROFF, 1, DESCACOL )
         CALL PZTRANC( K, N, ONE, B, IB, JB, DESCB, ZERO,
     $                 WORK( IP_BROW ), 1, 1+COFF, DESCBROW )
         CALL PZTRANC( N, K, ONE, WORK( IP_BROW ), 1, 1+COFF, DESCBROW,
     $                 ZERO, WORK( IP_BCOL ), 1+ROFF, 1, DESCBCOL )
      ELSE
*
*      Note sub(A) is k by n,  A( ia:ia+k-1, ja:ja+n-1)
*           sub(B) is k by n,  B( ib:ib+k-1, jb:jb+n-1)
*
         CALL PZTRANC( N, K, ONE, A, IA, JA, DESCA, ZERO,
     $                 WORK( IP_ACOL ), 1+ROFF, 1, DESCACOL )
         CALL PZTRANC( K, N, ONE, WORK( IP_ACOL ), 1+ROFF, 1, DESCACOL,
     $                 ZERO, WORK( IP_AROW ), 1, 1+COFF, DESCAROW )
         CALL PZTRANC( N, K, ONE, B, IB, JB, DESCB, ZERO,
     $                 WORK( IP_BCOL ), 1+ROFF, 1, DESCBCOL )
         CALL PZTRANC( K, N, ONE, WORK( IP_BCOL ), 1+ROFF, 1, DESCBCOL,
     $                 ZERO, WORK( IP_BROW ), 1, 1+COFF, DESCBROW )
      ENDIF
*
*  Data is in place, no more communications required.
*
      MYPROW = MYPROW_C
      MYPCOL = MYPCOL_C
      NPROW = NPROW_C
      NPCOL = NPCOL_C
      RSRC = DESCC( RSRC_ )
      CSRC = DESCC( CSRC_ )
      NB = DESCC( NB_ )
      MB = DESCC( MB_ )
*
*    Main loop.
*
      JSTART = JC
   10 CONTINUE
      IF( JSTART.LE.JC+N-1 ) THEN
         JEND = JSTART - MOD( ( JSTART-1 )+NB, NB ) + ( NB-1 )
         JEND = MIN( JEND, JC+N-1 )
         JSIZE = JEND - JSTART + 1
         ISMYCOL = ( MYPCOL.EQ.INDXG2P( JSTART, NB, MYPCOL, CSRC,
     $             NPCOL ) )
         IF( ISMYCOL ) THEN
*
*          Handle diagonal block.
*
            ICDIAG = IC + ( JSTART-JC )
            JCDIAG = JC + ( JSTART-JC )
            ISMYROW = ( MYPROW.EQ.INDXG2P( ICDIAG, MB, MYPROW, RSRC,
     $                NPROW ) )
            IF( ISMYROW ) THEN
               CALL DESCINITT( UPLO, ICDIAG, JCDIAG, DESCC, IIC, JJC,
     $                         LOFFSET, DESC1 )
               LDC = DESC1( LLD_ )
               CALL INFOG2L( IIC, JJC, DESC1, NPROW, NPCOL, MYPROW,
     $                       MYPCOL, LRINDX, LCINDX, RPROC, CPROC )
               IDX_C = ( LOFFSET-1 ) + LRINDX +
     $                 ( LCINDX-1 )*DESC1( LLD_ )
               IIA = ( 1+ROFF ) + ( ICDIAG-IC )
               IDX_ACOL = ( IP_ACOL-1 ) + NUMROC( IIA, DESCACOL( MB_ ),
     $                    MYPROW, DESCACOL( RSRC_ ), NPROW )
               IIB = ( 1+ROFF ) + ( ICDIAG-IC )
               IDX_BCOL = ( IP_BCOL-1 ) + NUMROC( IIB, DESCBCOL( MB_ ),
     $                    MYPROW, DESCBCOL( RSRC_ ), NPROW )
               CALL ZHER2K( UPLO, 'No', JSIZE, K, ALPHA,
     $                      WORK( IDX_ACOL ), DESCACOL( LLD_ ),
     $                      WORK( IDX_BCOL ), DESCACOL( LLD_ ), DONE,
     $                      C( IDX_C ), LDC )
            ENDIF
*
*            Handle off-diagonal block.
*
            IF( ISLOWER ) THEN
               ICSTART = ICDIAG + JSIZE
               ICEND = IC + M - 1
            ELSE
               IF( ISUPPER ) THEN
                  ICSTART = IC
                  ICEND = ICDIAG - 1
               ENDIF
            ENDIF
            ISIZE = ICEND - ICSTART + 1
            LSIZE = 0
            IF( ISIZE.GE.1 ) THEN
               LSIZE = NUMROC2( ISIZE, ICSTART, MB, MYPROW, RSRC,
     $                 NPROW )
            ENDIF
            IF( LSIZE.GE.1 ) THEN
               ICFIRST = INDXFIRST( ISIZE, ICSTART, MB, MYPROW, RSRC,
     $                   NPROW )
               CALL DESCINITT( UPLO, ICFIRST, JSTART, DESCC, IIC, JJC,
     $                         LOFFSET, DESC1 )
               LDC = DESC1( LLD_ )
               CALL INFOG2L( IIC, JJC, DESC1, NPROW, NPCOL, MYPROW,
     $                       MYPCOL, LRINDX, LCINDX, RPROC, CPROC )
               IDX_C = ( LOFFSET-1 ) + LRINDX +
     $                 ( LCINDX-1 )*DESC1( LLD_ )
               IIA = ( 1+ROFF ) + ( ICFIRST-IC )
               IDX_ACOL = ( IP_ACOL-1 ) + NUMROC( IIA, DESCACOL( MB_ ),
     $                    MYPROW, DESCACOL( RSRC_ ), NPROW )
               IIB = ( 1+ROFF ) + ( ICFIRST-IC )
               IDX_BCOL = ( IP_BCOL-1 ) + NUMROC( IIB, DESCBCOL( MB_ ),
     $                    MYPROW, DESCBCOL( RSRC_ ), NPROW )
               JCFIRST = JSTART
               JJA = ( 1+COFF ) + ( JCFIRST-JC )
               IDX = NUMROC( JJA, DESCAROW( NB_ ), MYPCOL,
     $               DESCAROW( CSRC_ ), NPCOL )
               IDX_AROW = ( IP_AROW-1 ) + ( IDX-1 )*DESCAROW( LLD_ ) + 1
               JJB = ( 1+COFF ) + ( JCFIRST-JC )
               IDX = NUMROC( JJB, DESCBROW( NB_ ), MYPCOL,
     $               DESCBROW( CSRC_ ), NPCOL )
               IDX_BROW = ( IP_BROW-1 ) + ( IDX-1 )*DESCBROW( LLD_ ) + 1
               ALPHA1 = ALPHA
               ALPHA2 = DCONJG( ALPHA )
               CALL ZGEMM( 'No', 'No', LSIZE, JSIZE, K, ALPHA1,
     $                     WORK( IDX_ACOL ), DESCACOL( LLD_ ),
     $                     WORK( IDX_BROW ), DESCBROW( LLD_ ), ONE,
     $                     C( IDX_C ), LDC )
               CALL ZGEMM( 'No', 'No', LSIZE, JSIZE, K, ALPHA2,
     $                     WORK( IDX_BCOL ), DESCBCOL( LLD_ ),
     $                     WORK( IDX_AROW ), DESCAROW( LLD_ ), ONE,
     $                     C( IDX_C ), LDC )
            ENDIF
         ENDIF
* end if (ismycol)
         JSTART = JEND + 1
         GOTO 10
      ENDIF
   20 CONTINUE
* end while
      WORK( 1 ) = DCMPLX( DBLE( INEED ) )
      RETURN
      END
