
      SUBROUTINE PDSGNDFL( WANTT, WANTZ, N, N2, K, A, IA, JA, ILO,
     $                     DESCA, S, IS, JS, DESCS, Z, IZ, JZ,
     $                     DESCZ, RHP, E21NORM, WORK, LWORK, IWORK,
     $                     INFO )
      implicit none
*     ..
*     .. Scalar Arguments ..
      INTEGER            N, N2, K, IA, JA, IS, JS, IZ, JZ, ILO,
     $                   LWORK, INFO
      LOGICAL            WANTT, WANTZ, RHP
      DOUBLE PRECISION   E21NORM
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCS( * ), DESCZ( * ), 
     $                   IWORK( * )
      DOUBLE PRECISION   A( * ), S( * ), Z( * ), WORK( LWORK )
*
*
* Purpose
* =======
*
*  Given matrices A and sign(A), PDSGNDFL computes an orthogonal 
*  transformation matrix Q, such that :
*                      k  n-k
*         Q^T*A*Q = [ A11 A12 ] k
*                   [ E21 A22 ] n-k
*
*  where the eigenvalues of A11 are in the selected half-plane
*  (right or left as specified by HALFPLANE), and the 
*  eigenvalues of A22 are in the unselected halfplane.
*
*  In exact arithmetic, E21 = 0. We choose to use the norm of
*  E21 (relative to the norm of our matrix A) as a posterori bound
*  for the errors incurred.
*
*  sign(A) can be computed form PDGESGN.
*
*  The matrix to be deflated - sub( A ) - is 
*  A(IA+ILO-1:IA+ILO+N2-2, JA+ILO-1:JA+ILO-N2-2);
*  the original/root matrix that is being block triangularized by 
*  repeated calls to this routine is A(IA:IA+N-1, JA:JA+N-1).
*  This routine updates the root matrix with the orthogonal matrix from the
*  deflation of sub( A ).
*
*  That is, if we have already deflated the root matrix twice, it may resemble:
*       A_2 = [ A11 A12 A13 ]
*             [  0  A22 A23 ]
*             [  0   0  A33 ]
*  along with the accumulation of orthogonal transformations applied so far:
*       Z_2 = [ Z11 Z12 Z13 ]
*  If the task is to block triangularize A22, then we have
*       Q^T*A22*Q = T = 2x2 block triangular form.
*  We accumulate the orthogonal transformation:
*       Z_3 = [ Z11 Z12*Q Z13 ]
*  and update the off-diagonal blocks of the root matrix:
*       A_3 = [ A11 A12*Q   A13   ]
*             [  0   T    Q^T*A23 ]
*             [  0   0      A33   ]
*  so that the Schur decomposition relationship (Z_3^T * A_3 * Z_3)
*  still holds.
*
* Notes
* =====
*
*  A description vector is associated with each 2D block-cyclicly dis-
*  tributed matrix.  This vector stores the information required to
*  establish the mapping between a matrix entry and its corresponding
*  process and memory location.
*
*  In the following comments, the character _ should be read as
*  "of the distributed matrix".  Let A be a generic term for any 2D
*  block cyclicly distributed matrix.  Its description vector is DESCA:
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DT_A   (global) DESCA( DT_ )   The descriptor type.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the distributed
*                                 matrix A.
*  N_A    (global) DESCA( N_ )    The number of columns in the distri-
*                                 buted matrix A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rest M_A-IMB_A rows of A.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the rest N_A-INB_A columns of A.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the matrix A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of A is distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array storing the local blocks of the
*                                 distributed matrix A.
*                                 LLD_A >= MAX(1,LOCp(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCp( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCq( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCp() and LOCq() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*
*  This routine requires square block decomposition ( MB_A = NB_A ).
*
*  Arguments
*  =========
*
*  WANTT   (global input) LOGICAL
*          = .TRUE. : the full Schur form T is required;
*          = .FALSE.: only eigenvalues are required.
*
*  WANTZ   (global input) LOGICAL
*          = .TRUE. : the matrix of Schur vectors Z is required;
*          = .FALSE.: Schur vectors are not required.
*
*  N       (global input) INTEGER
*          The number of rows and columns of the matrix A.
*
*  N2      (global input) INTEGER
*          The number of rows and columns of the matrix sub( A ).
*
*  K       (global output) INTEGER
*          On exit, K is the number of eigenvalues of sub( A ) in the
*          selected half-plane at x = X (see variable RHP).
*
*  A       (local input/output) DOUBLE PRECISION
*          An array of dimension (LLD_A, LOCq(JA+DESCA(N_)-1)) containing the
*          local pieces of the distributed matrix A.
*          On entry, A(IA:IA+N-1, JA:JA+N-1) is the original/'root' matrix 
*          whose spectrum is to be split, and
*          A(IA+ILO-1:IA+ILO+N-2, JA+ILO-1:JA+ILO+N-2) contains sub( A ),
*          the current diagonal sub-block to be deflated.
*          On exit, sub( A ) contains the deflated matrix, where the leading
*          principal submatrix A(IA+ILO-1:IA+ILO+K-2, JA+ILO-1:JA+ILO+K-2)
*          contains all K eigenvalues of the original matrix sub( A )
*          in the open half plane (right or left - as indicated by RHP)
*          at x = X. A(IA+ILO+K-1:IA+N, JA+ILO+K-1:JA+N) contains the
*          remaining eigenvalues.
*
*  IA      (global input) INTEGER
*          A's global column index. Points to the beginning of
*          the root submatrix.
*          That is, if PDSGNDFL is called repeatedly on sub-blocks to
*          upper triangularize an input matrix, then IA must point to the
*          beginning of the original/root matrix.
*
*  JA      (global input) INTEGER
*          A's global column index. Points to the beginning of
*          the root submatrix.
*          That is, if PDSGNDFL is called repeatedly on sub-blocks to
*          upper triangularize an input matrix, then JA must point to the
*          beginning of the original/root matrix.
*
*  ILO     (global input) INTEGER
*          Sub-index of A. IA+ILO-1, JA+ILO-1 are the starting row and
*          column of sub( A ) - the N2 x N2 sub matrix of A to be deflated.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  S       (local input/workspace) DOUBLE PRECISION
*          An array of dimension (LLD_S, LOCq(JS+N-1)) containing the
*          local pieces of the distributed matrix sign( sub(A) ) computed 
*          using PDGESGN.
*          S is unspecified on exit.
*
*  IS      (global input) INTEGER
*          S's global column index.
*
*  JS      (global input) INTEGER
*          S's global column index.
*
*  DESCS   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix S.
*
*  Z       (local input/output) DOUBLE PRECISION
*          An array of dimension (LLD_Z, LOCq(JA+DESCZ(N_)-1))
*          If WANTZ is .FALSE. Z is not referenced.
*          If WANTZ is .TRUE., on entrance Z(IZ:IZ+N,JZ:JZ+N) must be
*          either
*          (1) the accumulated orthogonal transformations from previous
*              calls to PDSGNDFL; or
*          (2) the identity.
*          On exit Z will have been multiplied by the orthogonal matrix
*          from the deflation process.
*
*  IZ      (global input) INTEGER
*          Z's global row index.
*
*  JZ      (global input) INTEGER
*          Z's global column index.
*
*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Z.
*
*  RHP     (global input) LOGICAL
*          Indicates which halfplane to map to the upper block of
*          sub( A ) after deflation.
*          = .TRUE. : Put the eigenvalues in the right halfplane at X in the
*                     upper block of the matrix. On exit sub(A)(1:K, 1:K)
*                     contains the eigenvalues in the RHP,
*                     sub(A)(K+1:N2, K+1:N2) contains those in the LHP.
*          = .FALSE.: Put the eigenvalues in the left halfplane at X in the
*                     upper block of the matrix. On exit sub(A)(1:K, 1:K)
*                     contains the eigenvalues in the LHP,
*                     sub(A)(K+1:N2, K+1:N2) contains those in the RHP.
*
*  E21NORM (global output) DOUBLE PRECISION
*          A posteriori measure of error in the decomposition process.
*          On exit, E21NORM = largest entry in the E21 block (see the
*          comments at the top).
*
*  WORK    (local workspace) DOUBLE PRECISION array, dimension (LWORK)
*
*  LWORK   (local input) INTEGER
*          The dimension of the array WORK.  
*          LWORK >= MAX( MAX(3,Mp0 + Nq0) + LOCq(JA+N-1)+Nq0 + 
*                            LOCq(JA+MIN(M,N)-1),
*                        MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 +
*                              NUMROC( NUMROC( N+ICOFFC, NB_A, 0, 0, NPCOL ),
*                             NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) +
*                         NB_A * NB_A ) + Mp0 * Nq0
*
*          where:
*          IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ),
*          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
*          IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
*          Mp0   = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ),
*          Nq0   = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ),
*          LOCq(JA+N-1) = NUMROC( JA+N-1, NB_A, MYCOL, CSRC_A, NPCOL )
*          LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL )
*
*          and NUMROC, INDXG2P are ScaLAPACK tool functions;
*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
*          the subroutine BLACS_GRIDINFO.
*
*  IWORK    (local workspace) INTEGER array, dimension LOCq(JA+N-1)+NB_A.
*
*  INFO    (output) INTEGER
*          = 0:  successful exit
*          < 0: if INFO = -k, the k-th argument had an illegal value
*          = 1: Error in QRP factorization (PDGEQPF0)
*          = 2: Error in PDORMQR
*
*
*  =====================================================================
*
*  Further Details
*  ===============
*
*     Floating operation count:
*     Factor routine costs (4/3)*N3, get Q is 4/3 *N3, 2 MMs cost 4N3
*     Total cost = (20/3)*N3
*
*  =====================================================================
*
*  Working Note:
*  ============
*
*  This routine is still in the prototype stage and much of the debugging
*  code has been left in. We have tried to isolate these blocks of code
*  and identify them wherever possible.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            CSRC_, CTXT_, DLEN_, DT_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( DLEN_ = 9, DT_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
*     ..
*     .. Local Scalars ..
      INTEGER            NPROW, NPCOL, MYROW, MYCOL, INFO2,
     $                   IPB, IPTAU, IPW, ICTXT, MP, NQ,
     $                   MB, NB, IAROW, IZ2, JZ2, IA2, JA2,
     $                   IB, JB, ISROW, ISCOL, IROFFS, ICOFFS, LWMIN,
     $                   LWMIN1, LWMIN2, LWMIN3, LCMQ, NQ0,
     $                   IROFFA, NPA0, LCM
      DOUBLE PRECISION   TRACE, ANORM
*     ..
*     .. Local arrays ..
      INTEGER            DESCB( DLEN_ )
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, CHK1MAT, PDGEQPF,
     $                   PDAJDIAG, PDORMQR, DESCSET
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL, INDXG2P, NUMROC, ILCM
      DOUBLE PRECISION   PDLANGE, PDLATRA
      EXTERNAL           ICEIL, INDXG2P, NUMROC, LSAME,
     $                   PDLANGE, PDLATRA, ILCM
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, ANINT, MAX
***
***   Debugging variables/functions/subroutines
***
      double precision   dummy
      logical            debug, printit
      external           print_desc
*     ..
*     .. Executable Statements ..
*
*
*     Get grid parameters
*
      ICTXT = DESCA( CTXT_ )
      MB = DESCA( MB_ )
      NB = DESCA( NB_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
***
***   Debugging - intialization
***
      debug = .false.
      printit = (debug .and. myrow.eq.0 .and. mycol.eq.0)
*
*     Test input
*
      INFO = 0
      IF ( N .LT. 0 ) THEN
         INFO = -3
      ELSE IF ( N2 .LT. 0 .OR. N2 .GT. N ) THEN
         INFO = -4
      ELSE IF ( IA .LE. 0 ) THEN
         INFO = -7
      ELSE IF ( JA .LE. 0) THEN
         INFO = -8
      ELSE IF ( ILO .LE. 0 ) THEN
         INFO = -9
      ELSE IF ( IS .LE. 0 ) THEN
         INFO = -11
      ELSE IF ( JS .LE. 0) THEN
         INFO = -13
      ELSE IF ( IZ .LE. 0 ) THEN
         INFO = -16
      ELSE IF ( JZ .LE. 0 ) THEN
         INFO = -17
      ELSE
*
*        Compute workspace requirements
*
         IROFFS = MOD( IS-1, DESCS( MB_ ) )
         ICOFFS = MOD( JS-1, DESCS( NB_ ) )
         ISROW = INDXG2P( IS, DESCS( MB_ ), MYROW,
     $                    DESCS( RSRC_ ), NPROW )
         ISCOL = INDXG2P( JS, DESCS( NB_ ), MYCOL,
     $                    DESCS( CSRC_ ), NPCOL )
         MP = NUMROC( N2+IROFFS, DESCS( MB_ ), MYROW, ISROW, NPROW )
         NQ = NUMROC( N2+ICOFFS, DESCS( NB_ ), MYCOL, ISCOL, NPCOL )
         NQ0 = NUMROC( IROFFS+N2, DESCS( NB_ ), MYCOL, DESCS( CSRC_ ),
     $                  NPCOL )
*
*        For PDGEQPF0
*
         LWMIN1 = MAX( 3, MP + NQ ) + NQ0 + NQ
*
*        For PDORMQR on N2 x N2 matrix
*
         IA2 = IA + ILO - 1
         JA2 = JA + ILO - 1

         IROFFA = MOD( IA2-1, DESCA( MB_ ) )
         IAROW = INDXG2P( IA2, DESCA( MB_ ), MYROW, DESCA( RSRC_ ),
     $                    NPROW )
         LWMIN2 = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2,
     $                ( MP + NQ ) * DESCA( NB_ ) ) +
     $              DESCA( NB_ ) * DESCA( NB_ ) +
     $      NUMROC( ICOFFS+N2, DESCS( NB_ ), MYCOL, ISCOL, NPCOL )
         NPA0 = NUMROC( N2+IROFFA, DESCA( MB_ ), MYROW, IAROW,
     $                  NPROW )
         LCM = ILCM( NPROW, NPCOL )
         LCMQ = LCM / NPCOL
         LWMIN3 =  MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) )
     $                  / 2, ( NQ + MAX( NPA0 + NUMROC( NUMROC(
     $                  N2+ICOFFS, DESCA( NB_ ), 0, 0, NPCOL ),
     $                  DESCA( NB_ ), 0, 0, LCMQ ), MP ) ) *
     $                  DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) +
     $      NUMROC( ICOFFS+N2, DESCS( NB_ ), MYCOL, ISCOL, NPCOL )

         LWMIN = MP * NQ + MAX( LWMIN1, MAX( LWMIN2, LWMIN3 ) )

         IF ( WANTT .OR. WANTZ ) THEN
*
*           For PDORMQR on N x N2 matrix
*
            WORK( 1 ) = ZERO
*	print *, 'checking pdormqr {'
            CALL PDORMQR( 'RIGHT', 'NO TRANS', N, N2, N2,
     $                    WORK, IA, JA, DESCA, WORK,
     $                    Z, IZ, JZ, DESCZ, WORK, -1, INFO2 )
*	print *, '} done'
            LWMIN = LWMIN + ANINT( WORK( 1 ) )
         END IF

         IF ( LWORK .EQ. -1 ) THEN
            if (printit) print *, 'PDSGNDFL: Min Work = ', lwmin
            GOTO 500
         ELSE IF ( LWORK .LT. LWMIN ) THEN
            if (printit) print *, 'PDSGNDFL: Min Work, LWORK',
     $                    lwmin, lwork
            INFO = -15
         END IF
      END IF

      IF ( INFO .NE. 0 ) THEN
         CALL PXERBLA( ICTXT, 'PDSGNDFL', -INFO )
         GOTO 500
      END IF

      K = 0
      E21NORM = ZERO
*
*     Quick return if possible
*
      IF ( N2 .EQ. 0 )
     $   GOTO 500
*
*     Compute the norm of sub( A ); need it for posteori error
*
      ANORM = PDLANGE( 'M', N2, N2, A, IA2, JA2, DESCA, WORK )
*
*     Carve up workspace - create an N2 x N2 matrix
*
      IROFFS = MOD( IS-1, DESCS( MB_ ) )
      ICOFFS = MOD( JS-1, DESCS( NB_ ) )
      IB = IROFFS + 1
      JB = ICOFFS + 1
      ISROW = INDXG2P( IS, DESCS( MB_ ), MYROW, 
     $                 DESCS( RSRC_ ), NPROW )
      ISCOL = INDXG2P( JS, DESCS( NB_ ), MYCOL, 
     $                 DESCS( CSRC_ ), NPCOL )
      MP = NUMROC( N2 + IROFFS, DESCS( MB_ ), MYROW, ISROW, NPROW )
      NQ = NUMROC( N2 + ICOFFS, DESCS( NB_ ), MYCOL, ISCOL, NPCOL )
      IPB = 1
      IPW = IPB + MP * NQ
      CALL DESCSET( DESCB, N2 + IROFFS, N2 + ICOFFS, DESCS( MB_ ), 
     $              DESCS( NB_ ), ISROW, ISCOL, ICTXT, MAX( 1, MP ) )
*
*     S := S + I or S := S - I, depending on which halfplane.
*
      CALL PDLACPY( 'ALL', N2, N2, S, IS, JS, DESCS, 
     $              WORK( IPB ), IB, JB, DESCB )
      IF ( RHP ) THEN
         CALL PDAJDIAG( 'ADD', N2, WORK( IPB ), IB, JB, DESCB, ONE )
      ELSE
         CALL PDAJDIAG( 'ADD', N2, WORK( IPB ), IB, JB, DESCB, -ONE )
      END IF
*
*     Compute the trace to tell us where the matrix splits.
*     K := The number of eigenvalues in the desired halfplane.
*
      TRACE = PDLATRA( N2, WORK( IPB ), IB, JB, DESCB )
      K = ANINT( ABS( TRACE ) ) / 2
      if (printit) print *, 'K = ', K, ', (N2 = ', N2, ')'
*
*     Quick return if none or all eigenvalues in desired half plane
*
      IF ( K .EQ. 0 .OR. K .EQ. N2 ) THEN
         if (printit) print *, 'None or All eigenvalues are in ' //
     $                         'the specified region'
         GOTO 500
      END IF
*
*     Similarity transformation to deflate an invariant subspace:
*              A := Q'*A*Q       where Sign(A)*P = QR
*     Compute the QRP factorization of the sign matrix
*
      IPTAU = IPW
      IPW = IPTAU + NUMROC( JB+N2-1, NB, MYCOL, DESCB( CSRC_ ), NPCOL )

      CALL PDGEQPF0( N2, N2, WORK( IPB ), IB, JB, DESCB, IWORK,
     $              WORK( IPTAU ), WORK( IPW ), 
     $              LWORK - IPW, INFO2 )
      IF ( INFO2 .NE. 0 ) THEN
         if (printit) print *, 'Info of PDGEQPF0 = ', info2
         INFO = 1
         GOTO 500
      END IF
***
***   Debugging - check the accuracy of Q
***
      if ( debug ) then
         CALL PDLASET( 'ALL', N2, N2, ZERO, ONE, S, IS, JS, DESCS )
         CALL PDORMQR( 'LEFT', 'TRANS', N2, N2, N2, WORK( IPB ), 
     $              IB, JB, DESCB, WORK( IPTAU ), S, IS, JS, 
     $              DESCS, WORK( IPW ), LWORK - IPW, INFO2 )
         if (info2 .ne. 0 .and. printit)
     $      print *, '(Debug1) Info of PDORMQR = ', info2
         CALL PDORMQR( 'RIGHT', 'NO TRANS', N2, N2, N2,
     $                 WORK( IPB ), IB, JB, DESCB, WORK( IPTAU ), 
     $                 S, IS, JS, DESCS, WORK( IPW ), 
     $                 LWORK - IPW, INFO2 )
         if (info2 .ne. 0 .and. printit)
     $      print *, '(Debug2) Info of PDORMQR = ', info2
         dummy = PDLANGE( '1', N2, N2, S, IS, JS, DESCS, WORK(IPW))
         if (printit .and. abs(dummy-one) .gt. 1.0D-10) 
     $      print *, 'Poor accuracy of orthogonal matrix: ', dummy
      end if
*
*     sub( A ) := Q^t * sub( A ) * Q
*
      CALL PDORMQR( 'LEFT', 'TRANS', N2, N2, N2, WORK( IPB ), 
     $              IB, JB, DESCB, WORK( IPTAU ), A, IA2, JA2, 
     $              DESCA, WORK( IPW ), LWORK - IPW, INFO2 )
      IF ( INFO2 .NE. 0 ) THEN
         if (printit) print *, 'Failed in PDORMQR Q^t * A'
         INFO = 2
         GOTO 500
      END IF
      CALL PDORMQR( 'RIGHT', 'NO TRANS', N2, N2, N2, WORK( IPB ), 
     $              IB, JB, DESCB, WORK( IPTAU ), A, IA2, JA2, 
     $              DESCA, WORK( IPW ), LWORK - IPW, INFO2 )
      IF ( INFO2 .NE. 0 ) THEN
         if (printit) print *, 'Failed in PDORMQR A * Q'
         INFO = 2
         GOTO 500
      END IF

      IF ( WANTZ ) THEN
*
*        Accumulate orthogonal transformations.
*        Must compute the new orthogonal matrix from the previous
*        Z and the Schur vectors of the (N2 x N2) system just computed.
*        Only update the (IA:IA+N-1, IA+ILO:IA+ILO+N2-1) block of Z.
*
         IZ2 = IZ
         JZ2 = JZ + ILO - 1
         CALL PDORMQR( 'RIGHT', 'NO TRANS', N, N2, N2,
     $                 WORK( IPB ), IB, JB, DESCB, WORK( IPTAU ), 
     $                 Z, IZ2, JZ2, DESCZ, WORK( IPW ), 
     $                 LWORK - IPW, INFO2 )
         IF ( INFO2 .NE. 0 ) THEN
            if (printit) print *, 'Failed in PDORMQR Z * Q'
            INFO = 2
            GOTO 500
         END IF
      END IF

      IF ( WANTT ) THEN
*
*     Update the off-diagonal blocks.
*
         IF ( ILO .NE. 1 ) THEN
*
*           A12 = A12 * Q
*
            CALL PDORMQR( 'RIGHT', 'NO TRANS', ILO-1, N2, N2,
     $                    WORK( IPB ), IB, JB, DESCB, WORK( IPTAU ), 
     $                    A, IA, JA2, DESCA, WORK( IPW ), 
     $                    LWORK - IPW, INFO2 )
            IF ( INFO2 .NE. 0 ) THEN
               if (printit) print *, 'Failed in PDORMQR A12 * Q'
               INFO = 2
               GOTO 500
            END IF

         END IF

         IF ( ( ILO + N2 - 1 ) .NE. N ) THEN
*
*           A23 = Q^t * A23
*
            CALL PDORMQR( 'LEFT', 'TRANS', N2, N-(ILO+N2-1), N2,
     $                    WORK( IPB ), IB, JB, DESCB, WORK( IPTAU ), 
     $                    A, IA2, JA2+N2, DESCA, WORK( IPW ), 
     $                    LWORK - IPW, INFO2 )
            IF ( INFO2 .NE. 0 ) THEN
               if (printit) print *, 'Failed in PDORMQR Q^t * A23'
               INFO = 2
               GOTO 500
            END IF
         END IF
      END IF
*
*     Check the accuracy/stability of the procedure. Things are good
*     if || E21 || is small.
*
      E21NORM = PDLANGE( 'MAX', N2-K, K, A, IA2+K, JA2,
     $                   DESCA, WORK( IPW ) ) / ANORM
      if (printit) print *, '|| E21 || = ', E21NORM
      if (e21norm .GT. 1.0D-6 .and. printit)
     $   print *, '|| E21 || is large!'

 500  WORK( 1 ) = LWMIN
*
*     End of PDSGNDFL
*
      RETURN
      END
