
      SUBROUTINE PDGESGN( N, A, IA, JA, DESCA, MAXITER,
     $                    SCALING, NITER, WORK, LWORK,
     $                    IWORK, LIWORK, INFO )
*     ..
*     .. Scalar Arguments ..
      INTEGER            N, IA, JA, MAXITER, NITER, LWORK,
     $                   LIWORK, INFO
      CHARACTER          SCALING
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), IWORK( * )
      DOUBLE PRECISION   A( * ), WORK( * )

*
*  Purpose
*  =======
*
*
*  PDGESGN computes sign(A) using the Newton Iteration:
*
*       A{i+1} :=  ALPHA * A{i}  +  BETA * A{i}^-1
*
*  where ALPHA and BETA are appropriate constants which depend on the
*  scaling scheme used.
*
*  Notes
*  =====
*
*  A description vector is associated with each 2D block-cyclicly dis-
*  tributed matrix.  This vector stores the information required to
*  establish the mapping between a matrix entry and its corresponding
*  process and memory location.
*
*  In the following comments, the character _ should be read as
*  "of the distributed matrix".  Let A be a generic term for any 2D
*  block cyclicly distributed matrix.  Its description vector is DESCA:
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DT_A   (global) DESCA( DT_ )   The descriptor type.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the distributed
*                                 matrix A.
*  N_A    (global) DESCA( N_ )    The number of columns in the distri-
*                                 buted matrix A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rest M_A-IMB_A rows of A.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the rest N_A-INB_A columns of A.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first

*                                 row of the matrix A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of A is distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array storing the local blocks of the
*                                 distributed matrix A.
*                                 LLD_A >= MAX(1,LOCp(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCp( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCq( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCp() and LOCq() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*
*
*  Arguments
*  =========
*
*
*  N       (global input) INTEGER
*          The number of rows and columns of the matrix A.
*
*  A       (local input/output) DOUBLE PRECISION
*          An array of dimension (LLD_A, LOCq(JA+N-1)) containing the
*          local pieces of the distributed matrix A.
*
*  IA      (global input) INTEGER
*          A's global column index.
*
*  JA      (global input) INTEGER
*          A's global column index.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  MAXITER (global input) INTEGER
*          The maximam number of iterations to take when computing sign(A).
*
*  SCALING (global input) CHARACTER
*          Indicates the scaling scheme to use.
*          'N' ==> No scaling
*          'R' ==> Robert's scaling
*
*  NITER   (global output) INTEGER
*          On exit, NITER is number of iterations taken in computing sign(A).
*
*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
*          If INFO returns 0, then WORK(1) returns the minimum
*          value of LWORK required.
*
*  LWORK   (input) INTEGER
*          The dimension of the array WORK. 
*          LWORK >= LOCp(N+MOD(IA-1,MB_A))*NB_A
*                   + LOCp(N+MOD(IA-1,MB_A))*LOCq(N+MOD(JA-1,MB_A))
*
*  IWORK   (local workspace) INTEGER array, dimension (LIWORK)
*          On exit, if INFO = 0, IWORK(1) returns the optimal and
*          minimal LIWORK.
*
*  LIWORK  (local input) INTEGER
*          Integer workspace.
*          if NPROW == NPCOL then
*             LIWORK = LOCq( M_A + MOD(IA-1, MB_A) ) + MB_A,
*                      + LOCp(M_A)+MB_A
*          else if PIVROC == 'C' then
*             LIWORK =  LOCq( M_A + MOD(IA-1, MB_A) )
*                       + MB_A*CEIL(CEIL(LOCp(M_A)/MB_A)/(LCM/NPROW))
*                       + LOCp(M_A)+MB_A
*          where LCM is the least common multiple of process
*          rows and columns (NPROW and NPCOL).
*
*  INFO    (output) INTEGER
*          = 0:  successful exit
*          < 0: if INFO = -k, the k-th argument had an illegal value
*          = 1: the iteration failed to converge after the
*               specified maximum number of iterations.
*          = 2: There was a failure computing the inversion
*
*  =====================================================================
*
*  Working Note:
*  =============
*
*  Additional scaling schemes will be added for the next release.
*
*  This routine is still in the prototype stage and much of the debugging
*  code has been left in. We have tried to isolate these blocks of code
*  and identify them wherever possible.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            CSRC_, CTXT_, DLEN_, DT_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( DLEN_ = 9, DT_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      DOUBLE PRECISION   HALF, ONE, ZERO
      PARAMETER          ( HALF = 0.5D+0, ONE = 1.0D+0,
     $                     ZERO = 0.0D+0 )
*     ..
*     .. Local Scalars ..
      INTEGER            ICTXT, NPROW, NPCOL, MYROW, MYCOL,
     $                   IPB, IPC, IPW, MB, NB, IPIW,
     $                   IROFF, ICOFF, IAROW, IACOL, NP, NQ, MP,
     $                   IB, JB, INFO2, IPIPIV, LCM, NQ0,
     $                   LWMIN, LWMIN1, LWMIN2, LIWMIN
      DOUBLE PRECISION   TOL, ANORM, CONV, ALPHA,
     $                   BETA, DUMMY
      LOGICAL            ROBERTS_SCALING, NO_SCALING
*     ..
*     .. Local arrays ..
      INTEGER            DESCB( DLEN_ )
*     ..
*     .. External Subroutines ..
      EXTERNAL           PDLACPY, PDGETRF0, PDGETRI0, XERBLA
     $                   PDMATADD, PDAJDIAG, PCHK1MAT, PDGEMM0
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL, ILCM, INDXG2P, NUMROC
      DOUBLE PRECISION   PDLAMCH, PDLANGE, PDLATRA
      EXTERNAL           ICEIL, ILCM, INDXG2P, NUMROC, 
     $                   LSAME, PDLAMCH
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          DBLE, MAX, MIN, MOD
***
***   Debugging variables/functions/subroutines
***
      logical            debug, printit
      double precision   dummy2
      external           print_desc
*
*     ..
*     .. Executable Statements ..
*
      ROBERTS_SCALING = LSAME( SCALING, 'ROBERTS' )
      NO_SCALING = LSAME( SCALING, 'NONE' )
      ICTXT = DESCA( CTXT_ )
      MB = DESCA( MB_ )
      NB = DESCA( NB_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
***
***   Debugging - intialization
***
      debug = .false.
      printit = debug .and. (myrow .eq. 0) .and. (mycol .eq. 0)
*
*     Test the input parameters
*
      INFO = 0
      IF ( N .LT. 0 ) THEN
         INFO = -1
      ELSE IF ( IA .LE. 0 ) THEN
         INFO = -3
      ELSE IF ( JA .LE. 0 ) THEN
         INFO = -4
      ELSE IF ( MAXITER .LE. 0 ) THEN
         INFO = -6
      ELSE IF ( .NOT. (LSAME(SCALING, 'ROBERTS') .OR.
     $                 LSAME(SCALING, 'NONE')) ) THEN
         INFO = -7
      ELSE IF( NPROW .EQ. -1 ) THEN
         INFO = -507
      ELSE IF( DESCA( MB_ ) .NE. DESCA( NB_ ) ) THEN
            INFO = -504
      ELSE
         IROFF = MOD( IA-1, DESCA( MB_ ) )
         ICOFF = MOD( JA-1, DESCA( NB_ ) )
         IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), 
     $                    NPROW )
         NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW )
         MP = NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW,
     $                DESCA( RSRC_ ), NPROW )
         NQ = NUMROC( DESCA( N_ ), DESCA( NB_ ), MYCOL,
     $                DESCA( CSRC_ ), NPCOL )
         LWMIN = NP * DESCA( NB_ ) + MP * NP

         IROFF = MOD( IA-1, MB )
         ICOFF = MOD( JA-1, NB )
         IAROW = INDXG2P( IA, MB, MYROW, DESCA( RSRC_ ), NPROW )
         IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL )
         MP = NUMROC( N+IROFF, NB, MYROW, IAROW, NPROW )
         NQ = NUMROC( N+ICOFF, NB, MYCOL, IACOL, NPCOL )
*
*        Compute work for PDGEQPF0
*
         NQ0 = NUMROC( ICOFF+N, NB, MYCOL, IACOL, NPCOL )
         LWMIN1 = MAX( 3, MP + NQ ) + NQ0 + NQ
*
*        Compute work for PDGETRI0
*
         LWMIN2 = MP * DESCA( NB_ )
*
         LWMIN = MP * NQ + MAX( LWMIN1, LWMIN2 )
         LWMIN = LWMIN + MP * NQ
*
         IF( NPROW .EQ. NPCOL ) THEN
            LIWMIN = NQ + DESCA( NB_ )
         ELSE
            LCM = ILCM( NPROW, NPCOL )
            LIWMIN = NQ + MAX( ICEIL( ICEIL( MP, DESCA( MB_ ) ),
     $                            LCM / NPROW ), DESCA( NB_ ) )
         END IF
         LIWMIN = LIWMIN + NUMROC( IA+N-1, DESCA( NB_ ), MYCOL, 
     $                DESCA( CSRC_ ), NPCOL )
*
         IF( LWORK .EQ. -1 ) THEN
*
*           Satisfy LWORK request
*
            INFO = 0
            if (printit) print *, 'PDGESGN: Min Work = ', lwmin
            WORK( 1 ) = DBLE( LWMIN )
            RETURN
         ELSE IF( LWORK .LT. LWMIN ) THEN
            if (printit) print *, 'PDGESGN: Min Work, LWORK', 
     $                            lwmin, lwork
            INFO = -10
         END IF
      END IF

      IF ( INFO .NE. 0 ) THEN
         CALL PXERBLA( ICTXT, 'PDGESGN', -INFO )
         RETURN
      END IF

*
*     Quick return if possible
*
      IF ( N .EQ. 0 )
     $   RETURN

*
*     Set stopping tolerance
*
      TOL = DBLE( N ) * PDLAMCH( ICTXT, 'PRECISION' )
*
*      Set ALPHA and BETA once if no scaling
*
      IF ( NO_SCALING ) THEN
         ALPHA = HALF
         BETA = HALF
      END IF
*
*
*     Designate an N x N workspace matrix B for iteration
*
      IROFF = MOD( IA-1, MB )
      ICOFF = MOD( JA-1, NB )
      IAROW = INDXG2P( IA, MB, MYROW, DESCA( RSRC_ ), NPROW )
      IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL )
      MP = NUMROC( N+IROFF, NB, MYROW, IAROW, NPROW )
      NQ = NUMROC( N+ICOFF, NB, MYCOL, IACOL, NPCOL )
      CALL DESCSET( DESCB, N+IROFF, N+ICOFF, MB, NB,
     $              IAROW, IACOL, ICTXT, MAX( 1, MP ) )
      IB = IROFF + 1
      JB = ICOFF + 1
      IPB = 1
      IPW = IPB + MP * NQ
      if (debug) then
         IPC = IPW
         IPW = IPC + MP * NQ
      end if
*
*     Will use the first  LOCp(M_A)+MB_A elements of IWORK for pivot 
*     information.
*
      IPIPIV = 1
      IPIW = IPIPIV + NUMROC( DESCB( M_ ), MB, MYROW,
     $                        DESCB( RSRC_ ), NPROW ) + MB
      NITER = 0
*
*
*      Entry point for Main Iteration Loop
*
100   NITER = NITER + 1

***
***   Debugging
***   Compute and display || Ai^2 - I || / || Ai ||
***
      if (debug) then
         DUMMY = PDLANGE( '1', N, N, A, IA, JA, DESCA, WORK( IPW ) )
         call pdgemm0( 'No trans', 'No trans', N, N, N, ONE, A, 
     $                 IA, JA, DESCA, A, IA, JA, DESCA, 
     $                 ZERO, WORK( IPB ), IB, JB, DESCB )
         call pdajdiag( 'ADD', N, WORK( IPB ), IB, JB, DESCB, ONE )
         dummy2 = pdlange( '1', N, N, WORK( IPB ), IB, JB, 
     $                     DESCB, WORK( IPW ) ) / DUMMY
         if (printit) then
            print *, ' '
            print *, ' >>> Iteration #: ', NITER
            print *, '   || Ai || = ', DUMMY
            print *, '   || Ai^2 - I || / || Ai || = ', dummy2
         end if
      end if
*
*     Compute || Ai ||
*
      ANORM = PDLANGE( '1', N, N, A, IA, JA, DESCA, WORK( IPW ) )
*
*      B = Ai
*
      CALL PDLACPY( 'All', N, N, A, IA, JA, DESCA, 
     $              WORK( IPB ), IB, JB, DESCB )
*
*     Compute the LU factorization of B
*
      CALL PDGETRF0( N, N, WORK( IPB ), IB, JB, DESCB, 
     $               IWORK( IPIPIV ), INFO2 )
      IF ( INFO2 .NE. 0 ) THEN
         if (debug) print *, 'Info of PDGETRF0 = ', info2
         INFO = 2
         RETURN
      END IF
*
*     Compute the inverse
*
      CALL PDGETRI0( N, WORK( IPB ), IB, JB, DESCB, IWORK( IPIPIV ),
     $               WORK( IPW ), LWORK - IPW + 1, IWORK( IPIW ), 
     $               LIWORK - IPIW + 1, INFO2 )
      IF ( INFO2 .NE. 0 ) THEN
         if (debug) print *, 'Info of PDGETRI0 = ', info2
         INFO = 2
         RETURN
      END IF

***
***   Debugging - check the accuracy of the inversion
***
      if (debug) then
         call pdgemm0( 'No trans', 'No trans', N, N, N, 
     $                 ONE, A, IA, JA, DESCA, WORK( IPB ), 
     $                 IB, JB, DESCB, ZERO, WORK( IPC ), 
     $                 IB, JB, DESCB )
         dummy = pdlange( '1', N, N, WORK( IPC ), IB, JB, 
     $                    DESCB, WORK( IPW ) )
         if ( (dummy-one) .gt. 1.0D-3 .and. MYROW .EQ. 0 .AND. 
     $        MYCOL .EQ. 0 ) then
            print *, '   Accuracy of inversion:  || Ai * Ai^-1 || = ',
     $              dummy 
         end if
      end if
*
*      Now finish up scaling...
*
      IF ( ROBERTS_SCALING ) THEN
*      
*        Roberts scaling needs || Ai || and || A^-1 ||.
*
         ALPHA = PDLANGE( '1', N, N, WORK( IPB ), IB, JB, 
     $                    DESCB, WORK( IPW ) )
         BETA = ANORM
         DUMMY = ALPHA + BETA
         ALPHA = ALPHA / DUMMY
         BETA = BETA / DUMMY
      END IF
*
*      Compute || Ai+1 - Ai || / || Ai || (and set Ai+1)
*      B = alpha * A + beta * B
*
      CALL PDMATADD( N, N, ALPHA, A, IA, JA, DESCA, BETA,
     $              WORK( IPB ), IB, JB, DESCB )
*
*     A = B - A
*
      CALL PDMATADD( N, N, ONE, WORK( IPB ), IB, JB, DESCB, -ONE,
     $              A, IA, JA, DESCA )
      CONV = PDLANGE( '1', N, N, A, IA, JA, DESCA, WORK( IPW ) ) 
     $         / ANORM
      CALL PDLACPY( 'Full', N, N, WORK( IPB ), IB, JB, 
     $              DESCB, A, IA, JA, DESCA )

***
***   Debugging - display convergence information
***
      if (debug) then
         DUMMY = PDLATRA( N, A, IA, JA, DESCA )
         if (printit) then
            print *, 
     $        '   Convergence value (|| Ai+1 - Ai || / || Ai ||) = ',
     $        CONV
            print *, '   || Ai+1 - Ai || = ', CONV*ANORM
            print *, '   Trace(Ai) = ', DUMMY
         END IF
      end if
*
*     Continue iterating if we have not converged and have not
*     exceeded the maximum number of iterations.
*
      IF ( ( CONV .GT. TOL ) .AND. ( NITER .LT. MAXITER ) )
     $   GO TO 100
*
*      Get here if the sign function has been computed successfully or the
*      maximum number of iterations have been performed.
*

***
***   Debugging
***   Check the accuracy of the matrix sign calculation.
***   Compute and display || sign(A)^2 - I ||
***
      if (debug) then
         call pdgemm0( 'No trans', 'No trans', N, N, N, ONE, A, 
     $                 IA, JA, DESCA, A, IA, JA, DESCA, 
     $                 ZERO, WORK( IPB ), 
     $                 IB, JB, DESCB )
         call pdajdiag( 'ADD', N, WORK( IPB ), IB, JB, DESCB, -ONE )
         dummy = pdlange( '1', N, N, WORK( IPB ), IB, JB, 
     $                    DESCB, WORK( IPW ) )
         if (printit) then
            print *, '   || sign(A)^2 - I || = ', dummy
         end if
      end if
*
*     Set INFO if the iteration has failed to converge after the maximum
*     number of iterations.
*
      IF ( ( CONV .GT. TOL ) .AND. ( NITER .EQ. MAXITER ) ) THEN
         INFO = 1
         RETURN
      END IF
     
      if (printit)
     $    print *, ' PDGESGN: Number of iterations = ', NITER
*
*      Set optimal values of LIWORK, LWORK
*
500   IWORK( 1 ) = LIWMIN
      WORK( 1 ) = LWMIN
*
*     End of PDGESGN
*
      RETURN
      END
