     SUBROUTINE BABDCR_SOLVT( NRWBLK, NBLOKS, MATR_A, LFTBLK, RGTBLK, &
                              PERM, FILL_IN, VECT_B )

     USE PRECISION
!
!  ==== BABDCR package ================
!  == subroutine BABDCR_SOLVT =========
!  Giuseppe Romanazzi, Pierluigi Amodio
!  Dipartimento di Matematica
!  Universita' di Bari
!  October 20, 2005
!  ====================================
!
! Purpose
! =======
!
!  BABDCR_SOLVT  solves the babd linear system whose coefficient matrix
!  is the transpose of the matrix that has been factorized by
!  BABDCR_FACT. The algorithm consists of three phases: reduction (by 
!  using the subroutine REDUCE_RHST), solution of the 2 by 2 block
!  system (by using the subroutine DGETRS) and back-substitution (by 
!  using the subroutine SOLVE_BLOCKT).
!
!  In input, BABDCR_SOLVT requires the coefficient matrix factorized by
!  the subroutine BABDCR_FACT and the right hand side which is stored
!  in the block vector VECT_B in the following form
!
!     VECT_B  = [f(0), f(1), ...., f(NBLOKS)].
!
!  On exit, BABDCR_SOLVT gives the solution of the babd linear system.
!  The solution is stored in the block vector VECT_B.
! 
!
! Parameters
! ==========
!
! Input variables:
!   NRWBLK  integer,            the dimension of each block of the
!                               babd matrix, NRWBLK>0
!
!   NBLOKS  integer,            the number of blocks S(i) and R(i)
!                               in the babd matrix, NBLOKS>0
!
!   MATR_A  double precision,   NRWBLK by NRWBLK by NBLOKS*2 array,
!                               part of the factorization of the babd 
!                               matrix
!
!   LFTBLK  double precision,   NRWBLK by NRWBLK array,
!                               part of the factorization of the last 
!                               2 by 2 reduced matrix
!
!   RGTBLK  double precision,   NRWBLK by NRWBLK array,
!                               part of the factorization of the last 
!                               2 by 2 reduced matrix
!
!   PERM    integer,            NRWBLK*2 by NBLOKS array,
!                               the permutations associated to the
!                               NBLOKS LU factorizations
!
!   FILL_IN double precision,   NRWBLK by NRWBLK by NBLOKS-1 array,
!                               the fill-in matrix generated by the
!                               reduction routine REDUCE_BLOCK
!
!   VECT_B double precision,    NRWBLK by NBLOKS+1 array,
!                               the r.h.s. of the linear system
! Output variables:
!   VECT_B double precision,    NRWBLK by NBLOKS+1 array,
!                               the solution of the linear system
!
!
     INTEGER,  INTENT(IN)     :: NRWBLK, NBLOKS, &
                                 PERM( NRWBLK*2, NBLOKS )
     REAL(DP), INTENT(IN)     :: MATR_A( NRWBLK, NRWBLK, NBLOKS*2 ), &
                                 FILL_IN( NRWBLK, NRWBLK, NBLOKS-1 ), &
                                 LFTBLK( NRWBLK, NRWBLK ), &
                                 RGTBLK( NRWBLK, NRWBLK )
     REAL(DP), INTENT(IN OUT) :: VECT_B( NRWBLK, 0:NBLOKS )
! Local variables:
     INTEGER  :: INDEXP, JUMP, H, I, INFO, Z, K, NSOLSTEP( 20 ), NSTEPS
     REAL(DP) :: TC( NRWBLK*2 ), MATR_TEMP( NRWBLK*2, NRWBLK*2 )
! Lapack routine: DGETRS
! Used subroutines: REDUCE_RHST, SOLVE_BLOCKT

!
! initialize indices
!
      INDEXP= 0
      JUMP= 1
      NSTEPS= 0
      K= NBLOKS
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!! reduction phase for the r.h.s. !!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
      DO WHILE ( K.GT.1 )
!
! set the number of internal reduction steps and initialize the index
! of the r.h.s. block to reduce
!
        NSTEPS= NSTEPS+1
        NSOLSTEP(NSTEPS)= K/2
        H= JUMP
!
!! internal reduction cycle
!
        DO I= 1, K/2-1
          INDEXP= INDEXP+1
          Z= H+JUMP
!
!  obtain from the 3 by 2 block array
!
!       (  Sp^T   (Sa')^T ) ( Ya ) = ( Vm )
!       ( (LU)^T          ) ( Yc )   ( Vn )
!       (  Rp^T   (Rc')^T )          ( Vs )
!
!  the reduced block
!
!       ( (Sa')^T ) ( Yc ) = ( Vm' )
!       ( (Rc')^T )          ( Vs' )
!
          CALL REDUCE_RHST( NRWBLK, PERM(1,INDEXP), MATR_A(1,1,H*2), &
                            FILL_IN(1,1,INDEXP), VECT_B(1,H-JUMP), &
                            VECT_B(1,H), VECT_B(1,Z) )

          H= Z+JUMP
        ENDDO
!
!! last step of internal reduction cycle
!
        INDEXP= INDEXP+1
        Z= H+JUMP
        IF ( Z.GT.NBLOKS ) THEN
          Z= NBLOKS
        ENDIF
        CALL REDUCE_RHST( NRWBLK, PERM(1,INDEXP), MATR_A(1,1,H*2), &
                          FILL_IN(1,1,INDEXP), VECT_B(1,H-JUMP), &
                          VECT_B(1,H), VECT_B(1,Z) )
!
!! end internal reduction cycle
!
        JUMP= JUMP*2
        K= K-K/2
      ENDDO
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!! 2 by 2 block linear system solution !!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
!     ( Ba^T  S^(1)^T ) ( x(0)          ) = ( f(0)^(1)      )
!     ( Bb^T  R^(1)^T ) ( x(NBLOKS)^(1) )   ( f(NBLOKS)^(1) )
!
      MATR_TEMP( 1:NRWBLK, 1:NRWBLK )= LFTBLK(:,:)
      MATR_TEMP( 1:NRWBLK, NRWBLK+1:NRWBLK*2 )= RGTBLK(:,:)
      MATR_TEMP( NRWBLK+1:NRWBLK*2, 1:NRWBLK )= MATR_A(:,:,1)
      MATR_TEMP( NRWBLK+1:NRWBLK*2, NRWBLK+1:NRWBLK*2 )= &
           MATR_A(:,:,NBLOKS*2)

      TC= (/ VECT_B(:,0), VECT_B(:,NBLOKS) /)
      CALL DGETRS( 'T', NRWBLK*2, 1, MATR_TEMP(1,1), NRWBLK*2, &
                   PERM(1,NBLOKS), TC(1), NRWBLK*2, INFO )
      VECT_B(:,0)= TC( 1:NRWBLK )
      VECT_B(:,NBLOKS)= TC( NRWBLK+1:NRWBLK*2 )
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!! back-substitution phase !!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
      INDEXP= NBLOKS-1
      DO K= NSTEPS,1,-1 
!
!! first internal back-substitution step
!
        Z= JUMP*NSOLSTEP( K )
        JUMP= JUMP/2
        H= Z-JUMP
        IF ( Z.GT.NBLOKS ) THEN
          Z= NBLOKS
        ENDIF

        CALL SOLVE_BLOCKT( NRWBLK, PERM(1,INDEXP), MATR_A(1,1,H*2), &
                           VECT_B(1,H), VECT_B(1,Z) )

        INDEXP= INDEXP-1
!
!! internal back-substitution cycle
!
        DO I= 2, NSOLSTEP( K )
          H= H-JUMP*2 
!
!  computes the solutions Xb and Xc from
!
!     ( I  G^T ) * P * ( Xb ) = ( Ya )
!     (    I   )       ( Xc )   ( Yc )
!
          CALL SOLVE_BLOCKT( NRWBLK, PERM(1,INDEXP), MATR_A(1,1,H*2), &
                             VECT_B(1,H), VECT_B(1,H+JUMP) )

          INDEXP= INDEXP-1
        ENDDO
      ENDDO

     RETURN
      
     CONTAINS

       SUBROUTINE REDUCE_RHST( NRWBLK, P, RS, F, Vm, Vn, Vs )
!
! Purpose
! =======
!
!  REDUCE_RHST  computes the r.h.s. of the new reduced system from
!
!       (  Sp^T   (Sa')^T ) ( Ya ) = ( Vm )
!       ( (LU)^T          ) ( Yc )   ( Vn )
!       (  Rp^T   (Rc')^T )          ( Vs )
!
!  by performing the following operations:
!
!       (LU)^T * Ya = Vn
!
!       Vm' = Vm - Sp^T * Ya
!       Vs' = Vs - Rp^T * Ya
!
!  In input this algorithm requires the array containing the L and
!  U factors, the matrix F containing the nonnull rows of Sp and Rp,
!  the permutation matrix and the three vectors involved.
! 
!  On exit, REDUCE_RHST gives the computed vector Vm, Vn and Vs.
!
!
! Parameters
! ==========
!
! Input variables:
!   NRWBLK  integer,            the dimension of each block of the
!                               babd matrix, NRWBLK>0
!
!   P       integer,            NRWBLK*2 vector,
!                               permutation vector that comes out
!                               from REDUCE_BLOCK
!
!   RS      double precision,   NRWBLK*2 by NRWBLK array,
!                               the L\U factors of the factorization of
!                               ( Rb ) in the first NRWBLK rows 
!                               ( Sb )
!
!   F       double precision,   NRWBLK by NRWBLK array,
!                               fill-in block obtained in the
!                               subroutine REDUCE_BLOCK, that is the
!                               nonnull rows of  ( Sp )
!                                                ( Rp )
!
!   Vm      double precision,   NRWBLK vector,
!                               first block vector of the r.h.s. to
!                               reduce
!
!   Vn      double precision,   NRWBLK vector,
!                               second block vector of the r.h.s. to
!                               reduce
!
!   Vs      double precision,   NRWBLK vector,
!                               third block vector of the r.h.s. to
!                               reduce
! Output variables:
!   Vm      double precision,   NRWBLK vector,
!                               the first block Vm' of the reduced
!                               vector
!
!   Vn      double precision,   NRWBLK vector,
!                               the block vector Ya
! 
!   Vs      double precision,   NRWBLK vector,
!                               the second block Vs' of the reduced
!                               vector
!
       INTEGER,  INTENT(IN)     ::  NRWBLK, P( NRWBLK*2 )
       REAL(DP), INTENT(IN)     ::  RS( NRWBLK*2, NRWBLK ), &
                                    F( NRWBLK,NRWBLK )
       REAL(DP), INTENT(IN OUT) ::  Vm( NRWBLK ), Vn( NRWBLK ), &
                                    Vs( NRWBLK )
! Local variables:
       INTEGER  :: I, INFO
       INTEGER ::  P1( NRWBLK )
       REAL(DP) :: VAL
! my routine: DGETRS_MOD
! Blas routine: DAXPY

!
! solve the system (L*U)^T * Ya = Vn
!
        DO I= 1, NRWBLK
          P1( I )= I
        ENDDO
        CALL DGETRS( 'N', NRWBLK, 1, RS(1,1), NRWBLK*2, P1(1), Vn(1), &
                     NRWBLK, INFO )
! the solution of the above system may be improved by using an
! appropriate subroutine solving linear systems without any
! permutation
!        CALL DGETRS_MOD( 'N', NRWBLK, 1, RS(1,1), NRWBLK*2, Vn(1), &
!     &                   NRWBLK, INFO )

!
! update the r.h.s.  Vm' = Vm - Sp^T * Ya  and 
!                    Vs' = Vs - Rp^T * Ya
!
        DO I= 1, NRWBLK
          VAL= -Vn(I) 
          IF ( P(I).LE.NRWBLK ) THEN
!           Vm= Vm-F(:,I)*Vn(I) 
            CALL DAXPY( NRWBLK, VAL, F(1,I), 1, Vm(1), 1 )
          ELSE
!           Vs= Vs-F(:,I)*Vn(I)  
            CALL DAXPY( NRWBLK, VAL, F(1,I), 1, Vs(1), 1 )
          ENDIF
        ENDDO

       RETURN

       END SUBROUTINE REDUCE_RHST


       SUBROUTINE SOLVE_BLOCKT( NRWBLK, P, RS, Xb, Xc )
!
! Purpose
! =======
!
!  SOLVE_BLOCKT computes the solutions Xb and Xc
!
!     ( I  G^T ) * P * ( Xb ) = ( Ya )
!     (    I   )       ( Xc )   ( Yc )
!
!  by means of the following operations:
!
!     Ya = Ya - G^T * Yc
!
!     ( Xb ) = P^T * ( Ya )
!     ( Xc ) =       ( Yc )
! 
!  Each block is NRWBLK by NRWBLK. The matrix G is obtained by the
!  subroutine REDUCE_BLOCK; the vectors Ya and Yc are obtained by 
!  the subroutine REDUCE_RHS.
!
!  On exit, SOLVE_BLOCKT gives the solution Xb and Xc.
!
!
! Parameters
! ==========
!
! Input variables:
!   NRWBLK  integer,            the dimension of each block of the
!                               babd matrix, NRWBLK>0
!
!   P       integer,            NRWBLK*2 vector,
!                               permutation vector that comes out
!                               from REDUCE_BLOCK
!
!   RS      double precision,   NRWBLK*2 by NRWBLK array,
!                               the L\U factors of ( Rb ) in the first 
!                                                  ( Sb ) 
!                               NRWBLK rows; the block matrix G that
!                               comes out from REDUCE_BLOCK in the last
!                               NRWBLK rows
!
!   Xb      double precision,   NRWBLK vector,
!                               the first block vector Ya 
!
!   Xc      double precision,   NRWBLK vector,
!                               the second block vector Yc
! Output variables:
!   Xb      double precision,   NRWBLK vector,
!                               the first computed block 
!
!   Xc      double precision,   NRWBLK vector,
!                               the second computed block
!
       INTEGER,  INTENT(IN)     :: NRWBLK, P( NRWBLK*2 )
       REAL(DP), INTENT(IN)     :: RS( NRWBLK*2, NRWBLK )
       REAL(DP), INTENT(IN OUT) :: Xb( NRWBLK ), Xc( NRWBLK )
! Parameters:
       REAL(DP), PARAMETER :: ONE= 1.0D0, NONE= -1.0D0
! Local variables:
       INTEGER  :: I, Q( NRWBLK*2 )
       REAL(DP) :: TEMP( NRWBLK*2 )
! Blas routine: DGEMV

!
! update the r.h.s.  Ya = Ya - G^T * Yc
!
        CALL DGEMV( 'T', NRWBLK, NRWBLK, NONE, RS(NRWBLK+1,1), &
                    NRWBLK*2, Xc(1), 1, ONE, Xb(1), 1 )             
!
!define the inverse of P
!
        DO I= 1, NRWBLK*2
          Q( P( I ) )= I
        ENDDO
!
! compute the solution ( Xb ) by means of  P * ( Ya )
!                      ( Xc )                  ( Yc )
!
        TEMP( 1:NRWBLK )= Xb
        TEMP( NRWBLK+1:NRWBLK*2 )= Xc
        Xb= TEMP( Q(1:NRWBLK) )
        Xc= TEMP( Q(NRWBLK+1:NRWBLK*2) )

       RETURN
       
       END SUBROUTINE SOLVE_BLOCKT

     END SUBROUTINE BABDCR_SOLVT
