MODULE MAT_DIST
  PUBLIC MATDIST
  PUBLIC ZMATDIST
CONTAINS
  SUBROUTINE MATDIST (A_GLOB, A, PARTS, ICONTXT, DESC_A,&
       & B_GLOB, B, INROOT)
    !
    ! An utility subroutine to distribute a matrix among processors
    ! according to a user defined data distribution, using PESSL
    ! sparse matrix subroutines.
    !
    !  Type(D_SPMAT)                            :: A_GLOB
    !     On Entry: this contains the global sparse matrix as follows:
    !        A%FIDA =='CSR'
    !        A%ASPK for coefficient values
    !        A%IA1  for column indices
    !        A%IA2  for row pointers
    !        A%M    for number of global matrix rows
    !        A%K    for number of global matrix columns
    !     On Exit : undefined, with unassociated pointers.
    !
    !  Type(D_SPMAT)                            :: A
    !     On Entry: fresh variable.
    !     On Exit : this will contain the local sparse matrix.
    !
    !       INTERFACE PARTS
    !         !   .....user passed subroutine.....
    !         SUBROUTINE PARTS(GLOBAL_INDX,N,NP,PV,NV)
    !           IMPLICIT NONE
    !           INTEGER, INTENT(IN)  :: GLOBAL_INDX, N, NP
    !           INTEGER, INTENT(OUT) :: NV
    !           INTEGER, INTENT(OUT) :: PV(*)
    !
    !       END SUBROUTINE PARTS
    !       END INTERFACE
    !     On Entry:  subroutine providing user defined data distribution.
    !        For each GLOBAL_INDX the subroutine should return
    !        the list  PV of all processes owning the row with
    !        that index; the list will contain NV entries.
    !        Usually NV=1; if NV >1 then we have an overlap in the data
    !        distribution.
    !
    !  Integer                                  :: ICONTXT
    !     On Entry: BLACS context.
    !     On Exit : unchanged.
    !
    !  Type (DESC_TYPE)                  :: DESC_A
    !     On Entry: fresh variable.
    !     On Exit : the updated array descriptor
    !
    !  Real(Kind(1.D0)), Pointer, Optional      :: B_GLOB(:)
    !     On Entry: this contains right hand side.
    !     On Exit :
    !
    !  Real(Kind(1.D0)), Pointer, Optional      :: B(:)
    !     On Entry: fresh variable.
    !     On Exit : this will contain the local right hand side.
    !
    !  Integer, Optional    :: inroot
    !     On Entry: specifies processor holding A_GLOB. Default: 0
    !     On Exit : unchanged.
    !
    USE TYPESP
    USE TYPEDESC
    USE F90TOOLS
    Implicit None   ! Parameters
    Type(D_SPMAT)              :: A_GLOB
    Real(Kind(1.D0)), Pointer  :: B_GLOB(:)
    Integer                    :: ICONTXT
    Type(D_SPMAT)              :: A
    Real(Kind(1.D0)), Pointer  :: B(:)
    integer, pointer           :: ierrv(:)
    Type (DECOMP_DATA_TYPE)    :: DESC_A
    INTEGER, OPTIONAL          :: INROOT
    INTERFACE 
      !   .....user passed subroutine.....
      SUBROUTINE PARTS(GLOBAL_INDX,N,NP,PV,NV)
        IMPLICIT NONE
        INTEGER, INTENT(IN)  :: GLOBAL_INDX, N, NP
        INTEGER, INTENT(OUT) :: NV
        INTEGER, INTENT(OUT) :: PV(*) 
      END SUBROUTINE PARTS
    END INTERFACE   ! Local variables
    Integer                     :: NPROW, NPCOL, MYPROW, MYPCOL
    Integer                     :: IRCODE, LENGTH_ROW, I_COUNT, J_COUNT,&
         & K_COUNT, BLOCKDIM, ROOT, LIWORK, NROW, NCOL, NNZERO, NRHS,&
         & I,J,K, LL, INFO, ISIZE, IPROC, nnr
    Integer, Pointer            :: IWORK(:)
    CHARACTER                   :: AFMT*5, atyp*5
    Type(D_SPMAT)               :: BLCK   
    integer, parameter          :: nb = 30
    ! Executable statements    
    IF (PRESENT(INROOT)) THEN
      ROOT = INROOT
    ELSE
      ROOT = 0
    END IF
    allocate(ierrv(6))
    CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL)     
    IF (MYPROW == ROOT) THEN
      ! Extract information from A_GLOB
      IF (A_GLOB%FIDA.NE. 'CSR') THEN
        WRITE(0,*) 'Unsupported input matrix format'
        CALL BLACS_ABORT(ICONTXT,-1)
      ENDIF
      NROW = A_GLOB%M
      NCOL = A_GLOB%K
      IF (NROW /= NCOL) THEN
        WRITE(0,*) 'A rectangular matrix ? ',NROW,NCOL
        CALL BLACS_ABORT(ICONTXT,-1)
      ENDIF
      NNZERO = Size(A_GLOB%ASPK)
      NRHS   = 1
      ! Broadcast informations to other processors
      CALL IGEBS2D(ICONTXT, 'A', ' ', 1, 1, NROW, 1)
      CALL IGEBS2D(ICONTXT, 'A', ' ', 1, 1, NCOL, 1)
      CALL IGEBS2D(ICONTXT, 'A', ' ', 1, 1, NNZERO, 1)
      CALL IGEBS2D(ICONTXT, 'A', ' ', 1, 1, NRHS, 1)
    ELSE !(MYPROW /= root)
      ! Receive informations
      CALL IGEBR2D(ICONTXT, 'A', ' ', 1, 1, NROW, 1, ROOT, 0)
      CALL IGEBR2D(ICONTXT, 'A', ' ', 1, 1, NCOL, 1, ROOT, 0)
      CALL IGEBR2D(ICONTXT, 'A', ' ', 1, 1, NNZERO, 1, ROOT, 0)
      CALL IGEBR2D(ICONTXT, 'A', ' ', 1, 1, NRHS, 1, ROOT, 0)
    END IF   ! Allocate integer work area
    LIWORK = MAX(NPROW, NROW + NCOL)
    ALLOCATE(IWORK(LIWORK), STAT = IRCODE)
    IF (IRCODE /= 0) THEN
      WRITE(0,*) 'MATDIST Allocation failed'
      RETURN
    ENDIF
    IF (MYPROW == ROOT) THEN
      WRITE (*, FMT = *) 'Start matdist',root, size(iwork)
    ENDIF
    CALL F90_PSDSCALL(NROW,NROW,PARTS,ICONTXT,IERRV,DESC_A)
    CALL F90_PSSPALL(A,IERRV,DESC_A,NNZ=NNZERO/NPROW)
    CALL F90_PSDSALL(NROW,B,IERRV,DESC_A)   
    ISIZE = MAX(3*NB,NCOL)
    
    
    ALLOCATE(BLCK%ASPK(NNZERO),BLCK%IA1(NNZERO),BLCK%IA2(NNZERO),STAT=IRCODE)
    IF (IRCODE /= 0) THEN
      WRITE(0,*) 'Error on allocating BLCK'
      CALL BLACS_ABORT(ICONTXT,-1)
      STOP
    ENDIF

    BLCK%M    = 1
    BLCK%K    = NCOL
    BLCK%FIDA = 'CSR'
    I_COUNT   = 1

    DO WHILE (I_COUNT.LE.NROW)

       CALL PARTS(I_COUNT,NROW,NPROW,IWORK, LENGTH_ROW)

      IF (LENGTH_ROW.EQ.1) THEN 
        J_COUNT = I_COUNT + 1
        IPROC   = IWORK(1) 
        CALL PARTS(J_COUNT,NROW,NPROW,IWORK, LENGTH_ROW)

        DO WHILE ((J_COUNT.LE.NROW).AND.(J_COUNT-I_COUNT.LT.NB)&
             &.AND.(LENGTH_ROW.EQ.1).AND.(IWORK(1).EQ.IPROC))
          J_COUNT = J_COUNT + 1
          if (j_count.le.nrow) &
               & CALL PARTS(J_COUNT,NROW,NPROW,IWORK, LENGTH_ROW)
        END DO
        

        ! Now we should insert rows I_COUNT..J_COUNT-1
        nnr = j_count - i_count

        IF (MYPROW == ROOT) THEN

          do j = i_count, j_count
            blck%ia2(j-i_count+1) = a_glob%ia2(j) - &
                 & a_glob%ia2(i_count) + 1
          enddo

          k = a_glob%ia2(i_count)
          do j = k, a_glob%ia2(j_count)-1
            blck%aspk(j-k+1) = a_glob%aspk(j)
            blck%ia1(j-k+1) = a_glob%ia1(j)
          enddo

          ll     = blck%ia2(nnr+1) - 1
          blck%m = nnr
          blck%k = nrow

          IF (IPROC == MYPROW) THEN
            CALL F90_PSSPINS(A,I_COUNT,1,BLCK,IERRV,DESC_A)
            CALL F90_PSDSINS(NNR,B,I_COUNT,B_GLOB(I_COUNT:J_COUNT-1),&
                 &IERRV,DESC_A)
          ELSE
            CALL IGESD2D(ICONTXT,1,1,NNR,1,IPROC,0)
            CALL IGESD2D(ICONTXT,1,1,LL,1,IPROC,0)
            CALL IGESD2D(ICONTXT,NNR+1,1,BLCK%IA2,NNR+1,IPROC,0)
            CALL IGESD2D(ICONTXT,LL,1,BLCK%IA1,LL,IPROC,0)
            CALL DGESD2D(ICONTXT,LL,1,BLCK%ASPK,LL,IPROC,0)
            CALL DGESD2D(ICONTXT,NNR,1,B_GLOB(I_COUNT:J_COUNT-1),NNR,IPROC,0)
            CALL IGERV2D(ICONTXT,1,1,LL,1,IPROC,0)
          ENDIF
        ELSE IF (MYPROW /= ROOT) THEN

          IF (IPROC == MYPROW) THEN
            CALL IGERV2D(ICONTXT,1,1,NNR,1,ROOT,0)
            CALL IGERV2D(ICONTXT,1,1,LL,1,ROOT,0)
            CALL IGERV2D(ICONTXT,NNR+1,1,BLCK%IA2,NNR+1,ROOT,0)
            CALL IGERV2D(ICONTXT,LL,1,BLCK%IA1,LL,ROOT,0)
            CALL DGERV2D(ICONTXT,LL,1,BLCK%ASPK,LL,ROOT,0)
            CALL DGERV2D(ICONTXT,NNR,1,B_GLOB(I_COUNT:I_COUNT+NNR-1),NNR,ROOT,0)
            CALL IGESD2D(ICONTXT,1,1,LL,1,ROOT,0)
            BLCK%M = NNR
            BLCK%K = NROW
            CALL F90_PSSPINS(A,I_COUNT,1,BLCK,IERRV,DESC_A)
            CALL F90_PSDSINS(NNR,B,I_COUNT,B_GLOB(I_COUNT:I_COUNT+NNR-1),&
                 &IERRV,DESC_A)
          ENDIF
        ENDIF

        i_count = j_count
      else
        ! Here processors are counted 1..NPROW
        DO J_COUNT = 1, LENGTH_ROW
          K_COUNT = IWORK(J_COUNT)
          IF (MYPROW == ROOT) THEN
            BLCK%IA2(1) = 1
            BLCK%IA2(2) = 1
            DO J = A_GLOB%IA2(I_COUNT), A_GLOB%IA2(I_COUNT+1)-1
              BLCK%ASPK(BLCK%IA2(2)) = A_GLOB%ASPK(J)
              BLCK%IA1(BLCK%IA2(2)) = A_GLOB%IA1(J)
              BLCK%IA2(2) =BLCK%IA2(2) + 1
            ENDDO
            LL = BLCK%IA2(2) - 1
            IF (K_COUNT == MYPROW) THEN
              BLCK%INFOA(1) = LL
              BLCK%INFOA(2) = LL
              BLCK%INFOA(3) = 2
              BLCK%INFOA(4) = 1
              BLCK%INFOA(5) = 1
              BLCK%INFOA(6) = 1
              BLCK%M = 1
              BLCK%K = NROW

              CALL F90_PSSPINS(A,I_COUNT,1,BLCK,IERRV,DESC_A)
              CALL F90_PSDSINS(1,B,I_COUNT,B_GLOB(I_COUNT:I_COUNT),&
                   &IERRV,DESC_A)
            ELSE
              CALL IGESD2D(ICONTXT,1,1,LL,1,K_COUNT,0)
              CALL IGESD2D(ICONTXT,LL,1,BLCK%IA1,LL,K_COUNT,0)
              CALL DGESD2D(ICONTXT,LL,1,BLCK%ASPK,LL,K_COUNT,0)
              CALL DGESD2D(ICONTXT,1,1,B_GLOB(I_COUNT),1,K_COUNT,0)
              CALL IGERV2D(ICONTXT,1,1,LL,1,K_COUNT,0)
            ENDIF
          ELSE IF (MYPROW /= ROOT) THEN
            IF (K_COUNT == MYPROW) THEN
              CALL IGERV2D(ICONTXT,1,1,LL,1,ROOT,0)
              BLCK%IA2(1) = 1
              BLCK%IA2(2) = LL+1
              CALL IGERV2D(ICONTXT,LL,1,BLCK%IA1,LL,ROOT,0)
              CALL DGERV2D(ICONTXT,LL,1,BLCK%ASPK,LL,ROOT,0)
              CALL DGERV2D(ICONTXT,1,1,B_GLOB(I_COUNT),1,ROOT,0)
              CALL IGESD2D(ICONTXT,1,1,LL,1,ROOT,0)
              BLCK%M = 1
              BLCK%K = NROW          
              CALL F90_PSSPINS(A,I_COUNT,1,BLCK,IERRV,DESC_A)
              CALL F90_PSDSINS(1,B,I_COUNT,B_GLOB(I_COUNT:I_COUNT),&
                   &IERRV,DESC_A)
            ENDIF
          ENDIF
        END DO
        I_COUNT = I_COUNT + 1
      endif
    END DO 
    ! Default storage format for sparse matrix; we do not
    ! expect duplicated entries.
    AFMT = 'CSR'
    CALL F90_PSSPASB(A,IERRV,DESC_A,DUP=1)     
    CALL F90_PSDSASB(B,IERRV,DESC_A)     
    DEALLOCATE(BLCK%ASPK,BLCK%IA1,BLCK%IA2,IWORK)   
    IF (MYPROW == root) Write (*, FMT = *) 'End matdist'     
    RETURN   
  END SUBROUTINE MATDIST



  SUBROUTINE ZMATDIST (A_GLOB, A, PARTS, ICONTXT, DESC_A,&
       & B_GLOB, B, INROOT)
    !
    ! An utility subroutine to distribute a matrix among processors
    ! according to a user defined data distribution, using PESSL
    ! sparse matrix subroutines.
    !
    !  Type(D_SPMAT)                            :: A_GLOB
    !     On Entry: this contains the global sparse matrix as follows:
    !        A%FIDA =='CSR'
    !        A%ASPK for coefficient values
    !        A%IA1  for column indices
    !        A%IA2  for row pointers
    !        A%M    for number of global matrix rows
    !        A%K    for number of global matrix columns
    !     On Exit : undefined, with unassociated pointers.
    !
    !  Type(D_SPMAT)                            :: A
    !     On Entry: fresh variable.
    !     On Exit : this will contain the local sparse matrix.
    !
    !       INTERFACE PARTS
    !         !   .....user passed subroutine.....
    !         SUBROUTINE PARTS(GLOBAL_INDX,N,NP,PV,NV)
    !           IMPLICIT NONE
    !           INTEGER, INTENT(IN)  :: GLOBAL_INDX, N, NP
    !           INTEGER, INTENT(OUT) :: NV
    !           INTEGER, INTENT(OUT) :: PV(*)
    !
    !       END SUBROUTINE PARTS
    !       END INTERFACE
    !     On Entry:  subroutine providing user defined data distribution.
    !        For each GLOBAL_INDX the subroutine should return
    !        the list  PV of all processes owning the row with
    !        that index; the list will contain NV entries.
    !        Usually NV=1; if NV >1 then we have an overlap in the data
    !        distribution.
    !
    !  Integer                                  :: ICONTXT
    !     On Entry: BLACS context.
    !     On Exit : unchanged.
    !
    !  Type (DESC_TYPE)                  :: DESC_A
    !     On Entry: fresh variable.
    !     On Exit : the updated array descriptor
    !
    !  Real(Kind(1.D0)), Pointer, Optional      :: B_GLOB(:)
    !     On Entry: this contains right hand side.
    !     On Exit :
    !
    !  Real(Kind(1.D0)), Pointer, Optional      :: B(:)
    !     On Entry: fresh variable.
    !     On Exit : this will contain the local right hand side.
    !
    !  Integer, Optional    :: inroot
    !     On Entry: specifies processor holding A_GLOB. Default: 0
    !     On Exit : unchanged.
    !
    USE TYPESP
    USE TYPEDESC
    USE F90TOOLS
    Implicit None   ! Parameters
    Type(Z_SPMAT)              :: A_GLOB
    COMPLEX(Kind(1.D0)), Pointer  :: B_GLOB(:)
    Integer                    :: ICONTXT
    TYPE(Z_SPMAT)              :: A
    COMPLEX(Kind(1.D0)), Pointer  :: B(:)
    integer, pointer           :: ierrv(:)
    Type (DECOMP_DATA_TYPE)    :: DESC_A
    INTEGER, OPTIONAL          :: INROOT
    INTERFACE 
      !   .....user passed subroutine.....
      SUBROUTINE PARTS(GLOBAL_INDX,N,NP,PV,NV)
        IMPLICIT NONE
        INTEGER, INTENT(IN)  :: GLOBAL_INDX, N, NP
        INTEGER, INTENT(OUT) :: NV
        INTEGER, INTENT(OUT) :: PV(*) 
      END SUBROUTINE PARTS
    END INTERFACE   ! Local variables
    Integer                     :: NPROW, NPCOL, MYPROW, MYPCOL
    Integer                     :: IRCODE, LENGTH_ROW, I_COUNT, J_COUNT,&
         & K_COUNT, BLOCKDIM, ROOT, LIWORK, NROW, NCOL, NNZERO, NRHS,&
         & I,J,K, LL, INFO, ISIZE, IPROC, nnr
    Integer, Pointer            :: IWORK(:)
    CHARACTER                   :: AFMT*5, atyp*5
    Type(Z_SPMAT)               :: BLCK   
    integer, parameter          :: nb = 30
    ! Executable statements    
    IF (PRESENT(INROOT)) THEN
      ROOT = INROOT
    ELSE
      ROOT = 0
    END IF
    allocate(ierrv(6))
    CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL)     
    IF (MYPROW == ROOT) THEN
      ! Extract information from A_GLOB
      IF (A_GLOB%FIDA.NE. 'CSR') THEN
        WRITE(0,*) 'Unsupported input matrix format'
        CALL BLACS_ABORT(ICONTXT,-1)
      ENDIF
      NROW = A_GLOB%M
      NCOL = A_GLOB%K
      IF (NROW /= NCOL) THEN
        WRITE(0,*) 'A rectangular matrix ? ',NROW,NCOL
        CALL BLACS_ABORT(ICONTXT,-1)
      ENDIF
      NNZERO = Size(A_GLOB%ASPK)
      NRHS   = 1
      ! Broadcast informations to other processors
      CALL IGEBS2D(ICONTXT, 'A', ' ', 1, 1, NROW, 1)
      CALL IGEBS2D(ICONTXT, 'A', ' ', 1, 1, NCOL, 1)
      CALL IGEBS2D(ICONTXT, 'A', ' ', 1, 1, NNZERO, 1)
      CALL IGEBS2D(ICONTXT, 'A', ' ', 1, 1, NRHS, 1)
    ELSE !(MYPROW /= root)
      ! Receive informations
      CALL IGEBR2D(ICONTXT, 'A', ' ', 1, 1, NROW, 1, ROOT, 0)
      CALL IGEBR2D(ICONTXT, 'A', ' ', 1, 1, NCOL, 1, ROOT, 0)
      CALL IGEBR2D(ICONTXT, 'A', ' ', 1, 1, NNZERO, 1, ROOT, 0)
      CALL IGEBR2D(ICONTXT, 'A', ' ', 1, 1, NRHS, 1, ROOT, 0)
    END IF   ! Allocate integer work area
    LIWORK = MAX(NPROW, NROW + NCOL)
    ALLOCATE(IWORK(LIWORK), STAT = IRCODE)
    IF (IRCODE /= 0) THEN
      WRITE(0,*) 'MATDIST Allocation failed'
      RETURN
    ENDIF
    IF (MYPROW == ROOT) THEN
      WRITE (*, FMT = *) 'Start matdist',root, size(iwork)
    ENDIF
    CALL F90_PSDSCALL(NROW,NROW,PARTS,ICONTXT,IERRV,DESC_A)
    CALL F90_PSSPALL(A,IERRV,DESC_A,NNZ=NNZERO/NPROW)
    CALL F90_PSDSALL(NROW,B,IERRV,DESC_A)   
    ISIZE = MAX(3*NB,NCOL)


    ALLOCATE(BLCK%ASPK(nnzero),BLCK%IA1(nnzero),BLCK%IA2(nnzero),STAT=IRCODE)
    IF (IRCODE /= 0) THEN
      WRITE(0,*) 'Error on allocating BLCK'
      CALL BLACS_ABORT(ICONTXT,-1)
      STOP
    ENDIF

    BLCK%M    = 1
    BLCK%K    = NCOL
    BLCK%FIDA = 'CSR'
    I_COUNT   = 1

    DO WHILE (I_COUNT.LE.NROW)
      CALL PARTS(I_COUNT,NROW,NPROW,IWORK, LENGTH_ROW)

      IF (LENGTH_ROW.EQ.1) THEN 
        J_COUNT = I_COUNT + 1
        IPROC   = IWORK(1) 
        CALL PARTS(J_COUNT,NROW,NPROW,IWORK, LENGTH_ROW)

        DO WHILE ((J_COUNT.LE.NROW).AND.(J_COUNT-I_COUNT.LT.NB)&
             &.AND.(LENGTH_ROW.EQ.1).AND.(IWORK(1).EQ.IPROC))
          J_COUNT = J_COUNT + 1
          if (j_count.le.nrow) &
               & CALL PARTS(J_COUNT,NROW,NPROW,IWORK, LENGTH_ROW)
        END DO


        ! Now we should insert rows I_COUNT..J_COUNT-1
        nnr = j_count - i_count

        IF (MYPROW == ROOT) THEN
          do j = i_count, j_count
            blck%ia2(j-i_count+1) = a_glob%ia2(j) - &
                 & a_glob%ia2(i_count) + 1
          enddo

          k = a_glob%ia2(i_count)
          do j = k, a_glob%ia2(j_count)-1
            blck%aspk(j-k+1) = a_glob%aspk(j)
            blck%ia1(j-k+1) = a_glob%ia1(j)
          enddo

          ll     = blck%ia2(nnr+1) - 1
          blck%m = nnr
          blck%k = nrow
          IF (IPROC == MYPROW) THEN
            CALL F90_PSSPINS(A,I_COUNT,1,BLCK,IERRV,DESC_A)
            CALL F90_PSDSINS(NNR,B,I_COUNT,B_GLOB(I_COUNT:J_COUNT-1),&
                 &IERRV,DESC_A)
          ELSE
            CALL IGESD2D(ICONTXT,1,1,NNR,1,IPROC,0)
            CALL IGESD2D(ICONTXT,1,1,LL,1,IPROC,0)
            CALL IGESD2D(ICONTXT,NNR+1,1,BLCK%IA2,NNR+1,IPROC,0)
            CALL IGESD2D(ICONTXT,LL,1,BLCK%IA1,LL,IPROC,0)
            CALL ZGESD2D(ICONTXT,LL,1,BLCK%ASPK,LL,IPROC,0)
            CALL ZGESD2D(ICONTXT,NNR,1,B_GLOB(I_COUNT:J_COUNT-1),NNR,IPROC,0)
            CALL IGERV2D(ICONTXT,1,1,LL,1,IPROC,0)
          ENDIF

        ELSE IF (MYPROW /= ROOT) THEN

          IF (IPROC == MYPROW) THEN
            CALL IGERV2D(ICONTXT,1,1,NNR,1,ROOT,0)
            CALL IGERV2D(ICONTXT,1,1,LL,1,ROOT,0)
            CALL IGERV2D(ICONTXT,NNR+1,1,BLCK%IA2,NNR+1,ROOT,0)
            CALL IGERV2D(ICONTXT,LL,1,BLCK%IA1,LL,ROOT,0)
            CALL ZGERV2D(ICONTXT,LL,1,BLCK%ASPK,LL,ROOT,0)
            CALL ZGERV2D(ICONTXT,NNR,1,B_GLOB(I_COUNT:I_COUNT+NNR-1),NNR,ROOT,0)
            CALL IGESD2D(ICONTXT,1,1,LL,1,ROOT,0)
            BLCK%M = NNR
            BLCK%K = NROW
            CALL F90_PSSPINS(A,I_COUNT,1,BLCK,IERRV,DESC_A)
            CALL F90_PSDSINS(NNR,B,I_COUNT,B_GLOB(I_COUNT:I_COUNT+NNR-1),&
                 &IERRV,DESC_A)
          ENDIF
        ENDIF

        i_count = j_count
      else
        ! Here processors are counted 1..NPROW
        DO J_COUNT = 1, LENGTH_ROW
          K_COUNT = IWORK(J_COUNT)
          IF (MYPROW == ROOT) THEN
            BLCK%IA2(1) = 1
            BLCK%IA2(2) = 1
            DO J = A_GLOB%IA2(I_COUNT), A_GLOB%IA2(I_COUNT+1)-1
              BLCK%ASPK(BLCK%IA2(2)) = A_GLOB%ASPK(J)
              BLCK%IA1(BLCK%IA2(2)) = A_GLOB%IA1(J)
              BLCK%IA2(2) =BLCK%IA2(2) + 1
            ENDDO
            LL = BLCK%IA2(2) - 1
            IF (K_COUNT == MYPROW) THEN
              BLCK%INFOA(1) = LL
              BLCK%INFOA(2) = LL
              BLCK%INFOA(3) = 2
              BLCK%INFOA(4) = 1
              BLCK%INFOA(5) = 1
              BLCK%INFOA(6) = 1
              BLCK%M = 1
              BLCK%K = NROW

              CALL F90_PSSPINS(A,I_COUNT,1,BLCK,IERRV,DESC_A)
              CALL F90_PSDSINS(1,B,I_COUNT,B_GLOB(I_COUNT:I_COUNT),&
                   &IERRV,DESC_A)
            ELSE
              CALL IGESD2D(ICONTXT,1,1,LL,1,K_COUNT,0)
              CALL IGESD2D(ICONTXT,LL,1,BLCK%IA1,LL,K_COUNT,0)
              CALL ZGESD2D(ICONTXT,LL,1,BLCK%ASPK,LL,K_COUNT,0)
              CALL ZGESD2D(ICONTXT,1,1,B_GLOB(I_COUNT),1,K_COUNT,0)
              CALL IGERV2D(ICONTXT,1,1,LL,1,K_COUNT,0)
            ENDIF
          ELSE IF (MYPROW /= ROOT) THEN
            IF (K_COUNT == MYPROW) THEN
              CALL IGERV2D(ICONTXT,1,1,LL,1,ROOT,0)
              BLCK%IA2(1) = 1
              BLCK%IA2(2) = LL+1
              CALL IGERV2D(ICONTXT,LL,1,BLCK%IA1,LL,ROOT,0)
              CALL ZGERV2D(ICONTXT,LL,1,BLCK%ASPK,LL,ROOT,0)
              CALL ZGERV2D(ICONTXT,1,1,B_GLOB(I_COUNT),1,ROOT,0)
              CALL IGESD2D(ICONTXT,1,1,LL,1,ROOT,0)
              BLCK%M = 1
              BLCK%K = NROW          
              CALL F90_PSSPINS(A,I_COUNT,1,BLCK,IERRV,DESC_A)
              CALL F90_PSDSINS(1,B,I_COUNT,B_GLOB(I_COUNT:I_COUNT),&
                   &IERRV,DESC_A)
            ENDIF
          ENDIF
        END DO
        I_COUNT = I_COUNT + 1
      endif
    END DO
    ! Default storage format for sparse matrix; we do not
    ! expect duplicated entries.

    AFMT = 'CSR'
    CALL F90_PSSPASB(A,IERRV,DESC_A,DUP=1)     
    CALL F90_PSDSASB(B,IERRV,DESC_A)     
    DEALLOCATE(BLCK%ASPK,BLCK%IA1,BLCK%IA2,IWORK)   
    IF (MYPROW == root) Write (*, FMT = *) 'End matdist'     
    RETURN   
  END SUBROUTINE ZMATDIST

END MODULE MAT_DIST
