      PROGRAM DLUTIMER
*
*     Simple Timing Program for ScaLAPACK routine for LU factiorization
*     and solver
*
*     The program must be driven by a short data file. The name of
*     the data file is 'LU.dat'. An annotated example of a data
*     file can be obtained by deleting the first 6 characters from
*     the following 5 lines: (The number in the first line is the
*     number of different problems the user wants to test. If it
*     is 'n', the user should input exactly 'n' numbers in each line
*     after. Also, this program will use the first column of N, NB,
*     P, Q to do the first test, second column of N, NB, P, Q to do
*     the second test, etc.)
*     2                 number of problems sizes
*     500 600           values of N (N x N square matrix)
*     64 64             values of NB
*     2 2               values of P
*     2 2               values of Q
*
*     .. Parameters ..
      INTEGER            CSRC, DBLESZ, INTGSZ, TOTMEM, MEMSIZ, NBRHS,
     $                   NOUT, NRHS, NTESTS, RSRC, CSRC_, DLEN_, LLD_,
     $                   M_, MB_, N_, NB_, RSRC_
      PARAMETER          ( CSRC = 0, DBLESZ = 8, INTGSZ = 4,
     $                   TOTMEM = 40000000, MEMSIZ = TOTMEM / DBLESZ,
     $                   NBRHS = 1, NOUT = 6, NRHS = 1, NTESTS = 20,
     $                   RSRC = 0, CSRC_ = 8, DLEN_ = 9, LLD_ = 9,
     $                   M_ = 3, MB_ = 5, N_ = 4, NB_ = 6, RSRC_ = 7 )
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
*     ..
*     .. Local Scalars ..
      INTEGER            I, IAM, IASEED, IBSEED, ICTXT, INFO, IPA, IPB,
     $                   IPPIV, IPW, LIPIV, LLDA, LLDB, MYCOL, MYROW,
     $                   NCOLA, NCOLB, NMAT, NPA, NPB, NPROCS, NQA, NQB
      DOUBLE PRECISION   ANORM, NOPS, SRESID, TMFLOPS, XNORM
*     ..
*     .. Local Arrays ..
      INTEGER            DESCA( DLEN_ ), DESCB( DLEN_ ), IERR( 1 ),
     $                   N( NTESTS ), NB( NTESTS ), NPCOL( NTESTS ),
     $                   NPROW( NTESTS )
      DOUBLE PRECISION   MEM( MEMSIZ ), WTIME( 2 )
*     ..
*     .. External Functions ..
      INTEGER            ICEIL, NUMROC
      DOUBLE PRECISION   PDLANGE
      EXTERNAL           ICEIL, NUMROC, PDLANGE
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT,
     $                   BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO,
     $                   DESCINIT, GETINFO, IGSUM2D, PDGETRF, PDGETRS,
     $                   PDLASCHK, PDMATGEN, SLBOOT, SLCOMBINE, SLTIMER
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          DBLE, MAX
*     ..
*     .. Executable Statements ..
*
*     Getting start information
*
      CALL BLACS_PINFO( IAM, NPROCS )
      IASEED = 100
      IBSEED = 200
      CALL GETINFO( NMAT, N, NTESTS, NB, NPROW, NPCOL, MEM, IAM,
     $              NPROCS )
*
*     Print out headings
*
      IF( IAM.EQ.0 ) THEN
         WRITE( NOUT, FMT = 9999 )
         WRITE( NOUT, FMT = 9983 )NPROCS
         WRITE( NOUT, FMT = 9998 )
         WRITE( NOUT, FMT = 9997 )
      END IF
*
*     Begin the loop
*
      DO 10 I = 1, NMAT
*
*        Make sure grid information is correct
*
         IERR( 1 ) = 0
         IF( NPROW( I ).LT.1 ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9996 )'GRID', 'nprow', NPROW( I )
            IERR( 1 ) = 1
         ELSE IF( NPCOL( I ).LT.1 ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9996 )'GRID', 'npcol', NPCOL( I )
            IERR( 1 ) = 1
         ELSE IF( NPROW( I )*NPCOL( I ).GT.NPROCS ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9995 )NPROW( I )*NPCOL( I ), NPROCS
            IERR( 1 ) = 1
         END IF
*
         IF( IERR( 1 ).GT.0 ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9992 )'grid'
            GO TO 10
         END IF
*
*        Make sure matrix information is correct
*
         IF( N( I ).LT.1 ) THEN
            IF( IAM.EQ.0 ) THEN
               WRITE( NOUT, FMT = 9996 )'MATRIX', 'N', N( I )
               WRITE( NOUT, FMT = 9992 )'matrix'
            END IF
            GO TO 10
         END IF
*
*        Make sure nb is legal
*
         IF( NB( I ).LT.1 ) THEN
            IF( IAM.EQ.0 ) THEN
               WRITE( NOUT, FMT = 9996 )'NB', 'NB', NB( I )
               WRITE( NOUT, FMT = 9992 )'NB'
            END IF
            GO TO 10
         END IF
*
*        Initialize the process grid
*
         CALL BLACS_GET( -1, 0, ICTXT )
         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW( I ),
     $                        NPCOL( I ) )
         CALL BLACS_GRIDINFO( ICTXT, NPROW( I ), NPCOL( I ), MYROW,
     $                        MYCOL )
*
*        Go to bottom of loop if this case doesn't use my process
*
         IF( MYROW.GE.NPROW( I ) .OR. MYCOL.GE.NPCOL( I ) )
     $      GO TO 10
*
*        Distribute the matrix on the process grid
*        Initialize the array descriptors for the matrices A and B
*
         NPA = NUMROC( N( I ), NB( I ), MYROW, RSRC, NPROW( I ) )
         LLDA = MAX( 1, NPA )
         NQA = NUMROC( N( I ), NB( I ), MYCOL, CSRC, NPCOL( I ) )
         NCOLA = MAX( 1, NQA )
         NPB = NUMROC( N( I ), NB( I ), MYROW, RSRC, NPROW( I ) )
         LLDB = MAX( 1, NPB )
         NQB = NUMROC( NRHS, NB( I ), MYCOL, CSRC, NPCOL( I ) )
         NCOLB = MAX( 1, NQB )
*
         CALL DESCINIT( DESCA, N( I ), N( I ), NB( I ), NB( I ), RSRC,
     $                  CSRC, ICTXT, LLDA, INFO )
         CALL DESCINIT( DESCB, N( I ), NRHS, NB( I ), NBRHS, RSRC, CSRC,
     $                  ICTXT, LLDB, INFO )
*
*        Assign pointers into MEM for SCALAPACK arrays
*
         IPA = 1
         IPB = IPA + LLDA*NCOLA
         IPPIV = IPB + LLDB*NCOLB
         LIPIV = ICEIL( INTGSZ*( LLDA+NB( I ) ), DBLESZ )
         IPW = IPPIV + LIPIV
*
*        Check for adequate memory for problem size
*
         IERR( 1 ) = 0
         IF( IPW.GT.MEMSIZ ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9991 )'LU-solve', IPW*DBLESZ
            IERR( 1 ) = 1
         END IF
*
*        Check all processes for an error
*
         CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
         IF( IERR( 1 ).GT.0 ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9992 )'MEMORY'
            GO TO 10
         END IF
*
*        Generate matrices A and B and distribute to the process grid
*
         CALL PDMATGEN( ICTXT, 'NO', 'NO', DESCA( M_ ), DESCA( N_ ),
     $                  DESCA( MB_ ), DESCA( NB_ ), MEM( IPA ),
     $                  DESCA( LLD_ ), DESCA( RSRC_ ), DESCA( CSRC_ ),
     $                  IASEED, 0, NPA, 0, NQA, MYROW, MYCOL,
     $                  NPROW( I ), NPCOL( I ) )
*
         CALL PDMATGEN( ICTXT, 'NO', 'NO', DESCB( M_ ), DESCB( N_ ),
     $                  DESCB( MB_ ), DESCB( NB_ ), MEM( IPB ),
     $                  DESCB( LLD_ ), DESCB( RSRC_ ), DESCB( CSRC_ ),
     $                  IBSEED, 0, NPB, 0, NQB, MYROW, MYCOL,
     $                  NPROW( I ), NPCOL( I ) )
         ANORM = PDLANGE( 'I', DESCA( M_ ), DESCA( N_ ), MEM( IPA ), 1,
     $           1, DESCA, MEM( IPW ) )
*
*        CALL THE SCALAPACK ROUTINE PDGESV
*        Solve the linear system A * X = B
*
         CALL SLBOOT
         CALL SLTIMER( 1 )
         CALL PDGETRF( N( I ), N( I ), MEM( IPA ), 1, 1, DESCA,
     $                 MEM( IPPIV ), INFO )
*
         CALL SLTIMER( 1 )
*
         IF( INFO.EQ.0 ) THEN
            CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME )
         END IF
*
         CALL SLTIMER( 2 )
         CALL PDGETRS( 'No', N( I ), NRHS, MEM( IPA ), 1, 1, DESCA,
     $                 MEM( IPPIV ), MEM( IPB ), 1, 1, DESCB, INFO )
         CALL SLTIMER( 2 )
         XNORM = PDLANGE( 'I', DESCB( M_ ), DESCB( N_ ), MEM( IPB ), 1,
     $           1, DESCA, MEM( IPW ) )
*
*
         IF( INFO.EQ.0 ) THEN
            CALL PDLASCHK( 'No', 'N', DESCA( N_ ), 1, MEM( IPB ), 1, 1,
     $                     DESCB, IASEED, 1, 1, DESCA, IBSEED, ANORM,
     $                     SRESID, MEM( IPW ) )
*
*           Gather max. of WALL clock timings
*
            CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, WTIME )
*
*
*           Print result
*
            IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
*
*              2/3 N^3 -1/2 N^2 flops for LU factorization
*              2 N^2 flops for LU solve
*
               NOPS = ( 2.0D+0 / 3.0D+0 )*( DBLE( N( I ) )**3 ) -
     $                ( ONE / 2.0D+0 )*( DBLE( N( I ) )**2 ) +
     $                2.0D+0*( DBLE( N( I ) )**2 )
*
*              Calculate total megaflops over WALL time, and
*              print output
*
               IF( WTIME( 1 ).GT.ZERO ) THEN
                  TMFLOPS = NOPS / ( ( WTIME( 1 )+WTIME( 2 ) )*1.0D+6 )
               ELSE
                  TMFLOPS = ZERO
               END IF
*
               IF( WTIME( 1 ).GT.ZERO )
     $            WRITE( NOUT, FMT = 9994 )'WALL', N( I ), NB( I ),
     $            NPROW( I ), NPCOL( I ), WTIME( 1 ), WTIME( 2 ),
     $            TMFLOPS, SRESID, 'PASSED'
*
            END IF
*
         ELSE
            WRITE( NOUT, FMT = 9993 )
         END IF
*
*        RELEASE THE PROCESS GRID
*        Free the BLACS context
*
         CALL BLACS_GRIDEXIT( ICTXT )
*
*     End the Loop
*
   10 CONTINUE
*
*     Comments for the output
*
      IF( IAM.EQ.0 ) THEN
         WRITE( NOUT, FMT = 9990 )
         WRITE( NOUT, FMT = 9989 )
         WRITE( NOUT, FMT = 9988 )
         WRITE( NOUT, FMT = 9987 )
         WRITE( NOUT, FMT = 9986 )
         WRITE( NOUT, FMT = 9985 )
         WRITE( NOUT, FMT = 9984 )
      END IF
*
*     Exit the BLACS
*
      CALL BLACS_EXIT( 0 )
*
*
 9999 FORMAT( / 'Simple Timer for ScaLAPACK routine PDGESV' )
 9998 FORMAT( / 'TIME     N  NB   P   Q  LU Time   Sol Time',
     $      '  MFLOP/S Residual  CHECK' )
 9997 FORMAT( '---- ----- --- --- --- --------- --------- --------',
     $      ' -------- -------' )
 9996 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3,
     $      '; It should be at least 1' )
 9995 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most',
     $      I4 )
 9994 FORMAT( A4, 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, F9.2, 1X, F9.2,
     $      1X, F8.2, 1X, F8.6, 1X, A6 )
 9993 FORMAT( ' Info from PDGESV is not zero' )
 9992 FORMAT( 'Bad ', A6, ' parameters: going to next test case.' )
 9991 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least',
     $      I11 )
 9990 FORMAT( / 'Comments for the output:' )
 9989 FORMAT( 'Column 1: the time is the wall clock time' )
 9988 FORMAT( 'Column 2 & 3: matrix size (N by N) and block size',
     $      ' (NB by NB)' )
 9987 FORMAT( 'Column 4 & 5: grid size (P by Q)' )
 9986 FORMAT( 'Column 6: total time (in seconds) to run PDGESV' )
 9985 FORMAT( 'Column 7: mega flops per seconds' )
 9984 FORMAT( 'Column 8: if PDGESV works fine' )
 9983 FORMAT( 'Number of processors used: ', I3 )
      STOP
      END
*
*
      SUBROUTINE GETINFO( NMAT, NVAL, LDNVAL, NBVAL, PVAL, QVAL, WORK,
     $                    IAM, NPROCS )
*
*     .. Scalar Arguments ..
      INTEGER            IAM, LDNVAL, NMAT, NPROCS
*     ..
*     .. Array Arguments ..
*
      INTEGER            NBVAL( LDNVAL ), NVAL( LDNVAL ),
     $                   PVAL( LDNVAL ), QVAL( LDNVAL ), WORK( * )
*     ..
*     .. Parameters ..
      INTEGER            NIN, NOUT
      PARAMETER          ( NIN = 11, NOUT = 6 )
*     ..
*     .. Local Scalars ..
      INTEGER            I, ICTXT
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT,
     $                   BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D,
     $                   IGEBS2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX
*     ..
*     .. Executable Statements ..
*
*     Proecss 0 reads the input data, broadcasts to other processes
*
      IF( IAM.EQ.0 ) THEN
*
*        Open file and read in user-supplied info
*
         OPEN( NIN, FILE = 'LUTIME.dat', STATUS = 'OLD' )
         READ( NIN, FMT = * )NMAT
         IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN
            WRITE( NOUT, FMT = 9999 )'N', LDNVAL
            GO TO 20
         END IF
         READ( NIN, FMT = * )( NVAL( I ), I = 1, NMAT )
         READ( NIN, FMT = * )( NBVAL( I ), I = 1, NMAT )
         READ( NIN, FMT = * )( PVAL( I ), I = 1, NMAT )
         READ( NIN, FMT = * )( QVAL( I ), I = 1, NMAT )
         CLOSE ( NIN )
*
*        For pvm only: if virtual machine not set up, allocate it
*        and spawn the correct number of processes
*
         IF( NPROCS.LT.1 ) THEN
            NPROCS = 0
            DO 10 I = 1, NMAT
               NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) )
   10       CONTINUE
            CALL BLACS_SETUP( IAM, NPROCS )
         END IF
*
*        Temporarily define blacs grid to include all processes
*        so information can be broadcast to all processes
*
         CALL BLACS_GET( -1, 0, ICTXT )
         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS )
*
*        Pack infomation array and broadcast
*
         CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, NMAT, 1 )
*
         I = 1
         CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 )
         I = I + NMAT
         CALL ICOPY( NMAT, NBVAL, 1, WORK( I ), 1 )
         I = I + NMAT
         CALL ICOPY( NMAT, PVAL, 1, WORK( I ), 1 )
         I = I + NMAT
         CALL ICOPY( NMAT, QVAL, 1, WORK( I ), 1 )
         I = I + NMAT - 1
         CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I )
*
      ELSE
*
*        If in pvm, must participate setting up virtual machine
*
         IF( NPROCS.LT.1 )
     $      CALL BLACS_SETUP( IAM, NPROCS )
*
*        Temporarily define blacs grid to include all processes
*        so information can be broadcast to all processes
*
         CALL BLACS_GET( -1, 0, ICTXT )
         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS )
*
         CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, NMAT, 1, 0, 0 )
         I = 4*NMAT
         CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 )
         I = 1
         CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 )
         I = I + NMAT
         CALL ICOPY( NMAT, WORK( I ), 1, NBVAL, 1 )
         I = I + NMAT
         CALL ICOPY( NMAT, WORK( I ), 1, PVAL, 1 )
         I = I + NMAT
         CALL ICOPY( NMAT, WORK( I ), 1, QVAL, 1 )
*
      END IF
*
      CALL BLACS_GRIDEXIT( ICTXT )
*
      RETURN
*
   20 CONTINUE
      WRITE( NOUT, FMT = 9998 )'LUTIME.dat'
      CLOSE ( NIN )
      CALL BLACS_ABORT( ICTXT, 1 )
*
      STOP
*
 9999 FORMAT( ' Number of values of ', A, ' is less than 1 or greater ',
     $      ' than ', I2 )
 9998 FORMAT( ' Illegal input in file ', A, '. Aborting run.' )
*
      END
