real a,x integer list common /prbdef/n,lda,a(10,10),x(10) common /debug/idebug,lockd c nproc = 10 idebug = 1 c lda = 10 n = 5 write(6,*)' fill the matrix and arrays' do 20 i = 1,n x(i) = 0.0 do 10 j = 1,i a(i,j) = (i-1)*n + j x(i) = x(i) + a(i,j) 10 continue 20 continue write(6,*)' the matrix' call out(a,lda,n,n) write(6,*)' the vector' call out(x,1,n,1) write(6,*)' about to call libopn' c call init(nproc) c call graph() c call sched(nproc) c write(6,*)' returned from libopn' write(6,*)' the solution' call out(x,1,n,1) stop end subroutine out(a,lda,m,n) real a(lda,*) do 10 i = 1,m write(6,*)(a(i,j),j=1,n) 10 continue return end subroutine work(id) c c *** common area variables *** c integer gtprb integer nproc c integer list(100) common /prbdef/n,lda,a(10,10),x(10) common /debug/idebug,lockd c c *** local variables *** c c c *** initialize variables ***/ c if( idebug .eq. 1 ) then call lockon(lockd) write(6,*)'work: started id =',id call lockoff(lockd) endif if( id .eq. 1 ) then c if( idebug .eq. 1 ) then call lockon(lockd) write(6,*)'work: id = ',id, 'call start ' call lockoff(lockd) endif c call start endif 10 continue myjob = gtprb(jobtag) call gtprms(jobtag,list) if( idebug .eq. 1 ) then call lockon(lockd) write(6,*) 'work: id = ',id,'myjob = ',myjob, $ 'jobtag = ',jobtag call lockoff(lockd) endif c goto (100,200,300,400),myjob c 100 continue return c 200 continue list1 = list(1) list2= list(2) if( idebug .eq. 1 ) then call lockon(lockd) write(6,*)'work: id =',id,'about to call strsv, list1,list2', $ list1,list2 call lockoff(lockd) endif call strsv('low','n','n',list1,a(list2,list2),lda,x(list2),1) call chekin(jobtag) go to 10 c 300 continue list1 = list(1) list2 = list(2) list3 = list(3) list4 = list(4) if( idebug .eq. 1 ) then call lockon(lockd) write(6,*)'work: id =',id, $ 'about to call sgemv,list1,list2,list3,list4' write(6,*)' ',list1,list2,list3,list4 call lockoff(lockd) endif call sgemv('n',list1,list2,-1.0,a(list3,list4),lda, $ x(list4),1,1.0,x(list3),1) call chekin(jobtag) go to 10 c 400 continue c return end C C*********************************************************************** C C File of the proposed REAL Level 2 BLAS routines: C C SGEMV, SGBMV, SSYMV, SSBMV, SSPMV, STRMV, STBMV, STPMV, C SGER , SSYR , SSPR , C SSYR2, SSPR2, C STRSV, STBSV, STPSV. C C See: C C Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. C A proposal for an extended set of Fortran Basic Linear Algebra C Subprograms. Technical Memorandum No.41 (revision 1), C Mathematics and Computer Science Division, Argone National C Laboratory, 9700 South Cass Avenue, Argonne, Illinois 60439, C USA. C C*********************************************************************** C SUBROUTINE SGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, $ BETA, Y, INCY ) CHARACTER*1 TRANS INTEGER M, N, LDA, INCX, INCY REAL ALPHA, A( LDA, * ), X( * ), BETA REAL Y( * ) * * Purpose * ======= * * SGEMV performs one of the matrix-vector operations * * y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, * * where alpha and beta are scalars, x and y are vectors and A is an * m by n matrix. * * Parameters * ========== * * TRANS - CHARACTER*1. * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' y := alpha*A*x + beta*y. * * TRANS = 'T' or 't' y := alpha*A'*x + beta*y. * * TRANS = 'C' or 'c' y := alpha*A'*x + beta*y *. * Unchanged on exit. * * M - INTEGER. * On entry, M specifies the number of rows of the matrix A. * M must be at least zero. * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * Unchanged on exit. * * ALPHA - REAL . * On entry, ALPHA specifies the scalar alpha. * Unchanged on exit. * * A - REAL array of DIMENSION ( LDA, n ). * Before entry, the leading m by n part of the array A must * contain the matrix of coefficients. * Unchanged on exit. * * LDA - INTEGER. * On entry, LDA specifies the leading dimension of A as * declared in the calling (sub) program. LDA must be at least * m. * Unchanged on exit. * * X - REAL array of DIMENSION at least * ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' * and at least * ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. * Before entry, the incremented array X must contain the * vector x. * Unchanged on exit. * * INCX - INTEGER. * On entry, INCX specifies the increment for the elements of * X. * Unchanged on exit. * * BETA - REAL . * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then Y need not be set on input. * Unchanged on exit. * * Y - REAL array of DIMENSION at least * ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' * and at least * ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. * Before entry with BETA non-zero, the incremented array Y * must contain the vector y. On exit, Y is overwritten by the * updated vector y. * * INCY - INTEGER. * On entry, INCY specifies the increment for the elements of * Y. * Unchanged on exit. * * * Note that TRANS, M, N and LDA must be such that the value of the * LOGICAL variable OK in the following statement is true. * * OK = ( ( TRANS.EQ.'N' ).OR.( TRANS.EQ.'n' ).OR. * $ ( TRANS.EQ.'T' ).OR.( TRANS.EQ.'t' ).OR. * $ ( TRANS.EQ.'C' ).OR.( TRANS.EQ.'c' ) ) * $ .AND. * $ ( M.GE.0 ) * $ .AND. * $ ( N.GE.0 ) * $ .AND. * $ ( LDA.GE.M ) * * * * Level 2 Blas routine. * * -- Written on 30-August-1985. * Sven Hammarling, Nag Central Office. * INTEGER I , IX , IY , J , JX , JY INTEGER KX , KY , LENX , LENY REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) REAL TEMP c common /debug/idebug,lockd c if( idebug .eq. 1 ) then c call lockon(lockd) c write(6,*)'in sgemv' c write(6,*)'lda =',lda c write(6,*)'a' c call out(a,lda,n,n) c write(6,*)'x' c call out(x,1,n,1) c call lockoff(lockd) c endif * * Quick return if possible. * IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set LENX and LENY, the lengths of the vectors x and y. * IF( ( TRANS.EQ.'N' ).OR.( TRANS.EQ.'n' ) )THEN LENX = N LENY = M ELSE LENX = M LENY = N END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through A. * * First form y := beta*y and set up the start points in X and Y if * the increments are not both unity. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN IF( BETA.NE.ONE )THEN IF( BETA.EQ.ZERO )THEN DO 10, I = 1, LENY Y( I ) = ZERO 10 CONTINUE ELSE DO 20, I = 1, LENY Y( I ) = BETA*Y( I ) 20 CONTINUE END IF END IF ELSE IF( INCX.GT.0 )THEN KX = 1 ELSE KX = 1 - ( LENX - 1 )*INCX END IF IF( INCY.GT.0 )THEN KY = 1 ELSE KY = 1 - ( LENY - 1 )*INCY END IF IF( BETA.NE.ONE )THEN IY = KY IF( BETA.EQ.ZERO )THEN DO 30, I = 1, LENY Y( IY ) = ZERO IY = IY + INCY 30 CONTINUE ELSE DO 40, I = 1, LENY Y( IY ) = BETA*Y( IY ) IY = IY + INCY 40 CONTINUE END IF END IF END IF IF( ALPHA.EQ.ZERO ) $ RETURN IF( ( TRANS.EQ.'N' ).OR.( TRANS.EQ.'n' ) )THEN * * Form y := alpha*A*x + y. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN DO 60, J = 1, N IF( X( J ).NE.ZERO )THEN TEMP = ALPHA*X( J ) DO 50, I = 1, M Y( I ) = Y( I ) + TEMP*A( I, J ) 50 CONTINUE END IF 60 CONTINUE ELSE JX = KX DO 80, J = 1, N IF( X( JX ).NE.ZERO )THEN TEMP = ALPHA*X( JX ) IY = KY DO 70, I = 1, M Y( IY ) = Y( IY ) + TEMP*A( I, J ) IY = IY + INCY 70 CONTINUE END IF JX = JX + INCX 80 CONTINUE END IF ELSE * * Form y := alpha*A'*x + y. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN DO 100, J = 1, N TEMP = ZERO DO 90, I = 1, M TEMP = TEMP + A( I, J )*X( I ) 90 CONTINUE Y( J ) = Y( J ) + ALPHA*TEMP 100 CONTINUE ELSE JY = KY DO 120, J = 1, N TEMP = ZERO IX = KX DO 110, I = 1, M TEMP = TEMP + A( I, J )*X( IX ) IX = IX + INCX 110 CONTINUE Y( JY ) = Y( JY ) + ALPHA*TEMP JY = JY + INCY 120 CONTINUE END IF END IF RETURN * * End of SGEMV . * END * ************************************************************************ * SUBROUTINE STRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) CHARACTER*1 UPLO, TRANS, DIAG INTEGER N, LDA, INCX REAL A( LDA, * ) REAL X( * ) * * Purpose * ======= * * STRSV solves one of the systems of equations * * A*x = b, or A'*x = b, * * where b and x are n element vectors and A is an n by n unit, or * non-unit, upper or lower triangular matrix. * * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. * * Parameters * ========== * * UPLO - CHARACTER*1. * On entry, UPLO specifies whether the matrix is an upper or * lower triangular matrix as follows: * * UPLO = 'U' or 'u' A is an upper triangular matrix. * * UPLO = 'L' or 'l' A is a lower triangular matrix. * * Unchanged on exit. * * TRANS - CHARACTER*1. * On entry, TRANS specifies the equations to be solved as * follows: * * TRANS = 'N' or 'n' A*x = b. * * TRANS = 'T' or 't' A'*x = b. * * TRANS = 'C' or 'c' A'*x = b. * * Unchanged on exit. * * DIAG - CHARACTER*1. * On entry, DIAG specifies whether or not A is unit * triangular as follows: * * DIAG = 'U' or 'u' A is assumed to be unit triangular. * * DIAG = 'N' or 'n' A is not assumed to be unit * triangular. * * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the order of the matrix A. * N must be at least zero. * Unchanged on exit. * * A - REAL array of DIMENSION ( LDA, n ). * Before entry with UPLO = 'U' or 'u', the leading n by n * upper triangular part of the array A must contain the upper * triangular matrix and the strictly lower triangular part of * A is not referenced. * Before entry with UPLO = 'L' or 'l', the leading n by n * lower triangular part of the array A must contain the lower * triangular matrix and the strictly upper triangular part of * A is not referenced. * Note that when DIAG = 'U' or 'u', the diagonal elements of * A are not referenced either, but are assumed to be unity. * Unchanged on exit. * * LDA - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. LDA must be at least n. * Unchanged on exit. * * X - REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). * Before entry, the incremented array X must contain the n * element right-hand side vector b. On exit, X is overwritten * with the solution vector x. * * INCX - INTEGER. * On entry, INCX specifies the increment for the elements of * X. * Unchanged on exit. * * * Note that UPLO, TRANS, DIAG, N and LDA must be such that the value of * the LOGICAL variable OK in the following statement is true. * * OK = ( ( UPLO.EQ.'U' ).OR.( UPLO.EQ.'u' ).OR. * $ ( UPLO.EQ.'L' ).OR.( UPLO.EQ.'l' ) ) * $ .AND. * $ ( ( TRANS.EQ.'N' ).OR.( TRANS.EQ.'n' ).OR. * $ ( TRANS.EQ.'T' ).OR.( TRANS.EQ.'t' ).OR. * $ ( TRANS.EQ.'C' ).OR.( TRANS.EQ.'c' ) ) * $ .AND. * $ ( ( DIAG.EQ.'U' ).OR.( DIAG.EQ.'u' ).OR. * $ ( DIAG.EQ.'N' ).OR.( DIAG.EQ.'n' ) ) * $ .AND. * $ ( N.GE.0 ) * $ .AND. * $ ( LDA.GE.N ) * * * * Level 2 Blas routine. * * -- Written on 30-September-1985. * Sven Hammarling, Nag Central Office. * LOGICAL NO UNIT INTEGER I , IX , J , JX , KX REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * * Quick return if possible. * c common /debug/idebug,lockd c if( idebug .eq. 1 ) then c call lockon(lockd) c write(6,*)'in strsv' c write(6,*)'lda =',lda c write(6,*)'a' c call out(a,lda,n,n) c write(6,*)'x' c call out(x,1,n,1) c call lockoff(lockd) c endif IF( N.EQ.0 ) $ RETURN NO UNIT = ( DIAG .EQ.'N' ).OR.( DIAG .EQ.'n' ) * * Set up the start point in X if the increment is not unity. This * will be ( N - 1 )*INCX too small for descending loops. * IF( INCX.LE.0 )THEN KX = 1 - ( N - 1 )*INCX ELSE IF( INCX.NE.1 )THEN KX = 1 END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through A. * IF( ( TRANS.EQ.'N' ).OR.( TRANS.EQ.'n' ) )THEN * * Form x := inv( A )*x. * IF( ( UPLO.EQ.'U' ).OR.( UPLO.EQ.'u' ) )THEN IF( INCX.EQ.1 )THEN DO 20, J = N, 1, -1 IF( X( J ).NE.ZERO )THEN IF( NO UNIT ) $ X( J ) = X( J )/A( J, J ) DO 10, I = J - 1, 1, -1 X( I ) = X( I ) - X( J )*A( I, J ) 10 CONTINUE END IF 20 CONTINUE ELSE JX = KX + ( N - 1 )*INCX DO 40, J = N, 1, -1 IF( X( JX ).NE.ZERO )THEN IF( NO UNIT ) $ X( JX ) = X( JX )/A( J, J ) IX = JX DO 30, I = J - 1, 1, -1 IX = IX - INCX X( IX ) = X( IX ) - X( JX )*A( I, J ) 30 CONTINUE END IF JX = JX - INCX 40 CONTINUE END IF ELSE IF( INCX.EQ.1 )THEN DO 60, J = 1, N IF( X( J ).NE.ZERO )THEN IF( NO UNIT ) $ X( J ) = X( J )/A( J, J ) DO 50, I = J + 1, N X( I ) = X( I ) - X( J )*A( I, J ) 50 CONTINUE END IF 60 CONTINUE ELSE JX = KX DO 80, J = 1, N IF( X( JX ).NE.ZERO )THEN IF( NO UNIT ) $ X( JX ) = X( JX )/A( J, J ) IX = JX DO 70, I = J + 1, N IX = IX + INCX X( IX ) = X( IX ) - X( JX )*A( I, J ) 70 CONTINUE END IF JX = JX + INCX 80 CONTINUE END IF END IF ELSE * * Form x := inv( A' )*x. * IF( ( UPLO.EQ.'U' ).OR.( UPLO.EQ.'u' ) )THEN IF( INCX.EQ.1 )THEN DO 100, J = 1, N DO 90, I = 1, J - 1 X( J ) = X( J ) - A( I, J )*X( I ) 90 CONTINUE IF( NO UNIT ) $ X( J ) = X( J )/A( J, J ) 100 CONTINUE ELSE JX = KX DO 120, J = 1, N IX = KX DO 110, I = 1, J - 1 X( JX ) = X( JX ) - A( I, J )*X( IX ) IX = IX + INCX 110 CONTINUE IF( NO UNIT ) $ X( JX ) = X( JX )/A( J, J ) JX = JX + INCX 120 CONTINUE END IF ELSE IF( INCX.EQ.1 )THEN DO 140, J = N, 1, -1 DO 130, I = N, J + 1, -1 X( J ) = X( J ) - A( I, J )*X( I ) 130 CONTINUE IF( NO UNIT ) $ X( J ) = X( J )/A( J, J ) 140 CONTINUE ELSE KX = KX + ( N - 1 )*INCX JX = KX DO 160, J = N, 1, -1 IX = KX DO 150, I = N, J + 1, -1 X( JX ) = X( JX ) - A( I, J )*X( IX ) IX = IX - INCX 150 CONTINUE IF( NO UNIT ) $ X( JX ) = X( JX )/A( J, J ) JX = JX - INCX 160 CONTINUE END IF END IF END IF RETURN * * End of STRSV . * END subroutine chekin(jobtag) CVD$G NOCONCUR integer jobtag c*********************************************************************** c c this subroutine records problem identified by jobtag is c done to appropriate nodes. these nodes are recorded in c parmq(i,jobtag) where nparms+5 .le. i .le. nchks+nparms+4 c checkin consists of decrementing the value in each of these c locations by 1. each of these is done in a critical section c protected by qlock(ichek) c c if the value in parmq(2,ichek) is 0 where ichek is a process c dependent upon this one then ichek is placed on the readyq c by entering the critical section protected by rtlock. the c pointer rtail to the tail of the readyq is incremented c unless task done is to be recorded. task done is placed on c the ready q and the pointer rtail left in place if nchks .eq. 0 c is found. c c see the common block description in libopn for more detail. c c*********************************************************************** integer parmq,readyq,qlock,hrlock,trlock,phead,ptail,rhead,rtail, * done,qtail,qtlock common /qdata/ parmq(50,1000),phead,ptail,readyq(1000),rhead, * rtail,qtail common /qsync/ qlock(1000),hrlock,trlock,done,qtlock c c c common block description: c c a complete common block description is given in the routine libopn c c**************************************************************************** c c check to see if this process has completed. if not do not checkin c offset = parmq(3,jobtag) + parmq(4,jobtag) + 5 if (parmq(offset,jobtag) .ne. 0) return c c the process has completed so chekin proceeds c nchks = parmq(3,jobtag) c c if this is the final process (indicated by nchks .eq. 0) then c record task done. do not advance the tail so task done sequence c is set. all subsequent gtprb queries will leave rhead .eq. rtail c with readyq(rhead) .eq. done. c if (nchks .eq. 0) then call lockon(trlock) readyq(rtail) = done call lockoff(trlock) return endif nparms = parmq(4,jobtag) do 50 j = nparms+5,nchks+nparms+4 mychek = parmq(j,jobtag) c c get unique access to the checkin node mychek c and checkin by decrementing the appropriate counter c mchkgo = 1 call lockon(qlock(mychek)) parmq(2,mychek) = parmq(2,mychek) - 1 mchkgo = parmq(2,mychek) call lockoff(qlock(mychek)) c c place mychek on readyq if parmq(2,mychek) is 0 c if (mchkgo .eq. 0 ) then call lockon(trlock) readyq(rtail) = mychek rtail = rtail + 1 call lockoff(trlock) endif 50 continue return end subroutine gtprms(jobtag,indx) CVD$G NOCONCUR c************************************************************************* c c this subroutine places the parameters recorded in parmq(i,jobtag) c for 5 .le. i .le. nparms + 4 into the first nparm locations of c the integer array indx in order of appearance in parmq. c c input parameters c c jobtag an integer identifying the parmq descriptor for c which the parameter request is made c c output parameters c c indx an integer array holding the values of the parameters c required to make the call requested by jobtag. c c************************************************************************* c integer jobtag,indx(1) integer parmq,readyq,qlock,hrlock,trlock,phead,ptail,rhead,rtail, * done,qtail,qtlock common /qdata/ parmq(50,1000),phead,ptail,readyq(1000),rhead, * rtail,qtail common /qsync/ qlock(1000),hrlock,trlock,done,qtlock c c c common block description: c c for a complete common block description see libopn c c c get parameters from the parmq c nparms = parmq(4,jobtag) do 100 j = 5,4+nparms indx(j-4) = parmq(j,jobtag) 100 continue return end integer function gtprb(jobtag) CVD$G NOCONCUR c************************************************************************** c c this routine gets unique access to the head of the readyq c strips off the value and returns with the process identifier in c the integer jobtag and with the subroutine request myjob in gtprb c c output parameters c c jobtag an integer containing the next process to be executed c c gtprb the return value of this integer function contains c identifier of the subroutine call to be made on return c from this routine. (see the work template for usage) c c*************************************************************************** integer parmq,readyq,qlock,hrlock,trlock,phead,ptail,rhead,rtail, * done,qtail,qtlock common /qdata/ parmq(50,1000),phead,ptail,readyq(1000),rhead, * rtail,qtail common /qsync/ qlock(1000),hrlock,trlock,done,qtlock c c c common block description: c c for a complete common block description see the routine libopn c c c*************************************************************************** c c 10 continue mhead = -1 call lockon(hrlock) c c gain access to head of readyq. if task done has not been recorded c then increment the head of the readyq. otherwise the head pointer c is left fixed so the next active process will receive task done. c if (rhead .lt. rtail) then mhead = rhead rhead = rhead + 1 endif call lockoff(hrlock) if (mhead .gt. 0) then c c there was a work unit on the readyq c jobtag = readyq(mhead) c if (jobtag .ne. done) then c c record the subroutine call identifier in gtprb and return c the process identifier in jobtag. c gtprb = parmq(1,jobtag) c else c c task done has been indicated. request a return from subroutine work c by returning the value 1 in gtprb. c gtprb = 1 c endif else c jobtag = readyq(rhead) if (jobtag .eq. done) then c c task done has been posted c gtprb = 1 c else c c there was not any work on the readyq c go to 10 c endif endif return end subroutine sched(nproc) integer nproc c************************************************************************ c c this routine sets locks and initializes variables c and then spawns nproc generic work routines. c c nproc is a positive integer. care should be taken to c match nproc to the number of physical processors c available. c c************************************************************************ integer parmq,readyq,qlock,hrlock,trlock,phead,ptail,rhead,rtail, * done,qtail,qtlock common /qdata/ parmq(50,1000),phead,ptail,readyq(1000),rhead, * rtail,qtail common /qsync/ qlock(1000),hrlock,trlock,done,qtlock c c c common block description: c c common/qdata/ c c parmq is a two dimensional integer array. each column of c this array represents a schedulable process. a process is c identified by its jobtag which corresponds to a unique c column of parmq. a column of parmq has the following c entries c c parmq(1,jobtag) = subroutine call identifier c an integer .ge. 2 which c will specify the appropriate c label in subroutine work c c parmq(2,jobtag) = icango c an integer specifying the number c of processes that must check in c before this process may scheduled c (ie. be placed on the ready queue) c c parmq(3,jobtag) = nchks c an integer specifying the number c of processes that this process c must checkin to. identifiers of c these processes are recorded below. c if nchks .eq. 0 then completion of c this process signifies completion of c task. c c parmq(4,jobtag) = nparms c an integer specifying the number c of parmeters needed to make a subroutine c call of type indicated by parmq(1,jobtag) c c parmq(i,jobtag) = indices specifying array indices needed c to make the subroutine call c 5 .le. i .le. nparms + 4 c c parmq(j,jobtag) = the jobtag of a process that depends c upon completion of this process c nparms + 5 .le. j .le. nchks + nparms + 4 c c parmq(nchks+nparms+5,jobtag) is reserved for c reentry information. if this value c is greater than 0 then the process c jobtag has spawned process below it c and will resume execution at a label c identified with the value in this location c (see routines spawn,enter,wait,prtspn) c c parmq(nchks+nparms+6,jobtag) is reserved for the number c kparms c of resume parameters needed to resume c computation at the indicated reentry c label. c c parmq(nchks+nparms+6+j,jobtag) 1 .le. j .le. kparms c contain the values of the resume c parameters. c c phead pointer to head of parmq c c ptail pointer to tail of parmq c c readyq a one dimensional integer array that holds the jobtags of those c processes that are ready to execute. c if readyq(j) .eq. done has been set then a return from subroutine c work is indicated. c c rhead is a pointer to the head of readyq c c rtail is a pointer to the tail of readyq c c qtail is a pointer to the next available space on the parmq c c common/sync/ c c qlock is an integer array of locks. there is one lock for each c column of parmq. the purpose of this lock is to ensure c unique access to a column of parmq during the checkin operation. c c hrlock is an integer lock. the purpose of this lock is to ensure c unique access to the pointer rhead to the head of the readyq. c c trlock is an integer lock. the purpose of this lock is to ensure c unique access to the pointer rtail to the tail of the readyq. c c done is a unique non positive integer set in libopn to indicate c task done. c c qtlock is a lock variable that protects access to qtail c c c set lock on pointer to head of readyq so c no process may start until all process and data dependencies c have been specified by the user supplied routine driver. c call lockon(hrlock) c c now spawn virtual processors. these generic work routines will c assume the identity of any schedulable process specified by driver. c write(6,*)' in sched, about to call work nproc times', nproc CVD$L CNCALL do 200 j = 1,nproc call work(j) 200 continue write(6,*)' in sched, workers started' return end subroutine putq(jobtag,myjob,icango,nchks,mchkin,nparms,indx) CVD$G NOCONCUR integer jobtag,myjob,icango,nchks,mchkin(1),nparms,indx(1) c************************************************************************* c c warning - this routine may only be used by driver in a static definition c of the data dependencies in the task. c c see the template for work and the example driver for usage c c this subroutine puts a problem on the queue. no synchronization c is necessary because each index of a column of parmq is associated c with a jobtag specified by the user and associated with a unique c schedulable process. the arguments of putq specify a process and are c placed in a column of jobq according to the menue specified in the c common block description given below. c c jobtag is an integer specifying a unique schedulable process c c myjob is a positive integer specifying a subroutine call. c the work subroutine label associated with this integer c through a computed goto will make the call when this c process is picked off of the readyq. it will only be c placed on the readyq when all data dependencies have been c satisfied. ie when all processes that this one depends upon c have checked in by decrementing the icango counter. c c icango is a positive integer specifying how many processes must check in c to this process before it can be placed on the readyq. c c nchks is the number of processes that depend upon the completion of c this process. c c mchkin is an integer array specifying the jobtags of the processes c which depend upon completion of this process. c c nparms is the number of parameters needed to specify the subroutine c call specified by myjob. c c indx is an integer array holding the indices of arrays specified in c user common area common/probdef/ which are required to make the c subroutine call specified by myjob. c c************************************************************************* c integer parmq,readyq,qlock,hrlock,trlock,phead,ptail,rhead,rtail, * done,qtail,qtlock common /qdata/ parmq(50,1000),phead,ptail,readyq(1000),rhead, * rtail,qtail common /qsync/ qlock(1000),hrlock,trlock,done,qtlock c c c common block description: c c for a complete common block description see the subroutine libopn c c c place process jobtag on the problem queue c qtail = qtail + 1 next = jobtag parmq(1,next) = myjob parmq(2,next) = icango parmq(3,next) = nchks parmq(4,next) = nparms c c specify identifiers of processes which depend on this one c do 50 j = 1,nchks parmq(j+4+nparms,next) = mchkin(j) 50 continue c c last entry indicates reenter point of parent spawning processes c parmq(nchks+5+nparms,next) = 0 c c specify the parameters needed to make the subroutine call indicated c by myjob. c do 100 j = 1,nparms parmq(j+4,next) = indx(j) 100 continue c c place this process on readyq if icango is 0 c when icango .eq. 0 this process does not depend on any c others. c if (icango .eq. 0 ) then call lockon(trlock) readyq(rtail) = next rtail = rtail + 1 call lockoff(trlock) endif return end subroutine start c c this routine allows parallel processing to start after user supplied c driver has completed by unlocking the head of the readyq c integer parmq,readyq,qlock,hrlock,trlock,phead,ptail,rhead,rtail, * done,qtail,qtlock common /qdata/ parmq(50,1000),phead,ptail,readyq(1000),rhead, * rtail,qtail common /qsync/ qlock(1000),hrlock,trlock,done,qtlock c c c for common block description see subroutine libopn. c call lockoff(hrlock) c return end subroutine spawn(ptag,myjob,nparms,indx) CVD$G NOCONCUR integer ptag,myjob,nparms,indx(1) c************************************************************************* c c this subroutine puts a problem on the queue. the problem will be c need to report completion (checkin) to the same nodes as the spawning c process. synchronization will be necessary because the icango entry c for each checkin node ( ie a column of putq ) must be incremented. c the arguments of spawn specify a process and are c placed in a column of jobq according to the menue specified in the c common block description given below. it is assumed that this c process has an icango = 0 so that it is placed on the readyq immediately. c c it should only be called if the proper set up has been put in place. c this is accomplished by using this subroutine in conjunction with c the subroutines enter, prtspn and the logical function wait. the required c syntax is c c call enter(jobtag,label,kndx) c go to (...,llll,...), label c . c . c . c if (nproc .gt. 0) call prtspn(jobtag) c do 100 j = 1,nproc c . c . (set parameters to define spawned process) c . c call spawn(jobtag,...) c 100 continue c . c . (record parameters needed to resume at label llll ) c . c label = l c if (nproc .gt. 0 .and. wait(jobtag,label,kparms,kndx)) return c llll continue c c c ptag is the jobtag of the spawning process c c c myjob is a positive integer specifying a subroutine call. c the work subroutine label associated with this integer c through a computed goto will make the call when this c process is picked off of the readyq. it will only be c placed on the readyq when all data dependencies have been c satisfied. ie when all processes that this one depends upon c have checked in by decrementing the icango counter. c c nparms is the number of parameters needed to specify the subroutine c call specified by myjob. c c indx is an integer array holding the indices of arrays specified in c user common area common/probdef/ which are required to make the c subroutine call specified by myjob. c c************************************************************************* c integer parmq,readyq,qlock,hrlock,trlock,phead,ptail,rhead,rtail, * done,qtail,qtlock common /qdata/ parmq(50,1000),phead,ptail,readyq(1000),rhead, * rtail,qtail common /qsync/ qlock(1000),hrlock,trlock,done,qtlock c c common block description: c c common/qdata/ c c parmq is a two dimensional integer array. each column of c this array represents a schedulable process. a process is c identified by its jobtag which corresponds to a unique c column of parmq. a column of parmq has the following c entries c c parmq(1,jobtag) = subroutine call identifier c an integer .ge. 2 which c will specify the appropriate c label in subroutine work c c parmq(2,jobtag) = icango c an integer specifying the number c of processes that must check in c before this process may scheduled c (ie. be placed on the ready queue) c c parmq(3,jobtag) = nchks c an integer specifying the number c of processes that this process c must checkin to. identifiers of c these processes are recorded below. c if nchks .eq. 0 then completion of c this process signifies completion of c task. c c parmq(4,jobtag) = nparms c an integer specifying the number c of parmeters needed to make a subroutine c call of type indicated by parmq(1,jobtag) c c parmq(i,jobtag) = indices specifying array indices needed c to make the subroutine call c 5 .le. i .le. nparms + 4 c c parmq(j,jobtag) = the jobtag of a process that depends c upon completion of this process c nparms + 5 .le. j .le. nchks + nparms + 4 c c phead pointer to head of parmq c c ptail pointer to tail of parmq c c readyq a one dimensional integer array that holds the jobtags of those c processes that are ready to execute. c if readyq(j) .eq. done has been set then a return from subroutine c work is indicated. c c rhead is a pointer to the head of readyq c c rtail is a pointer to the tail of readyq c c common/sync/ c c qlock is an integer array of locks. there is one lock for each c column of parmq. the purpose of this lock is to ensure c unique access to a column of parmq during the checkin operation. c c hrlock is an integer lock. the purpose of this lock is to ensure c unique access to the pointer rhead to the head of the readyq. c c trlock is an integer lock. the purpose of this lock is to ensure c unique access to the pointer rtail to the tail of the readyq. c c done is a unique non positive integer set in libopn to indicate c task done. c c******************************************************************************* c c declare local storage c integer next,mchkin,j c c place this process on the next slot in the problem queue c call lockon(qtlock) next = qtail qtail = qtail + 1 call lockoff(qtlock) parmq(1,next) = myjob parmq(2,next) = 0 parmq(3,next) = 1 parmq(4,next) = nparms c c record the identifier of parent process which depends on this one c also increment the parent process icango counter by 1 c parmq(nparms+5,next) = ptag call lockon(qlock(ptag)) parmq(2,ptag) = parmq(2,ptag) + 1 call lockoff(qlock(ptag)) c c indicate that this is not a reenter c parmq(nparms+6,next) = 0 c c c c specify the parameters needed to make the subroutine call indicated c by myjob. c do 100 j = 1,nparms parmq(j+4,next) = indx(j) 100 continue c c place this process on readyq since icango is 0 c when icango .eq. 0 this process does not depend on any c others. c call lockon(trlock) readyq(rtail) = next rtail = rtail + 1 call lockoff(trlock) return end logical function wait(jobtag,ienter,kparms,kndx) c integer jobtag,ienter,kparms,kndx(1) c***************************************************************************** c c this routine will allow process jobtag to continue after c spawned processes have all checked in. it should only be called if c processes have been spawned by jobtag through the use of c the subroutine spawn. this routine must be used in conjunction with c subroutine prtspn. the required syntax is c c call enter(jobtag,label,kndx) c go to (...,llll,...), label c . c . c . c if (nproc .gt. 0) call prtspn(jobtag) c do 100 j = 1,nproc c . c . (set parameters to define spawned process) c . c call spawn(jobtag,...) c 100 continue c . c . (record parameters needed to resume at label llll ) c . c label = l c if (nproc .gt. 0 .and. wait(jobtag,label,kparms,kndx)) return c llll continue c c if this subroutine returns a value of .true. then the calling process c jobtag should issue a return. if a value of .false. is returned then c the calling process jobtag should resume execution at the c statement immediately following the reference to wait (ie. at llll in c the example above. a return value .true. indicates that some spawned c process has not yet completed and checked in. a return value .false. c indicates all spawned processes have checked in. c c***************************************************************************** integer parmq,readyq,qlock,hrlock,trlock,phead,ptail,rhead,rtail, * done,qtail,qtlock common /qdata/ parmq(50,1000),phead,ptail,readyq(1000),rhead, * rtail,qtail common /qsync/ qlock(1000),hrlock,trlock,done,qtlock integer offset wait = .true. c c for common block description see subroutine libopn. c c c record the reentry point and the parameters c needed to resume computation at the reentry point c offset = parmq(4,jobtag) + parmq(3,jobtag) + 5 parmq(offset,jobtag) = ienter parmq(offset+1,jobtag) = kparms do 100 j = 1,kparms parmq(j+1+offset,jobtag) = kndx(j) 100 continue c c decrement the icango counter so this process can be rescheduled c as soon as all spawned processes have checked in. c icango = 1 call lockon(qlock(jobtag)) icango = parmq(2,jobtag) - 1 parmq(2,jobtag) = icango call lockoff(qlock(jobtag)) if (icango .eq. 0) then wait = .false. parmq(offset,jobtag) = 0 endif c c note if parmq(2,jobtag) .eq. 0 this process may just resume c include this in next version c return end subroutine prtspn(jobtag) c integer jobtag c****************************************************************************** c c this routine must be called before spawning processes c it sets the icango entry for jobtag to 1 to prevent race c condition from developing. it should only be called if c processes are to be spawned by jobtag through the use of c the subroutine spawn. this routine must be used in conjunction with c logical function wait and subroutine enter. the required syntax is c c call enter(jobtag,label,kndx) c go to (...,llll,...), label c . c . c . c if (nproc .gt. 0) call prtspn(jobtag) c do 100 j = 1,nproc c . c . (set parameters to define spawned process) c . c call spawn(jobtag,...) c 100 continue c . c . (record parameters needed to resume at label llll ) c . c label = l c if (nproc .gt. 0 .and. wait(jobtag,label,kparms,kndx)) return c llll continue c c****************************************************************************** c integer parmq,readyq,qlock,hrlock,trlock,phead,ptail,rhead,rtail, * done,qtail,qtlock common /qdata/ parmq(50,1000),phead,ptail,readyq(1000),rhead, * rtail,qtail common /qsync/ qlock(1000),hrlock,trlock,done,qtlock c integer offset c c for common block description see subroutine libopn. c c c set the icango entry to 1 c parmq(2,jobtag) = 1 c 100 continue return end subroutine enter(jobtag,label,kndx) integer jobtag,label,kndx(1) c****************************************************************************** c c this routine gets label to branch to when process jobtag needs c to be reentered. the label, number of resume parameters and c values of those parmeters are recorded in parmq(jobtag) at c the locations specified in the parmq description (see libopn). c c this routine must only be used in conjunction with c subroutines spawn, prtspn and the logical function wait. c the required syntax is c c call enter(jobtag,label,kndx) c go to (...,llll,...), label c . c . c . c if (nproc .gt. 0) call prtspn(jobtag) c do 100 j = 1,nproc c . c . (set parameters to define spawned process) c . c call spawn(jobtag,...) c 100 continue c . c . (record parameters needed to resume at label llll ) c . c label = l c if (nproc .gt. 0 .and. wait(jobtag,label,kparms,kndx)) return c llll continue c c input parameters c c jobtag the uniqe parmq identifier of this prcess c c output parameters c c label a nonnegative integer specifying a jump to a label c specified by the user c c kndx an integer array containing a list of resume parmeters c needed to resume execution at reentry point label. c c******************************************************************************* c integer parmq,readyq,qlock,hrlock,trlock,phead,ptail,rhead,rtail, * done,qtail,qtlock common /qdata/ parmq(50,1000),phead,ptail,readyq(1000),rhead, * rtail,qtail common /qsync/ qlock(1000),hrlock,trlock,done,qtlock c integer offset,kparms c c for common block description see subroutine libopn. c c place entry point in label c offset = parmq(4,jobtag) + parmq(3,jobtag) + 5 label = parmq(offset,jobtag) if (label .gt. 0) then kparms = parmq(offset+1,jobtag) do 100 j = 1,kparms kndx(j) = parmq(j+1+offset,jobtag) 100 continue endif c c clear reentry flag c parmq(offset,jobtag) = 0 c return end subroutine popq() write(6,*)' this routine should not be called' write(6,*)' I am stopping' stop end subroutine graph() integer list(100),idepid(100) common /prbdef/n,lda,a(10,10),x(10) common /debug/idebug,lockd n1 = n/3 if( idebug .eq. 1 ) then call lockon(lockd) write(6,*)'driver: n1',n1 call lockoff(lockd) endif c c c external strsv c call routin(strsv,'low','n','n',n1,a,lda,x,1) c c routin is a c routine that will put together c a Fortran call to the routine named as the first c arguement, with a calling sequence made up of the c rest of the parameters to routin. c c c call depend(id,ndpup,ndpdwn,depids) c c depend is a routine that specifies the dependence of c the call to routin. c id is the id number for this task (the number itself is c arbitrary c ndpup is the number of processes this call depends upon. c (number of dependences up the tree from this node) c ndpdwn is the number of processes dependent on this call. c (number of dependences down the tree from this node) c depids an array of the ids for the dependent processes. c c c (NOTE that routin must be followed by a call to depend) c c c call strsv('low','n','n',n1,a,lda,x,1) list(1) = n1 list(2) = 1 idepid(1) = 2 idepid(2) = 4 call putq(1,2,0,2,idepid,2,list) c c n2 = n1 if( idebug .eq. 1 ) then call lockon(lockd) write(6,*)'driver: n2',n2 call lockoff(lockd) endif c c c call sgemv('n',n2,n1,-1.0,a(n1+1,1),lda,x,1,1.0,x(n1+1),1) list(1) = n2 list(2) = n1 list(3) = n1+1 list(4) = 1 idepid(1) = 3 call putq(2,3,1,1,idepid,4,list) c n3 = n - n1 - n2 if( idebug .eq. 1 ) then call lockon(lockd) write(6,*)'driver: n3',n3 call lockoff(lockd) endif c c #4 c call sgemv('n',n3,n1,-1.0,a(n1+n2+1,1),lda,x,1,1.0,x(n1+n2+1),1) list(1) = n3 list(2) = n1 list(3) = n1+n2+1 list(4) = 1 idepid(1) = 6 call putq(4,3,1,1,idepid,4,list) c c c c call strsv('low','n','n',n2,a(n1+1,n1+1),lda,x(n1+1),1) list(1) = n2 list(2) = n1 + 1 idepid(1) = 5 call putq(3,2,1,1,idepid,2,list) c c c c c #5 c call sgemv('n',n3,n2,-1.0,a(n1+n2+1,n1+1),lda,x(n1+1),1,1.0, c $ x(n1+n2+1),1) list(1) = n3 list(2) = n2 list(3) = n1+n2+1 list(4) = n1+1 idepid(1) = 6 call putq(5,3,1,1,idepid,4,list) c c #6 c call strsv('low','n','n',n3,a(n1+n2+1,n1+n2+1),lda,x(n1+n2+1),1) list(1) = n3 list(2) = n1 + n2 + 1 idepid(1) = 0 call putq(6,2,2,0,idepid,2,list) c c return end subroutine init(nproc) integer nproc c************************************************************************ c c this routine sets locks and initializes variables c and then spawns nproc generic work routines. c c nproc is a positive integer. care should be taken to c match nproc to the number of physical processors c available. c c************************************************************************ integer parmq,readyq,qlock,hrlock,trlock,phead,ptail,rhead,rtail, * done,qtail,qtlock common /qdata/ parmq(50,1000),phead,ptail,readyq(1000),rhead, * rtail,qtail common /qsync/ qlock(1000),hrlock,trlock,done,qtlock c c c common block description: c c common/qdata/ c c parmq is a two dimensional integer array. each column of c this array represents a schedulable process. a process is c identified by its jobtag which corresponds to a unique c column of parmq. a column of parmq has the following c entries c c parmq(1,jobtag) = subroutine call identifier c an integer .ge. 2 which c will specify the appropriate c label in subroutine work c c parmq(2,jobtag) = icango c an integer specifying the number c of processes that must check in c before this process may scheduled c (ie. be placed on the ready queue) c c parmq(3,jobtag) = nchks c an integer specifying the number c of processes that this process c must checkin to. identifiers of c these processes are recorded below. c if nchks .eq. 0 then completion of c this process signifies completion of c task. c c parmq(4,jobtag) = nparms c an integer specifying the number c of parmeters needed to make a subroutine c call of type indicated by parmq(1,jobtag) c c parmq(i,jobtag) = indices specifying array indices needed c to make the subroutine call c 5 .le. i .le. nparms + 4 c c parmq(j,jobtag) = the jobtag of a process that depends c upon completion of this process c nparms + 5 .le. j .le. nchks + nparms + 4 c c parmq(nchks+nparms+5,jobtag) is reserved for c reentry information. if this value c is greater than 0 then the process c jobtag has spawned process below it c and will resume execution at a label c identified with the value in this location c (see routines spawn,enter,wait,prtspn) c c parmq(nchks+nparms+6,jobtag) is reserved for the number c kparms c of resume parameters needed to resume c computation at the indicated reentry c label. c c parmq(nchks+nparms+6+j,jobtag) 1 .le. j .le. kparms c contain the values of the resume c parameters. c c phead pointer to head of parmq c c ptail pointer to tail of parmq c c readyq a one dimensional integer array that holds the jobtags of those c processes that are ready to execute. c if readyq(j) .eq. done has been set then a return from subroutine c work is indicated. c c rhead is a pointer to the head of readyq c c rtail is a pointer to the tail of readyq c c qtail is a pointer to the next available space on the parmq c c common/sync/ c c qlock is an integer array of locks. there is one lock for each c column of parmq. the purpose of this lock is to ensure c unique access to a column of parmq during the checkin operation. c c hrlock is an integer lock. the purpose of this lock is to ensure c unique access to the pointer rhead to the head of the readyq. c c trlock is an integer lock. the purpose of this lock is to ensure c unique access to the pointer rtail to the tail of the readyq. c c done is a unique non positive integer set in libopn to indicate c task done. c c qtlock is a lock variable that protects access to qtail c done = 0 c c set qlocks off c do 100 j = 1,1000 qlock(j) = 0 readyq(j) = -1 100 continue c c set readyq locks off c hrlock = 0 trlock = 0 tqlock = 0 c c initialize queue pointers c phead = 1 ptail = 1 rhead = 1 rtail = 1 qtail = 2 next = 1 c return end .