REAL MFLOPS,AA(301,300),A(301,300),B(301),X(301) REAL EMAX LDA = 301 C WRITE(6,10) 10 FORMAT(' CHOLESKY DECOMPOSITION TIMING') DO 100 N = 50,300,50 DO 30 J = 1,N DO 20 I = J,N AA(I,J) = FLOAT(-I*J) AA(J,I) = AA(I,J) 20 CONTINUE AA(J,J) = FLOAT(J*N*N+1)/2.0 X(J) = 1.0 30 CONTINUE DO 32 I = 1,N B(I) = 0.0 DO 31 J = 1,N B(I) = B(I) + AA(I,J)*X(J) 31 CONTINUE 32 CONTINUE C WRITE(6,40)LDA,N 40 FORMAT(/' SP SIZE OF THE ARRAYS',I5,' AND ORDER IS ',I5/) C DO 43 J = 1,N DO 42 I = 1,N A(I,J) = AA(I,J) 42 CONTINUE 43 CONTINUE T1 = SECOND(T1) CALL LLT1(A,LDA,N,X,IERR) T2 = SECOND(T2) - T1 CALL LLTS(A,LDA,N,X,B) EMAX = 0.0 DO 41 I = 1,N EMAX = AMAX1(EMAX,ABS(X(I))) 41 CONTINUE MFLOPS = FLOAT(N*(7+N*(3+N*2)))/(6.*T2*1.0E6) WRITE(6,50) T2,MFLOPS,EMAX 50 FORMAT(' UNROLLING DEPTH 1 TIME = ',E12.3,' MFLOPS = ',F9.4, $ ' CHECK = ',E12.3) C DO 73 J = 1,N DO 72 I = 1,N A(I,J) = AA(I,J) 72 CONTINUE 73 CONTINUE T1 = SECOND(T1) CALL LLTP(A,LDA,N,X,IERR) T2 = SECOND(T2) - T1 CALL LLTS(A,LDA,N,X,B) EMAX = 0.0 DO 71 I = 1,N EMAX = AMAX1(EMAX,ABS(X(I))) 71 CONTINUE MFLOPS = FLOAT(N*(7+N*(3+N*2)))/(6.*T2*1.0E6) WRITE(6,80) T2,MFLOPS,EMAX 80 FORMAT(' PARALLEL VERSION TIME = ',E12.3,' MFLOPS = ',F9.4, $ ' CHECK = ',E12.3) C DO 83 J = 1,N DO 82 I = 1,N A(I,J) = AA(I,J) 82 CONTINUE 83 CONTINUE T1 = SECOND(T1) CALL LLTX(A,LDA,N,X,IERR) T2 = SECOND(T2) - T1 CALL LLTS(A,LDA,N,X,B) EMAX = 0.0 DO 81 I = 1,N EMAX = AMAX1(EMAX,ABS(X(I))) 81 CONTINUE MFLOPS = FLOAT(N*(7+N*(3+N*2)))/(6.*T2*1.0E6) WRITE(6,90) T2,MFLOPS,EMAX 90 FORMAT(' UNROLLING DEPTH 16 TIME = ',E12.3,' MFLOPS = ',F9.4, $ ' CHECK = ',E12.3) C 100 CONTINUE STOP END SUBROUTINE LLTS (A,LDA,N,X,B) REAL A(LDA,*), X(*), B(*), XK C C PURPOSE: C SOLVE THE SYMMETRIC POSITIVE DEFINITE SYSTEM AX = B GIVEN THE C CHOLESKY FACTORIZATION OF A (AS COMPUTED IN LLT). C C ADDITIONAL PARAMETERS (NOT PARAMETERS OF LLT): C C X REAL(N), SOLUTION OF LINEAR SYSTEM C C B REAL(N), RIGHT-HAND-SIDE OF LINEAR SYSTEM C C ---------------------------------------------------------------------- C DO 10 K = 1, N X(K) = B(K) 10 CONTINUE C DO 30 K = 1, N XK = X(K)*A(K,K) DO 20 I = K+1, N X(I) = X(I) - A(I,K)*XK 20 CONTINUE X(K) = XK 30 CONTINUE C DO 50 K = N, 1, -1 XK = X(K)*A(K,K) DO 40 I = 1, K-1 X(I) = X(I) - A(K,I)*XK 40 CONTINUE X(K) = XK 50 CONTINUE C RETURN END SUBROUTINE LLT1(A,LDA,N,ROWI,INFO) C ** LLT.F -- LLT DECOMPOSITION C INTEGER N,LDA,INFO REAL A(LDA,N),ROWI(N),T C INFO = 0 DO 30 I = 1, N DO 10 J = 1, I-1 ROWI(J) = -A(I,J) 10 CONTINUE C CALL SMXPY1(N-I+1,A(I,I),I-1,LDA,ROWI,A(I,1)) C IF (A(I,I) .LE. 0) THEN INFO = I GO TO 40 ENDIF A(I,I) = 1/SQRT(A(I,I)) C T = A(I,I) DO 20 J = I+1, N A(J,I) = T*A(J,I) 20 CONTINUE 30 CONTINUE 40 RETURN END SUBROUTINE LLTP(A,LDA,N,ROWI,INFO) C ** LLT.F -- LLT DECOMPOSITION C INTEGER N,LDA,INFO REAL A(LDA,N),ROWI(N),T C INFO = 0 DO 30 I = 1, N DO 10 J = 1, I-1 ROWI(J) = -A(I,J) 10 CONTINUE C n1 = (n - i + 1)/2 n2 = n1 if( mod(n-i+1,2) .eq. 1 ) n2 = n2 + 1 C c ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: c start task 1 here c ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: c CALL SMXPYX(n1,A(I,I),I-1,LDA,ROWI,A(I,1)) c c ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: c start task 2 here c ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: c CALL SMXPYX(n2,A(I+n1,I),I-1,LDA,ROWI,A(I+n1,1)) c c synchronize processes here c C IF (A(I,I) .LE. 0) THEN INFO = I GO TO 40 ENDIF A(I,I) = 1/SQRT(A(I,I)) C T = A(I,I) DO 20 J = I+1, N A(J,I) = T*A(J,I) 20 CONTINUE 30 CONTINUE 40 RETURN END SUBROUTINE LLTX(A,LDA,N,ROWI,INFO) C ** LLT.F -- LLT DECOMPOSITION C INTEGER N,LDA,INFO REAL A(LDA,N),ROWI(N),T C INFO = 0 DO 30 I = 1, N DO 10 J = 1, I-1 ROWI(J) = -A(I,J) 10 CONTINUE C CALL SMXPYX(N-I+1,A(I,I),I-1,LDA,ROWI,A(I,1)) C IF (A(I,I) .LE. 0) THEN INFO = I GO TO 40 ENDIF A(I,I) = 1/SQRT(A(I,I)) C T = A(I,I) DO 20 J = I+1, N A(J,I) = T*A(J,I) 20 CONTINUE 30 CONTINUE 40 RETURN END SUBROUTINE SMXPY1 (N1,Y,N2,LDM,X,M) C ** SMXPY01.F -- SMXPY UNROLLED TO A DEPTH OF 1 C INTEGER LDM,N1,N2 REAL Y(N1),X(N2),M(LDM,N2) C DO 20 J = 1, N2 DO 10 I = 1, N1 Y(I) = (Y(I)) + X(J)*M(I,J) 10 CONTINUE 20 CONTINUE RETURN END SUBROUTINE SMXPYX(N1,Y,N2,LDM,X,M) C ** SMXPY16.F -- SMXPY UNROLLED TO A DEPTH OF 16 C INTEGER LDM,N1,N2 REAL Y(N1),X(N2),M(LDM,N2) C C CLEANUP ODD VECTOR C J = MOD(N2,2) IF (J .GE. 1) THEN DO 10 I = 1, N1 Y(I) = (Y(I)) + X(J)*M(I,J) 10 CONTINUE ENDIF C C CLEANUP ODD GROUP OF TWO VECTORS C J = MOD(N2,4) IF (J .GE. 2) THEN DO 20 I = 1, N1 Y(I) = ( (Y(I)) $ + X(J-1)*M(I,J-1)) + X(J)*M(I,J) 20 CONTINUE ENDIF C C CLEANUP ODD GROUP OF FOUR VECTORS C J = MOD(N2,8) IF (J .GE. 4) THEN DO 30 I = 1, N1 Y(I) = ((( (Y(I)) $ + X(J-3)*M(I,J-3)) + X(J-2)*M(I,J-2)) $ + X(J-1)*M(I,J-1)) + X(J) *M(I,J) 30 CONTINUE ENDIF C C CLEANUP ODD GROUP OF EIGHT VECTORS C J = MOD(N2,16) IF (J .GE. 8) THEN DO 40 I = 1, N1 Y(I) = ((((((( (Y(I)) $ + X(J-7)*M(I,J-7)) + X(J-6)*M(I,J-6)) $ + X(J-5)*M(I,J-5)) + X(J-4)*M(I,J-4)) $ + X(J-3)*M(I,J-3)) + X(J-2)*M(I,J-2)) $ + X(J-1)*M(I,J-1)) + X(J) *M(I,J) 40 CONTINUE ENDIF C C MAIN LOOP - GROUPS OF SIXTEEN VECTORS C JMIN = J+16 DO 60 J = JMIN, N2, 16 DO 50 I = 1, N1 Y(I) = ((((((((((((((( (Y(I)) $ + X(J-15)*M(I,J-15)) + X(J-14)*M(I,J-14)) $ + X(J-13)*M(I,J-13)) + X(J-12)*M(I,J-12)) $ + X(J-11)*M(I,J-11)) + X(J-10)*M(I,J-10)) $ + X(J- 9)*M(I,J- 9)) + X(J- 8)*M(I,J- 8)) $ + X(J- 7)*M(I,J- 7)) + X(J- 6)*M(I,J- 6)) $ + X(J- 5)*M(I,J- 5)) + X(J- 4)*M(I,J- 4)) $ + X(J- 3)*M(I,J- 3)) + X(J- 2)*M(I,J- 2)) $ + X(J- 1)*M(I,J- 1)) + X(J) *M(I,J) 50 CONTINUE 60 CONTINUE RETURN END .