c high performance lu decomposition (including testing & timing code) *** deck tlusp job ---test's single cpu algorithm job,jn=--lusp-,t=37.us=bnchbch. account,ac=865606,us=u2240,upw=u2240. cft,l=0. fetch,dn=luspx ,text=for . fetch,dn=red3 ,text=cal2. fetch,dn=prep3f ,text=for. cft,i=prep3f,l=0. fetch,dn=isamx2,text=cal. cal,i=isamx2,l=0. cft,i=luspx,l=0. cal,i=red3,l=0. segldr,i. xxx. /eof common /xxx/ a(1601,1600),ipvt(1600),b(1600),x(1600) integer nn(25) logical lusp data nn / 2,3,5,8,10 ,15,20,25,32,40 * ,50,64,75,100,150 ,200,250,300,400,500 * ,600,750,1000,1250,1600 / lda=1601 call ranget( iseed ) do 100 itst=1,25 n=nn(itst) call ranset(iseed) do 1 i=1,n do 1 j=1,n 1 a(j,i)=ranf()-.5 cdir$ block i1=irtc() if ( .not.lusp( a,lda,n,ipvt )) go to 200 i2=irtc() cdir$ block do 2 j=1,n b(j)=ranf()-.5 2 x(j)=b(j) call uselu1( lda,a,n ,ipvt ,x ) c --- recreate a --- call ranset(iseed) do 3 i=1,n do 3 j=1,n 3 a(j,i)=ranf()-.5 errs=0. do 10 i=1,n s=0. do 5 j=1,n 5 s=s+a(i,j)*x(j) 10 errs=errs+(s-b(i))**2 work=2*n**3/3+4*n/3-1 time=float(i2-i1)*9.5e-9 rate=1.e-6*work/time write(6,11) n,i2-i1,errs,rate 11 format(1x,i4,1x,i12,1x,g16.5,1x,f8.3) 100 continue stop 200 write(6,201) 201 format(' problem singular') stop end ***deck luspx for ---single-cpu lu decomp. (note: needs it's own c routine for forward&backward substitution logical function lusp( a,nd1,n, ipvt ) real a(nd1,1) integer ipvt(n) c *** linear system solver for all n *** c *** input vars c a - matrix containing lin systems c n - dim of system c nd1 - 1sr dim of a c *** output vars *** c a,ipvt - used by uselu1 to perform inverse c *** cray x-mp timing (nd1=1601)(64 bank x-mp48) c n t clocks 1.e-6*work/t (work=2n**3/3 +4*n/3 -1) c 2 807 0.783 c 3 1274 1.735 c 5 2367 3.913 c 8 4322 8.524 c 10 5901 12.094 c 15 10635 22.458 c 20 20646 27.318 c 25 31722 34.670 c 32 49584 46.462 c 40 76364 58.884 c 50 117094 74.972 c 64 195386 94.197 c 75 283153 104.593 c 100 567572 123.666 c 150 1604665 147.609 c 200 3496832 160.554 c 250 6570641 166.883 c 300 10989197 172.422 c 400 25323155 177.359 c 500 48571412 180.600 c 600 83075129 182.461 c 750 160696168 184.232 c 1000 377324871 185.982 c 1250 733318838 186.906 c 1600 1531340818 187.704 logical prep3 cdir$ vfunction isamx2 n2=0 if ( n.lt.16 ) go to 151 nhops=(n-13)/3 n2=3*nhops ** np1=n +1 ** nm1=n -1 do 150 ipiv1=1,n2,3 ** ipiv2=ipiv1+1 ipiv3=ipiv1+2 if ( .not.prep3( a,nd1,n,ipvt,ipiv1 )) go to 200 c do 140 i=ipiv3+1,n ccdir$ ivdep c do 140 j=ipiv3+1,n c140 a(i,j)=(((a(i,j)-(a(i,ipiv1))*a(ipiv1,j)) c 1 -(a(i,ipiv2))*a(ipiv2,j)) c 2 -(a(i,ipiv3))*a(ipiv3,j)) call red3(a(ipiv1,ipiv3+1),nd1,n-ipiv3 1 , a(ipiv3+1,ipiv3+1),n-ipiv3 2 , a(ipiv3+1,ipiv1) ) 150 continue c *** now reduce to upper triangular form with unit diagnols c *** by standard gauss elimination 151 continue numl=n-n2 go to ( 5001,5002,5003,5004,5005,5006,5007 * ,5008 ,5009,5010,5011,5012,5013,5014,5015 ) numl 5015 continue n2=n2+1 kk=n2-1+isamx2( 15,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 366 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 366 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 367 j=n2+1,n ] reduce last 14*14 part of matrix a(n2+ 1,j)=a(n2+ 1,j) -a(n2+ 1,n2)*a(n2,j) a(n2+ 2,j)=a(n2+ 2,j) -a(n2+ 2,n2)*a(n2,j) a(n2+ 3,j)=a(n2+ 3,j) -a(n2+ 3,n2)*a(n2,j) a(n2+ 4,j)=a(n2+ 4,j) -a(n2+ 4,n2)*a(n2,j) a(n2+ 5,j)=a(n2+ 5,j) -a(n2+ 5,n2)*a(n2,j) a(n2+ 6,j)=a(n2+ 6,j) -a(n2+ 6,n2)*a(n2,j) a(n2+ 7,j)=a(n2+ 7,j) -a(n2+ 7,n2)*a(n2,j) a(n2+ 8,j)=a(n2+ 8,j) -a(n2+ 8,n2)*a(n2,j) a(n2+ 9,j)=a(n2+ 9,j) -a(n2+ 9,n2)*a(n2,j) a(n2+10,j)=a(n2+10,j) -a(n2+10,n2)*a(n2,j) a(n2+11,j)=a(n2+11,j) -a(n2+11,n2)*a(n2,j) a(n2+12,j)=a(n2+12,j) -a(n2+12,n2)*a(n2,j) a(n2+13,j)=a(n2+13,j) -a(n2+13,n2)*a(n2,j) 367 a(n2+14,j)=a(n2+14,j) -a(n2+14,n2)*a(n2,j) 5014 continue n2=n2+1 kk=n2-1+isamx2( 14,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 361 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 361 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 362 j=n2+1,n ] reduce last 13*13 part of matrix a(n2+ 1,j)=a(n2+ 1,j) -a(n2+ 1,n2)*a(n2,j) a(n2+ 2,j)=a(n2+ 2,j) -a(n2+ 2,n2)*a(n2,j) a(n2+ 3,j)=a(n2+ 3,j) -a(n2+ 3,n2)*a(n2,j) a(n2+ 4,j)=a(n2+ 4,j) -a(n2+ 4,n2)*a(n2,j) a(n2+ 5,j)=a(n2+ 5,j) -a(n2+ 5,n2)*a(n2,j) a(n2+ 6,j)=a(n2+ 6,j) -a(n2+ 6,n2)*a(n2,j) a(n2+ 7,j)=a(n2+ 7,j) -a(n2+ 7,n2)*a(n2,j) a(n2+ 8,j)=a(n2+ 8,j) -a(n2+ 8,n2)*a(n2,j) a(n2+ 9,j)=a(n2+ 9,j) -a(n2+ 9,n2)*a(n2,j) a(n2+10,j)=a(n2+10,j) -a(n2+10,n2)*a(n2,j) a(n2+11,j)=a(n2+11,j) -a(n2+11,n2)*a(n2,j) a(n2+12,j)=a(n2+12,j) -a(n2+12,n2)*a(n2,j) 362 a(n2+13,j)=a(n2+13,j) -a(n2+13,n2)*a(n2,j) 5013 continue n2=n2+1 kk=n2-1+isamx2( 13,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 356 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 356 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 357 j=n2+1,n ] reduce last 12*12 part of matrix a(n2+ 1,j)=a(n2+ 1,j) -a(n2+ 1,n2)*a(n2,j) a(n2+ 2,j)=a(n2+ 2,j) -a(n2+ 2,n2)*a(n2,j) a(n2+ 3,j)=a(n2+ 3,j) -a(n2+ 3,n2)*a(n2,j) a(n2+ 4,j)=a(n2+ 4,j) -a(n2+ 4,n2)*a(n2,j) a(n2+ 5,j)=a(n2+ 5,j) -a(n2+ 5,n2)*a(n2,j) a(n2+ 6,j)=a(n2+ 6,j) -a(n2+ 6,n2)*a(n2,j) a(n2+ 7,j)=a(n2+ 7,j) -a(n2+ 7,n2)*a(n2,j) a(n2+ 8,j)=a(n2+ 8,j) -a(n2+ 8,n2)*a(n2,j) a(n2+ 9,j)=a(n2+ 9,j) -a(n2+ 9,n2)*a(n2,j) a(n2+10,j)=a(n2+10,j) -a(n2+10,n2)*a(n2,j) a(n2+11,j)=a(n2+11,j) -a(n2+11,n2)*a(n2,j) 357 a(n2+12,j)=a(n2+12,j) -a(n2+12,n2)*a(n2,j) 5012 continue n2=n2+1 kk=n2-1+isamx2( 12,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 351 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 351 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 352 j=n2+1,n ] reduce last 11*11 part of matrix a(n2+ 1,j)=a(n2+ 1,j) -a(n2+ 1,n2)*a(n2,j) a(n2+ 2,j)=a(n2+ 2,j) -a(n2+ 2,n2)*a(n2,j) a(n2+ 3,j)=a(n2+ 3,j) -a(n2+ 3,n2)*a(n2,j) a(n2+ 4,j)=a(n2+ 4,j) -a(n2+ 4,n2)*a(n2,j) a(n2+ 5,j)=a(n2+ 5,j) -a(n2+ 5,n2)*a(n2,j) a(n2+ 6,j)=a(n2+ 6,j) -a(n2+ 6,n2)*a(n2,j) a(n2+ 7,j)=a(n2+ 7,j) -a(n2+ 7,n2)*a(n2,j) a(n2+ 8,j)=a(n2+ 8,j) -a(n2+ 8,n2)*a(n2,j) a(n2+ 9,j)=a(n2+ 9,j) -a(n2+ 9,n2)*a(n2,j) a(n2+10,j)=a(n2+10,j) -a(n2+10,n2)*a(n2,j) 352 a(n2+11,j)=a(n2+11,j) -a(n2+11,n2)*a(n2,j) 5011 continue n2=n2+1 kk=n2-1+isamx2( 11,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 346 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 346 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 347 j=n2+1,n ] reduce last 10*10 part of matrix a(n2+ 1,j)=a(n2+ 1,j) -a(n2+ 1,n2)*a(n2,j) a(n2+ 2,j)=a(n2+ 2,j) -a(n2+ 2,n2)*a(n2,j) a(n2+ 3,j)=a(n2+ 3,j) -a(n2+ 3,n2)*a(n2,j) a(n2+ 4,j)=a(n2+ 4,j) -a(n2+ 4,n2)*a(n2,j) a(n2+ 5,j)=a(n2+ 5,j) -a(n2+ 5,n2)*a(n2,j) a(n2+ 6,j)=a(n2+ 6,j) -a(n2+ 6,n2)*a(n2,j) a(n2+ 7,j)=a(n2+ 7,j) -a(n2+ 7,n2)*a(n2,j) a(n2+ 8,j)=a(n2+ 8,j) -a(n2+ 8,n2)*a(n2,j) a(n2+ 9,j)=a(n2+ 9,j) -a(n2+ 9,n2)*a(n2,j) 347 a(n2+10,j)=a(n2+10,j) -a(n2+10,n2)*a(n2,j) 5010 continue n2=n2+1 kk=n2-1+isamx2( 10,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 341 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 341 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 342 j=n2+1,n ] reduce last 9*9 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) a(n2+5,j)=a(n2+5,j) -a(n2+5,n2)*a(n2,j) a(n2+6,j)=a(n2+6,j) -a(n2+6,n2)*a(n2,j) a(n2+7,j)=a(n2+7,j) -a(n2+7,n2)*a(n2,j) a(n2+8,j)=a(n2+8,j) -a(n2+8,n2)*a(n2,j) 342 a(n2+9,j)=a(n2+9,j) -a(n2+9,n2)*a(n2,j) 5009 continue n2=n2+1 kk=n2-1+isamx2( 9,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 336 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 336 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 337 j=n2+1,n ] reduce last 8*8 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) a(n2+5,j)=a(n2+5,j) -a(n2+5,n2)*a(n2,j) a(n2+6,j)=a(n2+6,j) -a(n2+6,n2)*a(n2,j) a(n2+7,j)=a(n2+7,j) -a(n2+7,n2)*a(n2,j) 337 a(n2+8,j)=a(n2+8,j) -a(n2+8,n2)*a(n2,j) 5008 continue n2=n2+1 kk=n2-1+isamx2( 8,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 331 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 331 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 332 j=n2+1,n ] reduce last 7*7 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) a(n2+5,j)=a(n2+5,j) -a(n2+5,n2)*a(n2,j) a(n2+6,j)=a(n2+6,j) -a(n2+6,n2)*a(n2,j) 332 a(n2+7,j)=a(n2+7,j) -a(n2+7,n2)*a(n2,j) 5007 continue n2=n2+1 kk=n2-1+isamx2( 7,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 326 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 326 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 327 j=n2+1,n ] reduce last 6*6 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) a(n2+5,j)=a(n2+5,j) -a(n2+5,n2)*a(n2,j) 327 a(n2+6,j)=a(n2+6,j) -a(n2+6,n2)*a(n2,j) 5006 continue n2=n2+1 kk=n2-1+isamx2( 6,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 321 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 321 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 322 j=n2+1,n ] reduce last 5*5 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) 322 a(n2+5,j)=a(n2+5,j) -a(n2+5,n2)*a(n2,j) 5005 continue n2=n2+1 kk=n2-1+isamx2( 5,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 316 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 316 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 317 j=n2+1,n ] reduce last 4*4 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) 317 a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) 5004 continue ] should be 4 n2=n2+1 kk=n2-1+isamx2( 4,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 301 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 301 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 302 j=n2+1,n ] reduce last 3*3 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) 302 a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) 5003 continue ] should be 3 n2=n2+1 kk=n2-1+isamx2( 3,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 306 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 306 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 307 j=n2+1,n ] reduce last 2*2 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) 307 a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) 5002 continue ] should be 2 n2=n2+1 kk=n2-1+isamx2( 2,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 311 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 311 a(n2,j)=r a(n2,n2)=s a( n,n )=a( n,n ) -a( n,n-1)*a( n-1,n ) 5001 if ( a( n,n ).eq.0. ) go to 200 a( n,n )=1./a( n,n ) lusp=.true. return 200 write(13,201) n 201 format(' lusp found sing matrix',/, 1' n=',i6) lusp=.false. return end subroutine uselu1( lda,a,n,ipvt,b ) real a(lda,n),b(n) integer ipvt(n) do 1 j=1,n-1 1 if ( ipvt(j).lt.1 .or. ipvt(j).gt.n ) go to 200 n2=0 if ( n.lt.16 ) go to 151 nhops=(n-13)/3 n2=3*nhops do 150 ipiv1=1,n2,3 r=b(ipvt(ipiv1)) b(ipvt(ipiv1))=b(ipiv1) b(ipiv1)=r*a(ipiv1,ipiv1) c --- find second pivot point --- ipiv2=ipiv1+1 ipiv3=ipiv2+1 r=b(ipvt(ipiv2)) b(ipvt(ipiv2))=b(ipiv2) b(ipiv2)=r*a(ipiv2,ipiv2) c --- find third pivot point --- r=b(ipvt(ipiv3)) b(ipvt(ipiv3))=b(ipiv3) b(ipiv3)=r*a(ipiv3,ipiv3) c --- fix rows ipiv2 thru ipiv3 --- b(ipiv2 )= b(ipiv2 )-a(ipiv2,ipiv1)*b(ipiv1) b(ipiv3 )=(b(ipiv3 )-a(ipiv3,ipiv1)*b(ipiv1)) 1 -a(ipiv3,ipiv2)*b(ipiv2) cdir$ ivdep do 140 i=ipiv3+1,n 140 b(i )=(((b(i )-a(i,ipiv1)*b(ipiv1 )) 1 -a(i,ipiv2)*b(ipiv2 )) 2 -a(i,ipiv3)*b(ipiv3 )) 150 continue c *** now reduce to upper triangular form with unit diagnols c *** by standard gauss elimination 151 do 175 i=n2+1,n-1 c *** find pivot row r=b(ipvt(i)) b(ipvt(i))=b(i) b(i)=r*a(i,i) c *** use pivot row to eliminate rest of column i cdir$ ivdep do 170 j=i+1,n 170 b(j )=b(j )-a(j,i)*b(i ) 175 continue b(n)=b(n)*a(n,n) c *** now back subst to complete solution do 180 i=n,2,-1 cdir$ ivdep do 180 k=1,i-1 180 b(k )=b(k )-b(i )*a(k,i) return 201 format(' uselu1 error bad pivot') 200 write(6,201) stop end ***deck prep3f for --- prepares 3 pivot rows c --- replaces the suspect assembly language version logical function prep3 ( a,nd1,n, ipvt ,ipiv1 ) real a(nd1,1) integer ipvt(n) c ii1=irtc() prep3=.true. np1=n +1 nm1=n -1 c --- find first pivot point --- ipiv=isamax( np1-ipiv1,a(ipiv1,ipiv1),1 ) kpiv=isamax( np1-ipiv1,a(ipiv1,ipiv1),1 ) if ( ipiv.ne.kpiv ) write(6,301) ipiv1,kpiv,ipiv 301 format(' ipiv ne kpiv ... ipiv1,ipiv,kpiv',3i8) ipvt(ipiv1)=ipiv+ipiv1-1 if ( ipiv.eq.1 ) go to 107 ipiv=ipiv+ipiv1-1 if( a(ipiv,ipiv1).eq.0. ) go to 200 r=1./a(ipiv,ipiv1) cdir$ ivdep do 106 j=ipiv1,n rr=a(ipiv ,j) a(ipiv ,j)=a(ipiv1 ,j) 106 a(ipiv1 ,j)=rr*r go to 1085 107 if ( a(ipiv1,ipiv1).eq.0. ) go to 200 r = 1./a(ipiv1,ipiv1) do 108 j=ipiv1+1,n 108 a(ipiv1,j)=a(ipiv1,j)*r 1085 a(ipiv1,ipiv1)=r c --- reduce the next two columns Õonly whats needed to be able c to find the next pivot points cdir$ ivdep do 109 i=ipiv1+1,n a(i,ipiv1+1)=a(i,ipiv1+1)-(a(i,ipiv1))*a(ipiv1,ipiv1+1) 109 a(i,ipiv1+2)=a(i,ipiv1+2)-(a(i,ipiv1))*a(ipiv1,ipiv1+2) c --- find second pivot point --- ipiv2=ipiv1+1 ipiv=isamax( np1-ipiv2,a(ipiv2,ipiv2),1 ) kpiv=isamax( np1-ipiv2,a(ipiv2,ipiv2),1 ) if ( ipiv.ne.kpiv ) write(6,302) ipiv2,kpiv,ipiv 302 format(' ipiv ne kpiv ... ipiv2,ipiv,kpiv',3i8) ipvt(ipiv2)=ipiv+ipiv2-1 if ( ipiv.eq.1 ) go to 111 ipiv=ipiv+ipiv2-1 if ( a(ipiv,ipiv2).eq.0. ) go to 200 r=1./a(ipiv,ipiv2) cdir$ ivdep do 110 j=ipiv1,n rr=a(ipiv,j) a(ipiv,j)=a(ipiv2,j) 110 a(ipiv2,j)=rr*r go to 1125 111 if ( a(ipiv2,ipiv2).eq.0. ) go to 200 r = 1./a(ipiv2,ipiv2) do 112 j=ipiv1,n 112 a(ipiv2,j)=a(ipiv2,j)*r 1125 a(ipiv2,ipiv2)=r c --- reduce the next column Õonly whats needed to be able c to find the next pivot points cdir$ ivdep do 113 i=ipiv2+1,n 113 a(i,ipiv2+1)=a(i,ipiv2+1)-(a(i,ipiv2))*a(ipiv2,ipiv2+1) c --- find third pivot point --- ipiv3=ipiv2+1 ipiv=isamax( np1-ipiv3,a(ipiv3,ipiv3),1 ) kpiv=isamax( np1-ipiv3,a(ipiv3,ipiv3),1 ) if ( ipiv.ne.kpiv ) write(6,303) ipiv3,kpiv,ipiv 303 format(' ipiv ne kpiv ... ipiv3,ipiv,kpiv',3i8) ipvt(ipiv3)=ipiv+ipiv3-1 if ( ipiv.eq.1 ) go to 115 ipiv=ipiv+ipiv3-1 if ( a(ipiv,ipiv3).eq.0. ) go to 200 r=1./a(ipiv,ipiv3) cdir$ ivdep do 114 j=ipiv1,n rr=a(ipiv,j) a(ipiv,j)=a(ipiv3,j) 114 a(ipiv3,j)=rr*r go to 1165 115 if ( a(ipiv3,ipiv3).eq.0. ) go to 200 r = 1./a(ipiv3,ipiv3) do 116 j=ipiv1,n 116 a(ipiv3,j)=a(ipiv3,j)*r 1165 a(ipiv3,ipiv3)=r c --- fix rows ipiv2 thru ipiv3 --- cdir$ ivdep do 121 j=ipiv3+1,n a(ipiv2,j)=a(ipiv2,j)-a(ipiv2,ipiv1)*a(ipiv1,j) 121 a(ipiv3,j)=(a(ipiv3,j)-a(ipiv3,ipiv1)*a(ipiv1,j)) 1 -a(ipiv3,ipiv2)*a(ipiv2,j) return 200 prep3=.false. return end ***deck red3 cal2 --- kernal to perform eleimination using 3 pivots * subroutine red3( ap,nd1,ncols ,a,nrows ,aa ) * real ap(nd1,ncols),a(nd1,ncols),aa(nd1,ncols) * *c ***** lu decomp kernal (3 simultaneous pivots ***** * *c ***** input ***** *c ap : pivot rows (ap(1,1)=1st column after 3rd pivot *c a : block of matrix to be reduced *c a(1,1)=1st col of matrix (after pivot cols) *c of the 1st row after the pivot rows *c aa : pivot columns of pivot rows *c included so that aa(4,1) need not be a(1,1) * * jtot=ncols * j1=1 * do 100 jlps=1,9999 * jvl=64 * if ( jtot.gt.64 .and. jtot.lt.128 ) jvl=jtot/2 * j2=min0(ncols,j1+jvl-1) * do 25 i=1,nrows *cdir$ shortloop * do 25 j=j1,j2 * *25 a(i,j)=(((a(i,j)-aa(i,1)*ap(1,j)) * 1 -aa(i,2)*ap(2,j)) * 2 -aa(i,3)*ap(3,j)) * * jtot=jtot-jvl * if ( jtot.le.0 ) return *100 j1=j2+1 * end ident red3 ap defarg nd1 defarg ncols defarg a defarg nrowss defarg aa defarg jtot defb temp j1 defb temp nrows defb temp ofap defb temp ofa defb temp ofaa defb temp red3 enter argadd a1,ncols argadd a2,nd1 argadd a3,nrowss argadd a4,ap a1 ,a1 a2 ,a2 a3 ,a3 b.ofap a4 b.nrows a3 argadd a3,a argadd a4,aa s5 1 b.ofa a3 b.ofaa a4 * -------------------------------- * jsize = * *** determine vect len to use this pass *** a1:jtot *** a6:scratch *** a7: vl output a6 128 a7=64 $ if( a1 ge 128 ) goto sized a0 a1-a6 a7 64 jap sized a6 64 a7=a1 $ if( a1 le 64 ) goto sized a0 a6-a1 a7 a1 jap sized s1 a1 ( 641 a7 s1 sized vl a7 a1 a1-a7 a1: new jtot ***** * snap (a1,a7) ***** reg use below ***** * a1 : jtot v0 : a(1,*) * a2 : nd1 v1 : ap(1,*) * a3 : )aa(i,1) v2 : ap(2,*) * a4 : )aa(i,2) v3 : ap(3,*) * a5 : )aa(i,3) v4 : s1*rv1 * a6 : )a(1,*) v5 : v0-fv4 * * s1 : aa(i,1) s5 : 1 * s2 : aa(i,2) * s3 : aa(i,3) * s4 : nrows a4 2 vl a4 a0 b.ofaa v2 ,a0,a2 load 2 scalars into vec regs a3 b.ofaa a4 a3+a2 vl a7 a0 b.ofap v1 ,a0,a2 a0 b.ofa v0 ,a0,a2 a5 0 * a3 b.ofaa * a4 a3+a2 * s1 ,a3 a7 b.nrows s4 a7 * s2 ,a4 s1 v2,a5 v4 s1*rv1 s2 v2,a0 a5 a4+a2 a6 b.ofa v5 v0-fv4 a7 b.ofap a7 a7+1 a0 a7 a7 a7+1 v2 ,a0,a2 a0 a7 v3 ,a0,a2 a0 a6 a6 a6+1 j midloop * ----------------------------- * loopi = * a0 a6 v0 ,a0,a2 read a(i,*) v4 s1*rv1 a6 a6+1 v5 v0-fv4 midloop = * * snap s4 a3 a3+1 a4 a4+1 v6 s2*rv2 s4 s4-s5 v7 v5-fv6 s3 ,a5 s2 ,a4 s1 ,a3 a5 a5+1 v4 s3*rv3 s0 -s4 v6 v7-fv4 ,a0,a2 v6 jsm loopi * ------------------------------ * * test to see if done a0 -a1 neg of no. of columns left a7 vl jap finished a7 a7*a2 nd1*last vl a6 b.ofa a4 b.ofap a6 a6+a7 a4 a4+a7 b.ofa a6 inc addr of 1st remaining col in a b.ofap a4 '' '' ap j jsize * ------------------------------ * finished = * exit endp end ***deck isamx2 cal --- fast isamax for small n ident isamx2 * * * fast replacement for isamax for small n * faster for n< 53 ... * usage : * cdir$ vfunction isamx1 * .... * i=isamx2( n,loc(a),inca ) * caution: it is the responsibility of the calling program * to insure that all values in array a have been loaded * into memory before calling isamx2 * if there is any cause for doubt one could force storage by * using the following calling sequence * i=isamx2( n,loc(a),inca,a(idummy) ) * where idummy is any integer expression which the comiler * think's might possibly be the same address as another * reference to a. * .................................................. * entry isamx2% switch = 8 what size to begin 2nd accumulation m. isamx2% = * a1 s1 a7 a1-1 vl a1 s0 a7 is n=1 ? a0 s2 loc(a) a2 s3 inca s1 1 jsz return cmr avoid any possible race condition. v0 ,a0,a2 v1 v0*rv0 a**2 a5 switch a0 a1-a5 a4 64 jap bigr a7 0 a5 1 counter for best value. a6 1 a0 1 s2 v1,a7 best value so far. s4 v1,a6 loop = * a6=1 to n-1 s0 s2-s4 if >= 0 then dont't change estimater jaz fall a6 a6+1 s3 s4 s4 v1,a6 a0 a1-a6 jsp loop a5 a6 s2 s3 jan loop fall s1 a5 return j b00 align bigr = * search 2 at a time. v2 v1,v163 s2 a1 s2 s2&s3 a1 s2 now will search v4,a6 for max * a6 = 0,a1,2 v3 v1-v2 vm v3,p v4 v1]v2&vm a7 0 a5 0 counter for best value. a6 2 next index a4 2 increment a0 1 s2 v4,a7 best value so far. s4 v4,a6 loopb = * a6=1 to n-1 s0 s2-s4 if >= 0 then dont't change estimater jaz fallb done ? a0 a1-a6 done next time thru? a6 a6+a4 inc index s3 s4 s4 v4,a6 jsp loopb a5 a6-a4 index of best value s2 s3 best value jan loopb fallb s0 v3,a5 was best value from v1 or v2 a6 a5+1 isamx2 (if best value was from v1) s1 a6 jsp fin s2 1 s1 s1+s2 fin j b00 end c needed for multiple cpu implementation. ***deck tlump job --- test's× lu decomp (micro-tasking) job,jn=--lump-,t=255,us=bnchbch. account,ac=865606,us=u2240,upw=u2240. fetch,dn=prep3f ,text=for . cft,i=prep3f,l=0,opt=btreg. access,dn=$proc,pdn=microproc,id=jmn ,own=lps . micro,level=new. cft,l=0. fetch,dn=lumpx ,text=for . premult,i=lumpx. cft,i=$multf,alloc=stack,l=0. cal,i=$multc,l=0. * cft,i=lumpx,l=0. fetch,dn=red3 ,text=cal2. * fetch,dn=prep3f ,text=for. * cft,i=prep3f,l=0. fetch,dn=isamx2,text=cal. cal,i=isamx2,l=0. cal,i=red3,l=0. segldr,i. xxx. /eof common /xxx/ a(1601,1600),ipvt(1600),b(1600),x(1600) integer nn(25) logical lump data nn / 2,3,5,8,10 ,15,20,25,32,40 * ,50,64,75,100,150 ,200,250,300,400,500 * ,600,750,1000,1250,1600 / lda=1601 call ranget( iseed ) call getcpus(4) do 100 np = 1,4 write(6,90) np 90 format(' algorithm for',i2,' processors') do 100 itst=10,25 n=nn(itst) call ranset(iseed) do 1 i=1,n do 1 j=1,n 1 a(j,i)=ranf()-.5 cdir$ block i1=irtc() if ( .not.lump( a,lda,n,ipvt ,np )) go to 200 i2=irtc() cdir$ block do 2 j=1,n b(j)=ranf()-.5 2 x(j)=b(j) call uselu1( lda,a,n ,ipvt ,x ) c --- recreate a --- call ranset(iseed) do 3 i=1,n do 3 j=1,n 3 a(j,i)=ranf()-.5 errs=0. do 10 i=1,n s=0. do 5 j=1,n 5 s=s+a(i,j)*x(j) 10 errs=errs+(s-b(i))**2 work=2*n**3/3+4*n/3-1 time=float(i2-i1)*9.5e-9 rate=1.e-6*work/time write(6,11) n,i2-i1,errs,rate 11 format(1x,i4,1x,i12,1x,g16.5,1x,f8.3) 100 continue call relcpus stop 200 write(6,201) 201 format(' problem singular') call relcpus stop end /eof abs=xxx stack=10000+5000 heap=10000+5000 ***deck lumpx for ___ multiple cpu version of luspx c needs decks prep3f for, red3 cal2, isamx2 cal logical function lump( a,nd1,n, ipvt ,np ) real a(nd1,1) integer ipvt(n) c *** linear system solver for all n *** c *** input vars c a - matrix containing lin systems c n - dim of system c nd1 - 1sr dim of a c *** output vars *** c a,ipvt - used by uselu1 to perform inverse c *** cray x-mp timing (nd1=1601)(64 bank x-mp48) c n t clocks 1.e-6*work/t (work=2n**3/3 +4*n/3 -1) c millisecs. logical prep3,lumps cdir$ vfunction isamx2 c t. hewitt cray research jan 1986 (experimental high speed lu c decomposition. c timing info nd]=1601 c timing on x-mp 48 with 64 banks c n clocks megaflops c algorithm for 1 processors c 40 81363 55.266 c 50 123170 71.273 c 64 203765 90.324 c 75 291878 101.466 c 100 579129 121.198 c 150 1617759 146.414 c 200 3509152 159.991 c 250 6589002 166.418 c 300 11066648 171.215 c 400 25219592 178.087 c 500 48553771 180.666 c 600 83015149 182.593 c 750 160437854 184.529 c 1000 377003255 186.141 c 1250 732761311 187.048 c 1600 1530480756 187.809 c algorithm for 2 processors c 40 70393 63.879 c 50 101829 86.211 c 64 158171 116.360 c 75 217528 136.147 c 100 430906 162.888 c 150 1002570 236.256 c 200 2049577 273.925 c 250 3683659 297.673 c 300 6119615 309.624 c 400 13470833 333.408 c 500 25563553 343.145 c 600 43228329 350.649 c 750 82747902 357.778 c 1000 192930742 363.735 c 1250 373159921 367.300 c 1600 775457957 370.670 c algorithm for 3 processors c 40 68849 65.312 c 50 97269 90.252 c 64 147403 124.861 c 75 196424 150.774 c 100 346121 202.788 c 150 804626 294.377 c 200 1579319 355.490 c 250 2763144 396.840 c 300 4402469 430.390 c 400 9660931 464.891 c 500 17984055 487.765 c 600 29974754 505.692 c 750 56887625 520.418 c 1000 131759081 532.605 c 1250 253337441 541.024 c 1600 524994082 547.509 c algorithm for 4 processors c 40 65887 68.248 c 50 92313 95.098 c 64 137346 134.003 c 75 182337 162.423 c 100 310870 225.783 c 150 699364 338.684 c 200 1350111 415.841 c 250 2359689 464.691 c 300 3699425 512.182 c 400 7812279 574.901 c 500 14482703 605.688 c 600 32423811 467.495 c 750 45040204 657.310 c 1000 102784670 682.744 c 1250 197864546 692.704 n2=0 if ( n.lt.16 ) go to 151 nhops=(n-13)/3 n2=3*nhops lumps=.true. npp=np call lumpker( a,nd1,n,n2 ,ipvt ,npp ,lumps ) if ( .not.lumps ) goto 200 c *** now reduce to upper triangular form with unit diagnols c *** by standard gauss elimination 151 continue numl=n-n2 go to ( 5001,5002,5003,5004,5005,5006,5007 * ,5008 ,5009,5010,5011,5012,5013,5014,5015 ) numl 5015 continue n2=n2+1 kk=n2-1+isamx2( 15,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 366 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 366 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 367 j=n2+1,n ] reduce last 14*14 part of matrix a(n2+ 1,j)=a(n2+ 1,j) -a(n2+ 1,n2)*a(n2,j) a(n2+ 2,j)=a(n2+ 2,j) -a(n2+ 2,n2)*a(n2,j) a(n2+ 3,j)=a(n2+ 3,j) -a(n2+ 3,n2)*a(n2,j) a(n2+ 4,j)=a(n2+ 4,j) -a(n2+ 4,n2)*a(n2,j) a(n2+ 5,j)=a(n2+ 5,j) -a(n2+ 5,n2)*a(n2,j) a(n2+ 6,j)=a(n2+ 6,j) -a(n2+ 6,n2)*a(n2,j) a(n2+ 7,j)=a(n2+ 7,j) -a(n2+ 7,n2)*a(n2,j) a(n2+ 8,j)=a(n2+ 8,j) -a(n2+ 8,n2)*a(n2,j) a(n2+ 9,j)=a(n2+ 9,j) -a(n2+ 9,n2)*a(n2,j) a(n2+10,j)=a(n2+10,j) -a(n2+10,n2)*a(n2,j) a(n2+11,j)=a(n2+11,j) -a(n2+11,n2)*a(n2,j) a(n2+12,j)=a(n2+12,j) -a(n2+12,n2)*a(n2,j) a(n2+13,j)=a(n2+13,j) -a(n2+13,n2)*a(n2,j) 367 a(n2+14,j)=a(n2+14,j) -a(n2+14,n2)*a(n2,j) 5014 continue n2=n2+1 kk=n2-1+isamx2( 14,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 361 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 361 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 362 j=n2+1,n ] reduce last 13*13 part of matrix a(n2+ 1,j)=a(n2+ 1,j) -a(n2+ 1,n2)*a(n2,j) a(n2+ 2,j)=a(n2+ 2,j) -a(n2+ 2,n2)*a(n2,j) a(n2+ 3,j)=a(n2+ 3,j) -a(n2+ 3,n2)*a(n2,j) a(n2+ 4,j)=a(n2+ 4,j) -a(n2+ 4,n2)*a(n2,j) a(n2+ 5,j)=a(n2+ 5,j) -a(n2+ 5,n2)*a(n2,j) a(n2+ 6,j)=a(n2+ 6,j) -a(n2+ 6,n2)*a(n2,j) a(n2+ 7,j)=a(n2+ 7,j) -a(n2+ 7,n2)*a(n2,j) a(n2+ 8,j)=a(n2+ 8,j) -a(n2+ 8,n2)*a(n2,j) a(n2+ 9,j)=a(n2+ 9,j) -a(n2+ 9,n2)*a(n2,j) a(n2+10,j)=a(n2+10,j) -a(n2+10,n2)*a(n2,j) a(n2+11,j)=a(n2+11,j) -a(n2+11,n2)*a(n2,j) a(n2+12,j)=a(n2+12,j) -a(n2+12,n2)*a(n2,j) 362 a(n2+13,j)=a(n2+13,j) -a(n2+13,n2)*a(n2,j) 5013 continue n2=n2+1 kk=n2-1+isamx2( 13,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 356 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 356 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 357 j=n2+1,n ] reduce last 12*12 part of matrix a(n2+ 1,j)=a(n2+ 1,j) -a(n2+ 1,n2)*a(n2,j) a(n2+ 2,j)=a(n2+ 2,j) -a(n2+ 2,n2)*a(n2,j) a(n2+ 3,j)=a(n2+ 3,j) -a(n2+ 3,n2)*a(n2,j) a(n2+ 4,j)=a(n2+ 4,j) -a(n2+ 4,n2)*a(n2,j) a(n2+ 5,j)=a(n2+ 5,j) -a(n2+ 5,n2)*a(n2,j) a(n2+ 6,j)=a(n2+ 6,j) -a(n2+ 6,n2)*a(n2,j) a(n2+ 7,j)=a(n2+ 7,j) -a(n2+ 7,n2)*a(n2,j) a(n2+ 8,j)=a(n2+ 8,j) -a(n2+ 8,n2)*a(n2,j) a(n2+ 9,j)=a(n2+ 9,j) -a(n2+ 9,n2)*a(n2,j) a(n2+10,j)=a(n2+10,j) -a(n2+10,n2)*a(n2,j) a(n2+11,j)=a(n2+11,j) -a(n2+11,n2)*a(n2,j) 357 a(n2+12,j)=a(n2+12,j) -a(n2+12,n2)*a(n2,j) 5012 continue n2=n2+1 kk=n2-1+isamx2( 12,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 351 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 351 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 352 j=n2+1,n ] reduce last 11*11 part of matrix a(n2+ 1,j)=a(n2+ 1,j) -a(n2+ 1,n2)*a(n2,j) a(n2+ 2,j)=a(n2+ 2,j) -a(n2+ 2,n2)*a(n2,j) a(n2+ 3,j)=a(n2+ 3,j) -a(n2+ 3,n2)*a(n2,j) a(n2+ 4,j)=a(n2+ 4,j) -a(n2+ 4,n2)*a(n2,j) a(n2+ 5,j)=a(n2+ 5,j) -a(n2+ 5,n2)*a(n2,j) a(n2+ 6,j)=a(n2+ 6,j) -a(n2+ 6,n2)*a(n2,j) a(n2+ 7,j)=a(n2+ 7,j) -a(n2+ 7,n2)*a(n2,j) a(n2+ 8,j)=a(n2+ 8,j) -a(n2+ 8,n2)*a(n2,j) a(n2+ 9,j)=a(n2+ 9,j) -a(n2+ 9,n2)*a(n2,j) a(n2+10,j)=a(n2+10,j) -a(n2+10,n2)*a(n2,j) 352 a(n2+11,j)=a(n2+11,j) -a(n2+11,n2)*a(n2,j) 5011 continue n2=n2+1 kk=n2-1+isamx2( 11,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 346 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 346 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 347 j=n2+1,n ] reduce last 10*10 part of matrix a(n2+ 1,j)=a(n2+ 1,j) -a(n2+ 1,n2)*a(n2,j) a(n2+ 2,j)=a(n2+ 2,j) -a(n2+ 2,n2)*a(n2,j) a(n2+ 3,j)=a(n2+ 3,j) -a(n2+ 3,n2)*a(n2,j) a(n2+ 4,j)=a(n2+ 4,j) -a(n2+ 4,n2)*a(n2,j) a(n2+ 5,j)=a(n2+ 5,j) -a(n2+ 5,n2)*a(n2,j) a(n2+ 6,j)=a(n2+ 6,j) -a(n2+ 6,n2)*a(n2,j) a(n2+ 7,j)=a(n2+ 7,j) -a(n2+ 7,n2)*a(n2,j) a(n2+ 8,j)=a(n2+ 8,j) -a(n2+ 8,n2)*a(n2,j) a(n2+ 9,j)=a(n2+ 9,j) -a(n2+ 9,n2)*a(n2,j) 347 a(n2+10,j)=a(n2+10,j) -a(n2+10,n2)*a(n2,j) 5010 continue n2=n2+1 kk=n2-1+isamx2( 10,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 341 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 341 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 342 j=n2+1,n ] reduce last 9*9 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) a(n2+5,j)=a(n2+5,j) -a(n2+5,n2)*a(n2,j) a(n2+6,j)=a(n2+6,j) -a(n2+6,n2)*a(n2,j) a(n2+7,j)=a(n2+7,j) -a(n2+7,n2)*a(n2,j) a(n2+8,j)=a(n2+8,j) -a(n2+8,n2)*a(n2,j) 342 a(n2+9,j)=a(n2+9,j) -a(n2+9,n2)*a(n2,j) 5009 continue n2=n2+1 kk=n2-1+isamx2( 9,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 336 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 336 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 337 j=n2+1,n ] reduce last 8*8 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) a(n2+5,j)=a(n2+5,j) -a(n2+5,n2)*a(n2,j) a(n2+6,j)=a(n2+6,j) -a(n2+6,n2)*a(n2,j) a(n2+7,j)=a(n2+7,j) -a(n2+7,n2)*a(n2,j) 337 a(n2+8,j)=a(n2+8,j) -a(n2+8,n2)*a(n2,j) 5008 continue n2=n2+1 kk=n2-1+isamx2( 8,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 331 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 331 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 332 j=n2+1,n ] reduce last 7*7 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) a(n2+5,j)=a(n2+5,j) -a(n2+5,n2)*a(n2,j) a(n2+6,j)=a(n2+6,j) -a(n2+6,n2)*a(n2,j) 332 a(n2+7,j)=a(n2+7,j) -a(n2+7,n2)*a(n2,j) 5007 continue n2=n2+1 kk=n2-1+isamx2( 7,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 326 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 326 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 327 j=n2+1,n ] reduce last 6*6 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) a(n2+5,j)=a(n2+5,j) -a(n2+5,n2)*a(n2,j) 327 a(n2+6,j)=a(n2+6,j) -a(n2+6,n2)*a(n2,j) 5006 continue n2=n2+1 kk=n2-1+isamx2( 6,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 321 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 321 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 322 j=n2+1,n ] reduce last 5*5 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) 322 a(n2+5,j)=a(n2+5,j) -a(n2+5,n2)*a(n2,j) 5005 continue n2=n2+1 kk=n2-1+isamx2( 5,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 316 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 316 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 317 j=n2+1,n ] reduce last 4*4 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) 317 a(n2+4,j)=a(n2+4,j) -a(n2+4,n2)*a(n2,j) 5004 continue ] should be 4 n2=n2+1 kk=n2-1+isamx2( 4,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 301 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 301 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 302 j=n2+1,n ] reduce last 3*3 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) 302 a(n2+3,j)=a(n2+3,j) -a(n2+3,n2)*a(n2,j) 5003 continue ] should be 3 n2=n2+1 kk=n2-1+isamx2( 3,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 306 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 306 a(n2,j)=r a(n2,n2)=s cdir$ ivdep do 307 j=n2+1,n ] reduce last 2*2 part of matrix a(n2+1,j)=a(n2+1,j) -a(n2+1,n2)*a(n2,j) 307 a(n2+2,j)=a(n2+2,j) -a(n2+2,n2)*a(n2,j) 5002 continue ] should be 2 n2=n2+1 kk=n2-1+isamx2( 2,loc(a(n2,n2)),1) ipvt(n2)=kk if ( a(kk,n2).eq.0. ) go to 200 s=1./a(kk,n2) cdir$ ivdep do 311 j=n2,n ] pivot and normalize r=a(kk,j)*s a(kk,j)=a(n2,j) 311 a(n2,j)=r a(n2,n2)=s a( n,n )=a( n,n ) -a( n,n-1)*a( n-1,n ) 5001 if ( a( n,n ).eq.0. ) go to 200 a( n,n )=1./a( n,n ) lump=.true. return 200 write(13,201) n 201 format(' lump found sing matrix',/, 1' n=',i6) lump=.false. return end cmic$ micro subroutine lumpker( a,nd1,n,n2 ,ipvt ,np ,lump ) real a(nd1,n) integer ipvt(n) logical prep3,lump,lumps common /qqwwee/ lumps * write(6,99) n *9 format(' lumpker called n=',i5) do 150 ipiv1=1,n2,3 ipiv3=ipiv1+2 cmic$ process if ( .not.lump ) goto 1 lumps= prep3( a,nd1,n,ipvt,ipiv1 ) 1 if ( .not.lumps ) then np=0 lump=.false. endif cmic$ end process ntot=n-ipiv3 cmic$ do global do 145 i=1,np i1=ipiv3+1+((i-1)*ntot)/np i2=ipiv3+1+((i )*ntot)/np call red3(a(ipiv1,ipiv3+1),nd1,n-ipiv3 1 , a(i1 ,ipiv3+1),i2-i1 2 , a(i1 ,ipiv1) ) 145 continue 150 continue return end subroutine uselu1( lda,a,n,ipvt,b ) real a(lda,n),b(n) integer ipvt(n) do 1 j=1,n-1 1 if ( ipvt(j).lt.1 .or. ipvt(j).gt.n ) go to 200 n2=0 if ( n.lt.16 ) go to 151 nhops=(n-13)/3 n2=3*nhops do 150 ipiv1=1,n2,3 r=b(ipvt(ipiv1)) b(ipvt(ipiv1))=b(ipiv1) b(ipiv1)=r*a(ipiv1,ipiv1) c --- find second pivot point --- ipiv2=ipiv1+1 ipiv3=ipiv2+1 r=b(ipvt(ipiv2)) b(ipvt(ipiv2))=b(ipiv2) b(ipiv2)=r*a(ipiv2,ipiv2) c --- find third pivot point --- r=b(ipvt(ipiv3)) b(ipvt(ipiv3))=b(ipiv3) b(ipiv3)=r*a(ipiv3,ipiv3) c --- fix rows ipiv2 thru ipiv3 --- b(ipiv2 )= b(ipiv2 )-a(ipiv2,ipiv1)*b(ipiv1) b(ipiv3 )=(b(ipiv3 )-a(ipiv3,ipiv1)*b(ipiv1)) 1 -a(ipiv3,ipiv2)*b(ipiv2) cdir$ ivdep do 140 i=ipiv3+1,n 140 b(i )=(((b(i )-a(i,ipiv1)*b(ipiv1 )) 1 -a(i,ipiv2)*b(ipiv2 )) 2 -a(i,ipiv3)*b(ipiv3 )) 150 continue c *** now reduce to upper triangular form with unit diagnols c *** by standard gauss elimination 151 do 175 i=n2+1,n-1 c *** find pivot row r=b(ipvt(i)) b(ipvt(i))=b(i) b(i)=r*a(i,i) c *** use pivot row to eliminate rest of column i cdir$ ivdep do 170 j=i+1,n 170 b(j )=b(j )-a(j,i)*b(i ) 175 continue b(n)=b(n)*a(n,n) c *** now back subst to complete solution do 180 i=n,2,-1 cdir$ ivdep do 180 k=1,i-1 180 b(k )=b(k )-b(i )*a(k,i) return 201 format(' uselu1 error bad pivot') 200 write(6,201) stop end .