job,jn=cftlist,t= 77.us=bnchbch.
account,ac=865606,us=u2240,upw=u2240.
cft,l=0.
fetch,dn=bndchl,text=for.
cft,i=bndchl,l=0.
fetch,dn=mxmup,text=cal1,mf=v3.
cal,i=mxmup,l=0.
fetch,dn=banded,text=for.
cft,i=banded,l=0.
segldr,i.
xxx.
/eof
      real abnd(101,5000) ] sqrt of matrix
     *    ,bbnd(201,5000) ] full matrix for bndchl
     *    ,cbnd(201,5000) ] full matrix for spbfa
     *    ,rhs1(5000)     ] right-hand-side for bndchl
     *    ,rhs2(5000)     ] "               "   spbfa
      common /spacer/ abnd,bbnd,cbnd

      integer mh(21)
      data mh / 2,3,4,6,8 ,12,16,24,32,33 ,34,40,48 ] 13
     * ,64,65,66,70,84 ,96,97,100 /                  ] 8

      n=5000
         do 200 im=1,21
         m1=mh(im)
         m2=2*m1
         write(6,1) m2,n
1        format(' problem size  m,n',2i6)

      t0=second()
      do 2 j=1,101*5000
2     abnd(j,1)=0.
      do 3 j=1,201*5000
3     bbnd(j,1)=0.

         do 5 i=1,n
5        abnd(m1+1,i)=1.+ranf()
      s=.75
         do 10 l=m1,1,-1
            do 8 i=1,n
8           abnd(l,i)=s*(ranf()-.4)
10       s=.8*s
         do 15 i=1,m1
         do 15 j=1,m1+1-i
15       abnd(j,i)=0.

      call symmsq( abnd,101 ,n,m1 ,bbnd,201,mc )

         do 30 j=1,201*5000
30       cbnd(j,1)=bbnd(j,1)

         do 35 j=1,n
         rhs1(j)=ranf()-.5
35       rhs2(j)=rhs1(j)
      tims=tims+second()-t0


      t1=second()
      call bndchl( bbnd,201,n,m2  ,infox )
      t2=second()

      call spbfa(  cbnd,201,n,m2  ,infol )
      t3=second()
      tt=t2-t1
      t3=t3-t2
      speed=t3/tt
      write(6,38) infox,tt  ,infol,t3  ,speed
38    format(' bndchl   info,time',i6,f12.6,/,
     1       ' spbfa    info,time',i6,f12.6 ,5x,' ratio',f8.3)

      flops= n*(  m2*(m2+1) +m2 +16 )
      rate=1.e-6*flops/tt
      write(6,44) rate
44    format(' rate=',g16.6)

      call bndchls( bbnd,201,n,m2  ,rhs1,rhs1 )
      call spbsl  ( cbnd,201,n,m2  ,rhs2 )

      siz=0.
      errs=0.
         do 45 j=1,n
         errs=errs+(rhs1(j)-rhs2(j))**2
45       siz=siz+rhs1(j)**2
      write(6,46) errs,siz  ,tims
46    format(' errs,size',2g16.7,5x,' setup time so far',f9.3
     *,/,1x,35(2h-+))
200   continue

      stop
      end

      subroutine getrow( a,lda ,n,m ,i ,row )
      real a(lda,n) ,row(n)

      data lasti/-99999/

c     --- gets the ith row of a symmetric banded matrix ---

      j1=1
      j2=n
         if ( i.eq.lasti+1 ) then
         j1=max0( 1,i-2*m-10 )
         j2=min0( n,i+2*m )
      endif
      lasti=i
         do 1 j=j1,j2
1        row(j)=0.

         do 2 j=0,min0(m,i-1)  ] j represents distance left of diagnol
2        row(i-j)=a(m+1-j,i)

         do 3 j=1,min0(m,n-i)  ] j represents distance right of diagnol
3        row(i+j)=a(m+1-j,i+j)

      return
      end

      subroutine getcol( a,lda ,n,m ,i ,row )
      real a(lda,n) ,row(n)

      data lasti/-99999/

c     --- gets the ith col of a symmetric banded matrix ---

      if ( iabs(i-lasti).lt.10 ) then
         s=1.33*float(m)
         k=s+16
         j1=max0(1,i-k)
            do 11 j=j1,i-m+5
11          row(j)=0.
         j2=min0(n,i+k)
            do 12 j=i+m-5,j2
12          row(j)=0.
      else
         do 1 j=1,n
1        row(j)=0.
      endif

      lasti=i

         do 2 j=0,min0(m,i-1)  ] j represents distance left of diagnol
2        row(i-j)=a(m+1-j,i)

         do 3 j=1,min0(m,n-i)  ] j represents distance right of diagnol
3        row(i+j)=a(m+1-j,i+j)

      return
      end

      subroutine putval( a,lda ,n,m ,i,j ,val )
      real a(lda,n)

c     --- stores an element into a symmetric band store matrix ---

      if ( iabs(i-j).gt.m ) return
      if ( i.lt.1 .or. j.lt.1 .or. i.gt.n .or. j.gt.n ) return
      ii=max0(i,j)
c     jj=iabs(i-j)
      a(m+1-iabs(i-j),ii)=val
      return
      end

      subroutine symmsq( a,lda,n,m ,as,ldas,ms )
      real a(lda,n) ,as(ldas,n)
     *  ,r(10000),c(10000)

c   --- squares the symmetric matrix a ---

      ms=2*m
         do 1 j=1,ms+1
         do 1 i=1,n
1        as(j,i)=0.

c     k1=1
c     k2=n
      s=1.3*(m+6)
      kpos=s

         do 10 i=1,n
         call getrow( a,lda,n,m ,i,r )

         do 10 j=max0(1,i-ms),i
         call getcol( a,lda,n,m ,j,c )

         s=0.
c ---------------------------------
      k1=max0( 1,min0(i,j)-kpos )
      k2=min0( n,max0(i,j)+kpos )
c ---------------------------------
            do 5 k=k1,k2
5           s=s+r(k)*c(k)

         call putval( as,ldas,n,ms ,i,j ,s )
10       continue

      return
      end
/eof
abs=xxx
*deck bndchl for
      subroutine bndchl( a,lda ,n,m ,info )
      real a(lda,n)
c driver for high-speed banded-cholesky decomposition.
c algorith is the same as dongarras matrix times vector method for dense
c matrices -only this is applied to banded systems.

      real rowi(1000)

c  the first m rows we use ordinary banded decomp
c  (we could decompose this into 2 matrix times vector operations
c   where one matrix was rectangular and one was triangular)
c  -but i didn't bother to.

         do 50 i=1,m

c        --- get rowi ---

         j1=max0(m+2-i,1)
            do 10 j=j1,m
10          rowi(j)=-a(j,i)

         nrows=min0(m+1,n+1-i)
            do 30 jj=j1,m
cdir$ ivdep
            do 30 ii=1,min0(jj,nrows)
30          a(m+2-ii,i+ii-1)=a(m+2-ii,i+ii-1)
     *      +rowi(jj)*a(jj+1-ii,i-1+ii)

      info=i
      if(a(m+1,i).le.0. ) go to 60
         a(m+1,i)=1./sqrt(a(m+1,i))
            do 40 ii=1,nrows-1
40          a(m+1-ii,i+ii)=a(m+1-ii,i+ii)*a(m+1,i)

50       continue

c     central rows of the matrix we have only triangular matrix times
c     times vector operations

      j1=1
      nrows=     m+1
         do 150 i=m+1,n-m

c        --- get rowi ---

            do 110 j=j1,m
110         rowi(j)=-a(j,i)

c  do 130 accomplishes the same result as call mxmup
c           do 130 jj=j1,m
c           do 130 ii=1,nrows
c30         if ( jj+1-ii .gt. 0 )
c    *      a(m+2-ii,i+ii-1)=a(m+2-ii,i+ii-1)
c    *      +rowi(jj)*a(jj+1-ii,i-1+ii)
         call mxmup( a(1,i),lda-1 ,rowi,1 ,a(m+1,i),lda-1 ,m )

      info=i
      if(a(m+1,i).le.0. ) go to 60
         a(m+1,i)=1./sqrt(a(m+1,i))
            do 140 ii=1,nrows-1
140         a(m+1-ii,i+ii)=a(m+1-ii,i+ii)*a(m+1,i)

150      continue

c  again do 250 could be accomplished by matrix times vector operations
c  where we have both rectangular and triangular matrices

         do 250 i=n+1-m,n

c        --- get rowi ---

         j1=max0(m+2-i,1)
            do 210 j=j1,m
210         rowi(j)=-a(j,i)

         nrows=min0(m+1,n+1-i)
            do 230 jj=j1,m
cdir$ ivdep
            do 230 ii=1,min0(jj,nrows)   ] nrows
230      ]  if ( jj+1-ii .gt. 0 )
     *      a(m+2-ii,i+ii-1)=a(m+2-ii,i+ii-1)
     *      +rowi(jj)*a(jj+1-ii,i-1+ii)

      info=i
      if(a(m+1,i).le.0. ) go to 60
         a(m+1,i)=1./sqrt(a(m+1,i))
            do 240 ii=1,nrows-1
240         a(m+1-ii,i+ii)=a(m+1-ii,i+ii)*a(m+1,i)

250      continue
      info=0
60    return
      end

c   --- a fortran version (no attempt at efficiency) of the kernal
      subroutine mxmupf( a,lda ,r,ldr ,y,ldy ,m )
      real a(lda,m),r(ldr,m),y(ldy,m)

         do 10 i=1,m
         do 10 j=1,m
10       if ( j.ge.i ) y(1,i)=y(1,i)+ r(1,j)*a(j,i)

      return
      end

      subroutine bndchls( a,lda ,n,m ,x,b )
      real a(lda,n) ,x(n),b(n)

c   solve ax=b : where a is cholesky decomposition (output from bndchl)
c   valid for the case where x and b occupy the same memory storage
c   as well as the case where x ,and b as disjoint (in memory)

      if ( loc(x).ne.loc(b) ) then
         do 1 j=1,n
1        x(j)=b(j)
      endif

         do 20 k=1,n
         x(k)=x(k)*a(m+1,k)
cdir$ ivdep
            do 10 i=k+1,min0(k+m,n)
10          x(i)=x(i)-x(k)*a(m+k+1-i,i)
20       continue

         do 40 k=n,1,-1
         x(k)=x(k)*a(m+1,k)
cdir$ ivdep
            do 30 i=1,min0(k-1,m)
30          x(k-i)=x(k-i)-x(k)*a(m+1-i,k)
40       continue

      return
      end
      subroutine llts (a,lda,n,x,b)
      real a(lda,*), x(*), b(*), xk
c
c   purpose:
c     solve the symmetric positive definite system ax = b given the
c     cholesky factorization of a (as computed in llt).
c
c   additional parameters (not parameters of llt):
c
c     x real(n), solution of linear system
c
c     b real(n), right-hand-side of linear system
c
c ----------------------------------------------------------------------
c
      do 10 k = 1, n
       x(k) = b(k)
   10 continue
c
      do 30 k = 1, n ]]]]]]]]]]]]]]]
       xk = x(k)*a(k,k)
       do 20 i = k+1, n
          x(i) = x(i) - a(i,k)*xk
   20    continue
       x(k) = xk
   30 continue
c
      do 50 k = n, 1, -1
       xk = x(k)*a(k,k)
       do 40 i = 1, k-1
          x(i) = x(i) - a(k,i)*xk
   40    continue
       x(k) = xk
   50 continue
c
      return
      end
      subroutine llt1(a,lda,n,rowi,info)
c ** llt.f -- llt decomposition
c
      integer n,lda,info
      real a(lda,n),rowi(n),t
      character*1 charn
c
         charn='n'
      info = 0
      do 30 i = 1, n ]]] should be n
         do 10 j = 1, i-1
            rowi(j) = -a(i,j)
   10    continue
c
         call smxpy1(n-i+1,a(i,i),i-1,lda,rowi,a(i,1))
c        charn='n'
c        call sgemv (charn ,n-i+1 ,i-1 ,1. ,a(i,1),lda
c    *              ,rowi,1 ,a(i,i),1 ,infos )
c     if ( infos.ne.0 ) then
c     write(6,77) infos,i
c7    format(' ????? info ne 0',6x,'info',i5,'  i',i5)
c     stop
c     endif

c
         if (a(i,i) .le. 0) then
            info = i
            go to 40
         endif
         a(i,i) = 1/sqrt(a(i,i))
c
         t = a(i,i)
         do 20 j = i+1, n
            a(j,i) = t*a(j,i)
   20    continue
   30 continue
   40 return
      end
      subroutine smxpy1 (n1,y,n2,ldm,x,m)
c ** smxpy01.f -- smxpy unrolled to a depth of 1
c
      integer ldm,n1,n2
      real y(n1),x(n2),m(ldm,n2)
c
      do 20 j = 1, n2
         do 10 i = 1, n1
            y(i) = (y(i)) + x(j)*m(i,j)
   10    continue
   20 continue
      return
      end
*deck mxmup cal1

*        subroutine mxmup( q,ldq ,r,ldr ,y,ldy ,m )
*        real q(ldq,m),r(ldr,m),y(ldy,m)
*
*           do 10 i=1,m
*           do 10 j=1,m
*  10       if ( j.ge.i ) y(1,i)=y(1,i)+ r(1,j)*q(j,i)
*
*        return
*        end

          ident     mxmup
q         defarg                   matrix
ldq       defarg                   leading dim. of q
r         defarg                   vector multiplier
ldr       defarg                   leading dim of r
y         defarg                   vector to be updated
ldy       defarg                   leading dim of y
m         defarg                   order of problem

lmajy     defb      temp           @y at begining of a major iteration
lmajr     defb      temp           @r at begining of a major iteration
lmajq     defb      temp           @q at begining of a major iteration
*                         -----    note: stored @q-1
lminr     defb      temp           @r at begining of a minor iteration

nmaj      defb      temp           # of elements left to process at the
                                   beginning of a major iteration
nmin      defb      temp           # of elements left to process at the
                                   beginning of a minor iteration

dimq      defb      temp           leading dim of q
dimqp     defb      temp           leading dim of q +1
dimy      defb                     leading dim of y
dimr      defb                     leading dim of r

lmajyx    defb                     @y at end of a major iteration
lmajrx    defb                     @r at end of a major iteration
lmajqx    defb                     @q at end of a major iteration

nmajx     defb                     # of elements left to process at the
                                   end of a major iteration

mxmup     enter

          argadd    s1,q,use=a7
          argadd    a1,ldq,argptr=a7
          argadd    a2,r,argptr=a7
          argadd    a3,ldr,argptr=a7
          argadd    a4,y,argptr=a7
          argadd    a5,ldy,argptr=a7
          argadd    a6,m,argptr=a7

          a7        s1
          a1        ,a1
          a3        ,a3
          a5        ,a5
          a6        ,a6

          a7        a7-1
          b.lmajr   a2
          b.lmajq   a7
          b.lmajy   a4

          b.dimq    a1
          a1        a1+1
          b.dimr    a3
          b.dimqp   a1
          b.dimy    a5
          b.nmaj    a6

          a0        -a6            test if m<1
          jap       return

major     =         *

          a7        b.nmaj
          vl        a7

          a0        b.lmajr        load rs
          a1        b.dimr
          v0        ,a0,a1
          a6        vl
          a1        a1*a6
          a2        b.lmajr
          a2        a2+a1
          b.lmajrx  a2
          b.lminr   a2

          a0        b.lmajy        load ys
          a1        b.dimy
          v1        ,a0,a1
          a1        a1*a6
          a2        b.lmajy
          a2        a2+a1
          b.lmajyx  a2

*     --- book-keeping for loop -------
          a7        a7-a6
          a2        a6-1           counter (for r in v0)
*         a6                       block vl
          a5        a6             decrementing vl
          a4        b.lmajq        @q
          a4        a4+a6
          a3        b.dimq         ldq
          b.nmajx   a7
          s0        +a7            test after loop for end.
          s4        a4             save @q for minor its

loopm     =         *

          a0        a4
          v2        ,a0,a3

          s1        v0,a2
          a2        a2-1
          a5        a5-1
          a4        a4-1
          a0        a5-1           if next vl=0 then quit
          v3        s1*rv2
          v1        v1+fv3
          vl        a5

          jam       loopmf

          a0        a4
          v5        ,a0,a3

          s1        v0,a2
          a2        a2-1
          a5        a5-1
          a4        a4-1
          a0        a5-1

          v6        s1*rv5
          v1        v1+fv6
          vl        a5
          jap       loopm
loopmf    =         *
          jsz       done           if s0=0 then we are finished.

*   --- prepare for minor iterations ---
          a4        s4
          a4        a4+1           @q loop begining

*   --- compute minor iterations (note if no minor iterations done)
minor     =         *              #################################

          a0        b.lminr
          a1        b.dimr
          vl        a5
          v0        ,a0,a1         load next 64 rs
          a5        vl             =64
          a1        a1*a5          vl*ldr
          a2        b.lminr
          vl        a6             vl in y dir.
          a2        a2+a1
          a7        a7-a5          # rs remaining after this minor iterat
          b.lminr   a2

          s0        +a7            done with this set of minor its?
          a3        0              counter for scalar rs (in v0)
          a2        b.dimq

loop      =         *              +++++++++++++++++++++++++++++

          a0        a4
          v2        ,a0,a2
          s1        v0,a3
          a3        a3+1
          a4        a4+1
*         a0        a5-a3          don't need since 64 is even
          v3        s1*rv2
          v4        v1+fv3
*         jap       loop+

          a0        a4             2nd copy (unrolled by 2)
          v5        ,a0,a2
          s1        v0,a3
          a3        a3+1
          a4        a4+1
          a0        a5-a3
          v6        s1*rv5
          v1        v4+fv6
          jan       loop           +++++++++++++++++++++++++++++

          jsn       minor          more minor its ?

*   --- store out y ---
          a0        b.lmajy
          a5        b.dimy
          ,a0,a5    v1
***  j     return                  ---- //cludge for testing only// ----

*   --- prep for next major iter ---
          a3        b.nmajx
          b.nmaj    a3

          a3        b.lmajyx

          a4        b.dimqp
          a4        a6*a4
          b.lmajy   a3
          a3        b.lmajq
          a1        b.lmajrx
          a3        a3+a4
          b.lmajr   a1
          b.lmajq   a3


          j         major

* -------------------------------------------------


done      vl        a6
          a0        b.lmajy
          a5        b.dimy
          ,a0,a5    v1


return    =         *
          exit
          end
*deck banded for
      subroutine spbfa(abd,lda,n,m,info)
      integer lda,n,m,info
      real abd(lda,1)
c
c     spbfa factors a real symmetric positive definite matrix
c     stored in band form.
c
c     spbfa is usually called by spbco, but it can be called
c     directly with a saving in time if  rcond  is not needed.
c
c     on entry
c
c        abd     real(lda, n)
c                the matrix to be factored.  the columns of the upper
c                triangle are stored in the columns of abd and the
c                diagonals of the upper triangle are stored in the
c                rows of abd .  see the comments below for details.
c
c        lda     integer
c                the leading dimension of the array  abd .
c                lda must be .ge. m + 1 .
c
c        n       integer
c                the order of the matrix  a .
c
c        m       integer
c                the number of diagonals above the main diagonal.
c                0 .le. m .lt. n .
c
c     on return
c
c        abd     an upper triangular matrix  r , stored in band
c                form, so that  a = trans(r)*r .
c
c        info    integer
c                = 0  for normal return.
c                = k  if the leading minor of order  k  is not
c                     positive definite.
c
c     band storage
c
c           if  a  is a symmetric positive definite band matrix,
c           the following program segment will set up the input.
c
c                   m = (band width above diagonal)
c                   do 20 j = 1, n
c                      i1 = max0(1, j-m)
c                      do 10 i = i1, j
c                         k = i-j+m+1
c                         abd(k,j) = a(i,j)
c                10    continue
c                20 continue
c
c     linpack.  this version dated 08/14/78 .
c     cleve moler, university of new mexico, argonne national lab.
c
c     subroutines and functions
c
c     blas sdot
c     fortran max0,sqrt
c
c     internal variables
c
      real sdot,t
      real s
      integer ik,j,jk,k,mu
c     begin block with ...exits to 40
c
c
         do 30 j = 1, n
            info = j
            s = 0.0e0
            ik = m + 1
            jk = max0(j-m,1)
            mu = max0(m+2-j,1)
ccc         if (m .lt. mu) go to 20
            do 10 k = mu, m
               t = abd(k,j) - sdot(k-mu,abd(ik,jk),1,abd(mu,j),1)
               t = t/abd(m+1,jk)
               abd(k,j) = t
               s = s + t*t
               ik = ik - 1
               jk = jk + 1
   10       continue
ccc20       continue
            s = abd(m+1,j) - s   ] main diagnol
c     ......exit
            if (s .le. 0.0e0) go to 40
            abd(m+1,j) = sqrt(s)
   30    continue
         info = 0
   40 continue
      return
      end
      subroutine spbsl(abd,lda,n,m,b)
      integer lda,n,m
      real abd(lda,1),b(1)
c
c     spbsl solves the real symmetric positive definite band
c     system  a*x = b
c     using the factors computed by spbco or spbfa.
c
c     on entry
c
c        abd     real(lda, n)
c                the output from spbco or spbfa.
c
c        lda     integer
c                the leading dimension of the array  abd .
c
c        n       integer
c                the order of the matrix  a .
c
c        m       integer
c                the number of diagonals above the main diagonal.
c
c        b       real(n)
c                the right hand side vector.
c
c     on return
c
c        b       the solution vector  x .
c
c     error condition
c
c        a division by zero will occur if the input factor contains
c        a zero on the diagonal.  technically this indicates
c        singularity but it is usually caused by improper subroutine
c        arguments.  it will not occur if the subroutines are called
c        correctly and  info .eq. 0 .
c
c     to compute  inverse(a) * c  where  c  is a matrix
c     with  p  columns
c           call spbco(abd,lda,n,rcond,z,info)
c           if (rcond is too small .or. info .ne. 0) go to ...
c           do 10 j = 1, p
c              call spbsl(abd,lda,n,c(1,j))
c        10 continue
c
c     linpack.  this version dated 08/14/78 .
c     cleve moler, university of new mexico, argonne national lab.
c
c     subroutines and functions
c
c     blas saxpy,sdot
c     fortran min0
c
c     internal variables
c
      real sdot,t
      integer k,kb,la,lb,lm
c
c     solve trans(r)*y = b
c
      do 10 k = 1, n
         lm = min0(k-1,m)
         la = m + 1 - lm
         lb = k - lm
         t = sdot(lm,abd(la,k),1,b(lb),1)
         b(k) = (b(k) - t)/abd(m+1,k)
   10 continue
c
c     solve r*x = y
c
      do 20 kb = 1, n
         k = n + 1 - kb
         lm = min0(k-1,m)
         la = m + 1 - lm
         lb = k - lm
         b(k) = b(k)/abd(m+1,k)
         t = -b(k)
         call saxpy(lm,t,abd(la,k),1,b(lb),1)
   20 continue
      return
      end

.