Date: Wed, 22 Jul 92 22:17:51 -0500
From: chong@pop.stat.purdue.edu (Chong Gu)
Subject: rkpk at netlib

Dear Dr. Grosse:

RKPACK in GCV has been revised and expanded.  It is repackaged and
deposited also to STATLIB archived at statlib@temper.stat.cmu.edu.  To
keep the consistency of the software, I would appreciate your help in
replacing the one at NETLIB by the following bundle.  If the file is
too big for e-mail at your site, you may consider instructing the
users to ftp it or directing the users to statlib@temper.stat.cmu.edu
(send rkpk from general).  If possible, a note about the update to all
the previous subscribers will be greatly appreciated.

With best regards,

Chong Gu


#	This is a shell archive.
#	Remove everything above and including the cut line.
#	Then run the rest of the file through sh.
#----cut here-----cut here-----cut here-----cut here----#
#!/bin/sh
# shar:	Shell Archiver
#	Run the following text with /bin/sh to create:
#	rkpk
# This archive created: Wed Jul 22 22:06:27 1992
# By:	Chong Gu (Purdue University Statistics Department)
mkdir rkpk
cd rkpk
mkdir demo
cd demo
cat << \SHAR_EOF > tensor.r
#   THIS PROGRAM ILLUSTRATES THE USE OF RKPACK ROUTINES IN FITTING A MODEL
#        y = C + f1(x1) + f2(x2) + f3(x3) + f12(x1,x2) + e
#   ON [0,1]^3 USING TENSOR PRODUCT SPLINES WITH CUBIC SPLINE SPACE AS MARGINALS
#   AND WITH INTEGRATION SIDE CONDITIONS.  THE PROGRAM CALCULATES THE FIT AND THE
#   COMPONENT-WISE BAYESIAN CONFIDENCE INTERVALS ON THE DESIGN POINTS, AND COLLECTS
#   COVERAGE PERCENTAGES FOR INTERVALS OF NOMINAL COVERAGES 95%, 90%, 75%, AND 50%.
#   THE RESULTS IN SECTION 6 OF GU AND WAHBA (1992, UW-TR-881-REV) WERE GENERATED
#   USING THIS PROGRAM.

program  tensor

#   CAUTION:  nobs=200 TAKES A LOT OF MEMORY.
parameter  ( nobs = 100, nnull = 5, nq = 6, k = 3, nrep = 5 )

#   PARAMETERS:
#        nobs     number of observations. 
#        nnull    dimension of null space.
#        nq       number of smoothing parameters.
#        k        number of variables.
#        nrep     number of replicates requested.

double precision  x(nobs,k), s(nobs,nnull), swk(nobs,nnull), q(nobs,nobs,nq),_
                  qwk(nobs,nobs,nq), y(nobs), ywk(nobs), prec, theta(nq), nlaht,_
                  score, varht, b, c(nobs), d(nnull), dwk(nobs*nobs*(nq+2)),_
                  cr(nobs,nobs,nnull), dr(nnull,nobs,nnull), qraux(nnull),_
                  sms(nnull,nnull), limnla(2), tmp, rc, dfm, dfi, ddot,_
                  f(nobs,nnull), nsize
real              uni, rnor
integer           info, i, j, jjj, init, maxiter, jpvt(nnull), ct(5,4), dseed, nseed, infosv


#   SET ALGORITHMIC PARAMETERS
init = 0
prec = 1.d-6
maxiter = 30

#   INPUT SIMULATION PARAMETERS
read (*,*) dseed, nseed, nsize    #SEED FOR DESIGN, SEED FOR NOISE, STD OF NOISE
write (*,*) 'Number of observations', nobs
write (*,*) 'Seed for uniform design', dseed   # 2375 WAS USED IN THE SIMULATIONS
write (*,*) 'Seed for Gaussian noise', nseed   # 5732 WAS USED IN THE SIMULATIONS
write (*,*) 'Standard deviation of noise', sngl (nsize)   # 1, 3, AND 10 USED IN SIMULATIONS

#   GENERATE THE DESIGN
tmp = dble (uni (dseed))
for (j=1;j<=nobs;j=j+1) {
    for (i=1;i<=k;i=i+1)  x(j,i) = dble (uni (0))
}

#   GENERATE THE TEST FUNCTION
for (j=1;j<=nobs;j=j+1) {
    f(j,2) = dfm (x(j,1), 1)   # dfm AND dfi ARE APPENDED AT THE END OF THIS PROGRAM
    f(j,3) = dfm (x(j,2), 2)
    f(j,4) = dfm (x(j,3), 3)
    f(j,5) = dfi (x(j,1), x(j,2), 1, 2)
    f(j,1) = 1.d0 + f(j,2) + f(j,3) + f(j,4) + f(j,5)
}

#   GENERATE THE MATRIX S
call  dset (nobs, 1.d0, s(1,1), 1)
for (j=1;j<=nobs;j=j+1) {
    s(j,2) = x(j,1) - .5d0   #
    s(j,3) = x(j,2) - .5d0   #   MAIN EFFECTS TERMS
    s(j,4) = x(j,3) - .5d0   #
    s(j,5) = s(j,2) * s(j,3)   #   x1-x2 INTERACTION TERM
}

#   GENERATE THE MATRICES $\tilde{\Sigma}_{\beta}$ ($\tilde{Q}_{\beta}$)
for (j=1;j<=nobs;j=j+1) {
    for (i=j;i<=nobs;i=i+1) {
        q(i,j,1) = rc (x(i,1), x(j,1)) #
        q(i,j,2) = rc (x(i,2), x(j,2)) #   MAIN EFFECTS TERMS, rc APPENDED AT THE END
        q(i,j,3) = rc (x(i,3), x(j,3)) #
        q(i,j,4) = q(i,j,1) * s(i,3) * s(j,3) #
        q(i,j,5) = s(i,2) * s(j,2) * q(i,j,2) #   x1-x2 INTERACTION TERMS
        q(i,j,6) = q(i,j,1) * q(i,j,2)        #
    }
}

#   START OF REPLICATION
tmp = dble (rnor (nseed))
for (jjj=1;jjj<=nrep;jjj=jjj+1) {

#   GENERATE THE RESPONSE y
for (j=1;j<=nobs;j=j+1)  y(j) = f(j,1) + dble (rnor (0)) * nsize

#   UNBLOCK NEXT LINE IF ONLY REPLICATE #1 IS OF INTEREST
#if ( jjj != 1 )  next

#   CALL RKPACK DRIVER FOR MODEL FITTING
call  dcopy (nobs*nobs*nq, q, 1, qwk, 1)
call  dcopy (nobs*nnull, s, 1, swk, 1)
call  dcopy (nobs, y, 1, ywk, 1)
call  dmudr ('v',_
             swk, nobs, nobs, nnull, qwk, nobs, nobs, nq, ywk,_
             0.d0, init, prec, maxiter,_
             theta, nlaht, score, varht, c, d,_
             dwk, info)
infosv = info

#   GENERATE (\theta R)'s IN qwk FOR CALCULATING c_r AND d_r
for (j=1;j<=nobs;j=j+1)  call  dset (nobs-j+1, 0.d0, qwk(j,j,1), 1)
#   (\theta R) FOR OVERALL FUNCTION
for (i=1;i<=nq;i=i+1) {
    for (j=1;j<=nobs;j=j+1)
        call  daxpy (nobs-j+1, 10.d0**theta(i), q(j,j,i), 1, qwk(j,j,1), 1)
}
#   (\theta R)'s FOR THE MAIN EFFECTS
for (i=1;i<=3;i=i+1) {
    for (j=1;j<=nobs;j=j+1) {
        call  dcopy (nobs-j+1, q(j,j,i), 1, qwk(j,j,i+1), 1)
        call  dscal (nobs-j+1, 10.d0**theta(i), qwk(j,j,i+1), 1)
    }
}
#   (\theta R) FOR THE INTERACTION
for (j=1;j<=nobs;j=j+1) {
    call  dset (nobs-j+1, 0.d0, qwk(j,j,5), 1)
    for (i=4;i<=6;i=i+1)
        call  daxpy (nobs-j+1, 10.d0**theta(i), q(j,j,i), 1, qwk(j,j,5), 1)
}
#   FILL THE UPPER TRIANGLES
for (i=1;i<=5;i=i+1) {
    for (j=1;j<=nobs;j=j+1)
        call  dcopy (nobs-j, qwk(j+1,j,i), 1, qwk(j,j+1,i), nobs)
}

#   MATRIX DECOMPOSITION FOR CALCULATING c_r, d_r, AND sms
for (j=1;j<=nobs;j=j+1)  call  dcopy (nobs-j+1, qwk(j,j,1), 1, qwk(j,j,6), 1)
call  dcopy (nobs*nnull, s, 1, swk, 1)
call  dcopy (nobs, y, 1, ywk, 1)
limnla(1) = nlaht - 1.d0
limnla(2) = nlaht + 1.d0
call  dsidr ('v',_
             swk, nobs, nobs, nnull, ywk, qwk(1,1,6), nobs,_
             0.d0, -1, limnla,_
             nlaht, score, varht, c, d,_
             qraux, jpvt, dwk,_
             info)
if ( info != 0 )  stop
#   CALCULATE b
b = varht / 10.d0**nlaht

#   CALCULATE c_r, d_r, AND sms
for (i=1;i<=5;i=i+1) {
    call  dcrdr (swk, nobs, nobs, nnull, qraux, jpvt, qwk(1,1,6), nobs, nlaht,_
                 qwk(1,1,i), nobs, nobs, cr(1,1,i), nobs, dr(1,1,i), nnull,_
                 dwk, info)
}
call  dsms (swk, nobs, nobs, nnull, jpvt, qwk(1,1,6), nobs, nlaht,_
            sms, nnull, dwk, info)

#   GENERATE (\theta R)'s IN qwk FOR ESTIMATE EVALUATIONS
for (j=1;j<=nobs;j=j+1)  call  dset (nobs-j+1, 0.d0, qwk(j,j,1), 1)
#   (\theta R) FOR THE OVERALL FUNCTION
for (i=1;i<=nq;i=i+1) {
    for (j=1;j<=nobs;j=j+1)
        call  daxpy (nobs-j+1, 10.d0**theta(i), q(j,j,i), 1, qwk(j,j,1), 1)
}
#   (\theta R)'s FOR THE MAIN EFFECTS TERMS
for (i=1;i<=3;i=i+1) {
    for (j=1;j<=nobs;j=j+1) {
        call  dcopy (nobs-j+1, q(j,j,i), 1, qwk(j,j,i+1), 1)
        call  dscal (nobs-j+1, 10.d0**theta(i), qwk(j,j,i+1), 1)
    }
}
#   (\theta R) FOR THE COMBINED INTERACTION TERM
for (j=1;j<=nobs;j=j+1) {
    call  dset (nobs-j+1, 0.d0, qwk(j,j,5), 1)
    for (i=4;i<=6;i=i+1)
        call  daxpy (nobs-j+1, 10.d0**theta(i), q(j,j,i), 1, qwk(j,j,5), 1)
}
#   FILL THE UPPER TRIANGLES
for (i=1;i<=5;i=i+1) {
    for (j=1;j<=nobs;j=j+1)
        call  dcopy (nobs-j, qwk(j+1,j,i), 1, qwk(j,j+1,i), nobs)
}

#   COLLECTING COVERAGE INFORMATION ON THE DESIGN POINTS
for (i=1;i<=5;i=i+1)
    for (j=1;j<=4;j=j+1)  ct(i,j) = 0
for (j=1;j<=nobs;j=j+1) {
    #   OVERALL ESTIMATE:  POSTERIOR MEAN
    dwk(1) = y(j) - 10.d0**nlaht * c(j)
    #   OVERALL ESTIMATE:  POSTERIOR STANDARD DEVIATION
    call  dsymv ('u', nnull, 1.d0, sms, nnull, s(j,1), nobs, 0.d0, ywk, 1)
    dwk(2) = (qwk(j,j,1) - ddot (nobs, qwk(1,j,1), 1, cr(1,j,1), 1))_
             + ddot (nnull, s(j,1), nobs, ywk, 1)_
             - 2.d0 * ddot (nnull, s(j,1), nobs, dr(1,j,1), 1)
    dwk(2) = dsqrt (b*dwk(2))
    #   OVERALL ESTIMATE:  COVERAGE (NO. OF POINTS OUT)
    if ( dabs (f(j,1)-dwk(1)) > dwk(2)*1.9604d0 )  ct(1,1) = ct(1,1) + 1  # 95%
    if ( dabs (f(j,1)-dwk(1)) > dwk(2)*1.6452d0 )  ct(1,2) = ct(1,2) + 1  # 90%
    if ( dabs (f(j,1)-dwk(1)) > dwk(2)*1.1504d0 )  ct(1,3) = ct(1,3) + 1  # 75%
    if ( dabs (f(j,1)-dwk(1)) > dwk(2)*0.6742d0 )  ct(1,4) = ct(1,4) + 1  # 50%

    for (i=2;i<=5;i=i+1) {
        #   COMPONENTS:  POSTERIOR MEANS
        dwk((i-1)*2+1) = s(j,i) * d(i) + ddot (nobs, qwk(1,j,i), 1, c, 1)
        #   COMPONENTS:  POSTERIOR STANDARD DEVIATIONS
        dwk(i*2) = (qwk(j,j,i) - ddot (nobs, qwk(1,j,i), 1, cr(1,j,i), 1))_
                  + s(j,i) * s(j,i) * sms(i,i) - 2.d0 * s(j,i) * dr(i,j,i)
        dwk(i*2) = dsqrt (b*dwk(i*2))
        #   COMPONENTS:  COVERAGES (NO. OF POINTS OUT)
        if ( dabs (f(j,i)-dwk(i*2-1)) > dwk(i*2)*1.9604d0 )  ct(i,1) = ct(i,1) + 1  # 95%
        if ( dabs (f(j,i)-dwk(i*2-1)) > dwk(i*2)*1.6452d0 )  ct(i,2) = ct(i,2) + 1  # 90%
        if ( dabs (f(j,i)-dwk(i*2-1)) > dwk(i*2)*1.1504d0 )  ct(i,3) = ct(i,3) + 1  # 75%
        if ( dabs (f(j,i)-dwk(i*2-1)) > dwk(i*2)*0.6742d0 )  ct(i,4) = ct(i,4) + 1  # 50%
    }

#   UNBLOCK THE FOLLOWING SEGMENT TO OUTPUT MARGINAL DESIGNS, TEST MAIN EFFECTS, 
#   POSTERIOR MEANS OF MAIN EFFECTS, AND POSTERIOR STANDARD DEVIATIONS OF MAIN EFFECTS
#    write (*,*)  (sngl (x(j,i)),i=1,3),_    #   marginal designs
#                 (sngl (f(j,i)),i=2,4),_    #   test main effects
#                 (sngl (dwk(i*2-1)),i=2,4),_#   posterior means
#                 (sngl (dwk(i*2)),i=2,4)    #   posterior stds
#    write (*,*)

}

#   OUTPUT COVERAGE INFORMATION, VAR ESTIMATE, AND ERROR CHECK (from dmudr)
                                                     # NO. OF UNCOVERED DATA POINTS
for (j=1;j<=4;j=j+1)  write (*,*)  (ct(i,j), i=1,5)  # ROWS:    95%, 90%, 75%, 50%
                                                     # COLUMNS: f, f1, f2, f3, f12
write (*,*)  sngl (dsqrt (varht)), infosv   # SIGMA HAT, info FROM dmudr

}   #   END OF REPLICATION

stop
end


#   TEST MAIN EFFECTS
double precision function  dfm (x, m)
double precision  x
integer           m

switch (m) {
    case 1 :
        dfm = dexp (3.d0 * x) - (dexp (3.d0) - 1.d0) / 3.d0
    case 2 :
        dfm = 1.d6 * (x ** 11 * (1 - x) ** 6) + 1.d4 * (x ** 3 * (1 - x) ** 10) - 6.986477575d0
    default :
        dfm = 0.d0
} 

return
end 


#   TEST INTERACTION
double precision function  dfi (x1, x2, m1, m2)
double precision  x1, x2, pi
integer           m1, m2

pi = 4.d0 * datan (1.d0)
dfi = 0.d0
if ( m1 == 1 & m2 == 2 ) {
    dfi = 5.d0 * dcos (2.d0*pi*(x1-x2))
}

return
end 


#   REPRODUCING KERNEL FOR CUBIC SPLINE ON [0,1]
double precision function  rc (y,x)
double precision  y, x, dk2, dk4

rc = dk2 (y) * dk2 (x) - dk4 (x-y)

return
end


#   AUXILIARY FUNCTION FOR CALCULATING REPRODUCING KERNEL
double precision function  dk2 (x)
double precision  x

x = dabs (x)
dk2 = ( x - .5d0 ) ** 2 
dk2 = ( dk2 - 1.d0 / 12.d0 ) / 2.d0

return
end


#   AUXILIARY FUNCTION FOR CALCULATING REPRODUCING KERNEL
double precision function  dk4 (x)
double precision  x

x = dabs (x)
dk4 = ( x - .5d0 ) ** 2
dk4 = ( dk4 ** 2 - dk4 / 2.d0 + 7.d0 / 240.d0 ) / 24.d0

return
end
SHAR_EOF
cat << \SHAR_EOF > tensor1.r
#   THIS PROGRAM ILLUSTRATES THE USE OF RKPACK ROUTINES IN FITTING A MODEL
#        y = C + f1(x1) + f2(x2) + f3(x3) + f12(x1,x2) + e
#   ON [0,1]^3 USING TENSOR PRODUCT SPLINES WITH CUBIC SPLINE SPACE AS MARGINALS
#   AND WITH INTEGRATION SIDE CONDITIONS.  THE PROGRAM CALCULATES THE FIT AND
#   EVALUATES THE POSTERIOR MEAN AND POSTERIOR STANDARD DEVIATION OF THE x1-x2
#   INTERACTION ON A PRODUCT GRID.

program  tensor1

#   CAUTION:  nobs=200 takes a lot of memory
parameter  ( nobs = 100, nnull = 5, nq = 6, k = 3, nrep = 5, ngrid=41 )

#   PARAMETERS:
#        nobs     number of observations. 
#        nnull    dimension of null space.
#        nq       number of smoothing parameters.
#        k        number of variables.
#        nrep     number of replicates requested.
#        ngrid    number of grid points in each margin of [0,1]^2.

double precision  x(nobs,k), s(nobs,nnull), swk(nobs,nnull), q(nobs,nobs,nq),_
                  qwk(nobs,nobs,nq), y(nobs), ywk(nobs), prec, theta(nq), nlaht,_
                  score, varht, b, c(nobs), d(nnull), dwk(nobs*nobs*(nq+2)),_
                  r(nobs,ngrid*ngrid), xx(ngrid), cr(nobs,ngrid*ngrid),_
                  dr(nnull,ngrid*ngrid), qraux(nnull),_
                  sms(nnull,nnull), limnla(2), tmp, rc, dfm, dfi, ddot,_
                  f(nobs,nnull), nsize
real              uni, rnor
integer           info, i, j, ii, jj, jjj, init, maxiter, jpvt(nnull), dseed, nseed, infosv


#   SET ALGORITHMIC PARAMETERS
init = 0
prec = 1.d-6
maxiter = 30

#   INPUT SIMULATION PARAMETERS
read (*,*) dseed, nseed, nsize    #SEED FOR DESIGN, SEED FOR NOISE, STD OF NOISE
write (*,*) '#nobs', nobs
write (*,*) '#dseed', dseed   # 2375 WAS USED IN THE SIMULATIONS
write (*,*) '#nseed', nseed   # 5732 WAS USED IN THE SIMULATIONS
write (*,*) '#nsize', sngl (nsize)   # 1, 3, AND 10 USED IN SIMULATIONS

#   GENERATE THE DESIGN
tmp = dble (uni (dseed))
for (j=1;j<=nobs;j=j+1) {
    for (i=1;i<=k;i=i+1)  x(j,i) = dble (uni (0))
}

#   GENERATE THE TEST FUNCTION
for (j=1;j<=nobs;j=j+1) {
    f(j,2) = dfm (x(j,1), 1)   # dfm AND dfi ARE APPENDED AT THE END OF THIS PROGRAM
    f(j,3) = dfm (x(j,2), 2)
    f(j,4) = dfm (x(j,3), 3)
    f(j,5) = dfi (x(j,1), x(j,2), 1, 2)
    f(j,1) = 1.d0 + f(j,2) + f(j,3) + f(j,4) + f(j,5)
}

#   GENERATE THE MATRIX S
call  dset (nobs, 1.d0, s(1,1), 1)
for (j=1;j<=nobs;j=j+1) {
    s(j,2) = x(j,1) - .5d0    #                      
    s(j,3) = x(j,2) - .5d0    #   MAIN EFFECTS TERMS
    s(j,4) = x(j,3) - .5d0    #                      
    s(j,5) = s(j,2) * s(j,3)    #   x1-x2 INTERACTION TERM
}

#   GENERATE THE MATRICES $\tilde{\Sigma}_{\beta}$ ($\tilde{Q}_{\beta}$)
for (j=1;j<=nobs;j=j+1) {
    for (i=j;i<=nobs;i=i+1) {
        q(i,j,1) = rc (x(i,1), x(j,1)) #
        q(i,j,2) = rc (x(i,2), x(j,2)) #   MAIN EFFECTS TERMS, rc APPENDED AT THE END
        q(i,j,3) = rc (x(i,3), x(j,3)) #
        q(i,j,4) = q(i,j,1) * s(i,3) * s(j,3) #
        q(i,j,5) = s(i,2) * s(j,2) * q(i,j,2) #    x1-x2 INTERACTION TERMS
        q(i,j,6) = q(i,j,1) * q(i,j,2)        #
    }
}

#   START OF REPLICATION
tmp = dble (rnor (nseed))
for (jjj=1;jjj<=nrep;jjj=jjj+1) {

#   GENERATE THE RESPONSE y
for (j=1;j<=nobs;j=j+1)  y(j) = f(j,1) + dble (rnor (0)) * nsize

#   CALCULATE REPLICATE #1 ONLY
if ( jjj != 1 )  next

#   CALL RKPACK DRIVER FOR FITTING THE MODEL
call  dcopy (nobs*nobs*nq, q, 1, qwk, 1)
call  dcopy (nobs*nnull, s, 1, swk, 1)
call  dcopy (nobs, y, 1, ywk, 1)
call  dmudr ('v',_
             swk, nobs, nobs, nnull, qwk, nobs, nobs, nq, ywk,_
             0.d0, init, prec, maxiter,_
             theta, nlaht, score, varht, c, d,_
             dwk, info)
infosv = info

#   SET MARGINAL GRID
for (i=1;i<=ngrid;i=i+1)  xx(i) = 0.d0 + dfloat (i-1) * 1.d0 / dfloat (ngrid-1)

#   GENERATE (\theta R) FOR c_r AND d_r
for (i=1;i<=nobs;i=i+1) {
    for (j=1;j<=ngrid*ngrid;j=j+1) {
        jj = (j - 1) / ngrid + 1   #   j-TH POINT HAS COORDINATES (xx(ii),xx(jj))
        ii = j - (jj-1) * ngrid    #
        r(i,j) = (10.d0**theta(4) * rc (x(i,1), xx(ii)) * (x(i,2) - .5d0) * (xx(jj) - .5d0)_
                + 10.d0**theta(5) * rc (x(i,2), xx(jj)) * (x(i,1) - .5d0) * (xx(ii) - .5d0)_
                + 10.d0**theta(6) * rc (x(i,1), xx(ii)) * rc (x(i,2), xx(jj)))
    }
}

#   MATRIX DECOMPOSITION FOR CALCULATING c_r, d_r, AND sms
for (j=1;j<=nobs;j=j+1)  call  dset (nobs-j+1, 0.d0, qwk(j,j,1), 1)
for (i=1;i<=nq;i=i+1) {
    for (j=1;j<=nobs;j=j+1)
        call  daxpy (nobs-j+1, 10.d0**theta(i), q(j,j,i), 1, qwk(j,j,1), 1)
}
call  dcopy (nobs*nnull, s, 1, swk, 1)
call  dcopy (nobs, y, 1, ywk, 1)
limnla(1) = nlaht - 1.d0
limnla(2) = nlaht + 1.d0
call  dsidr ('v',_
             swk, nobs, nobs, nnull, ywk, qwk(1,1,1), nobs,_
             0.d0, -1, limnla,
             nlaht, score, varht, c, d,_
             qraux, jpvt, dwk,_
             info)
if ( info != 0 )  stop
#   CALCULATE b
b = varht / 10.d0**nlaht

#   CALCULATE c_r, d_r, AND sms
call  dcrdr (swk, nobs, nobs, nnull, qraux, jpvt, qwk(1,1,1), nobs, nlaht,_
             r, nobs, ngrid*ngrid, cr, nobs, dr, nnull, dwk, info)
call  dsms (swk, nobs, nobs, nnull, jpvt, qwk(1,1,1), nobs, nlaht,_
            sms, nnull, dwk, info)

#   GENERATE (\theta R) FOR ESTIMATE EVALUATIONS
for (i=1;i<=nobs;i=i+1) {
    for (j=1;j<=ngrid*ngrid;j=j+1) {
        jj = (j - 1) / ngrid + 1   #   j-TH POINT HAS COORDINATES (xx(ii),xx(jj))
        ii = j - (jj-1) * ngrid    #
        r(i,j) = (10.d0**theta(4) * rc (x(i,1), xx(ii)) * (x(i,2) - .5d0) * (xx(jj) - .5d0)_
                + 10.d0**theta(5) * rc (x(i,2), xx(jj)) * (x(i,1) - .5d0) * (xx(ii) - .5d0)_
                + 10.d0**theta(6) * rc (x(i,1), xx(ii)) * rc (x(i,2), xx(jj)))
    }
}

#   OUTPUT TEST INTERACTION, POSTERIOR MEAN, AND POSTERIOR STANDARD DEVIATION
write (*,*)  'x1	x2	Truth	Estimate	Posterior std'
for (j=1;j<=ngrid*ngrid;j=j+1) {
    jj = (j - 1) / ngrid + 1   #   j-TH POINT HAS COORDINATES (xx(ii),xx(jj))
    ii = j - (jj-1) * ngrid    #
    #   TEST FUNCTION
    dwk(1) = dfi (xx(ii), xx(jj), 1, 2)
    #   POSTERIOR MEAN
    dwk(2) = d(5) * (xx(ii) - .5d0) * (xx(jj) - .5d0)_
            + ddot (nobs, c, 1, r(1,j), 1)
    #   POSTERIOR STANDARD DEVIATION
    dwk(3) = (10.d0**theta(4) * rc (xx(ii), xx(ii)) * (xx(jj) - .5d0) * (xx(jj) - .5d0)_
            + 10.d0**theta(5) * rc (xx(jj), xx(jj)) * (xx(ii) - .5d0) * (xx(ii) - .5d0)_
            + 10.d0**theta(6) * rc (xx(ii), xx(ii)) * rc (xx(jj), xx(jj)))
    dwk(3) = dwk(3) - ddot (nobs, r(1,j), 1, cr(1,j), 1)_
             + sms(5,5) * ((xx(ii) - .5d0) * (xx(jj) - .5d0)) ** 2_
             - 2.d0 * (xx(ii) - .5d0) * (xx(jj) - .5d0) * dr(5,j)
    dwk(3) = dsqrt (b*dwk(3))
    write (*,*)  sngl (xx(ii)), sngl (xx(jj)), (sngl (dwk(i)), i=1,3)
}

}   #   END OF REPLICATION

stop
end


#   TEST MAIN EFFECTS
double precision function  dfm (x, m)
double precision  x
integer           m

switch (m) {
    case 1 :
        dfm = dexp (3.d0 * x) - (dexp (3.d0) - 1.d0) / 3.d0
    case 2 :
        dfm = 1.d6 * (x ** 11 * (1 - x) ** 6) + 1.d4 * (x ** 3 * (1 - x) ** 10) - 6.986477575d0
    default :
        dfm = 0.d0
} 

return
end 


#   TEST INTERACTION
double precision function  dfi (x1, x2, m1, m2)
double precision  x1, x2, pi
integer           m1, m2

pi = 4.d0 * datan (1.d0)
dfi = 0.d0
if ( m1 == 1 & m2 == 2 ) {
    dfi = 5.d0 * dcos (2.d0*pi*(x1-x2))
}

return
end 


#   REPRODUCING KERNEL FOR CUBIC SPLINE ON [0,1]
double precision function  rc (y,x)
double precision  y, x, dk2, dk4

rc = dk2 (y) * dk2 (x) - dk4 (x-y)

return
end


#   AUXILIARY FUNCTION FOR CALCULATING REPRODUCING KERNELS
double precision function  dk2 (x)
double precision  x

x = dabs (x)
dk2 = ( x - .5d0 ) ** 2 
dk2 = ( dk2 - 1.d0 / 12.d0 ) / 2.d0

return
end


#   AUXILIARY FUNCTION FOR CALCULATING REPRODUCING KERNELS
double precision function  dk4 (x)
double precision  x

x = dabs (x)
dk4 = ( x - .5d0 ) ** 2
dk4 = ( dk4 ** 2 - dk4 / 2.d0 + 7.d0 / 240.d0 ) / 24.d0

return
end
SHAR_EOF
cat << \SHAR_EOF > thin.r
#   THIS PROGRAM ILLUSTRATES THE USE OF RKPACK ROUTINES IN FITTING A MODEL
#        y = f(x) + e
#   ON E^2 USING THIN-PLATE SPLINES.  THE PROGRAM CALCULATES THE FIT BASED ON
#   IRREGULAR DATA AND EVALUATES THE ESTIMATE ON A REGULAR GRID ON [-2,2]^2.
#   THIS PROGRAM USES THE SEMI-KERNEL WHICH IS EASY TO EVALUATE BUT DOES NOT
#   ALLOW THE CALCULATION OF POSTERIOR VARIANCE.

program  thin

parameter  ( nobs = 100, nnull = 3, ngrid = 31 )

#   PARAMETERS:
#        nobs     number of observations. 
#        nnull    dimension of null space.
#        ngrid    number of marginal grid points on each of the axes

double precision  x(nobs,2), s(nobs,nnull), qraux(nnull), q(nobs,nobs), y(nobs),_
                  nlaht, score, varht, c(nobs), d(nnull), wk(3*nobs), limnla(2),_
                  xx(ngrid), tmp, rt, df, nsize
real              rnor
integer           info, i, j, ii, jj, jpvt(nnull), dseed, nseed, infosv


#   INPUT SIMULATION PARAMETERS
read (*,*) dseed, nseed, nsize    #SEED FOR DESIGN, SEED FOR NOISE, STD OF NOISE
write (*,*) 'Number of observations', nobs
write (*,*) 'Number of grid points', ngrid, 'times', ngrid
write (*,*) 'Seed for uniform design', dseed
write (*,*) 'Seed for Gaussian noise', nseed
write (*,*) 'Standard deviation of noise', sngl (nsize)

#   GENERATE THE DESIGN
tmp = dble (rnor (dseed))
for (j=1;j<=nobs;j=j+1) {
     x(j,1) = dble (rnor (0))
     x(j,2) = dble (rnor (0))
}

#   GENERATE THE MATRIX S
call  dset (nobs, 1.d0, s(1,1), 1)
for (j=1;j<=nobs;j=j+1) {
    s(j,2) = x(j,1)
    s(j,3) = x(j,2)
}

#   GENERATE THE MATRIX K
for (j=1;j<=nobs;j=j+1) {           # rt APPENDED AT THE END
    for (i=j;i<=nobs;i=i+1)  q(i,j) = rt (x(i,1), x(i,2), x(j,1), x(j,2))
}

#   GENERATE THE RESPONSE y
tmp = dble (rnor (nseed))
for (j=1;j<=nobs;j=j+1)  y(j) = df (x(j,1), x(j,2)) + dble (rnor (0)) * nsize

#   CALL RKPACK DRIVER FOR MODEL FITTING
call  dsidr ('v', s, nobs, nobs, nnull, y, q, nobs, 0.d0, 0, limnla,_
             nlaht, score, varht, c, d, qraux, jpvt, wk, info)
infosv = info

#   SET GRID
for (j=1;j<=ngrid;j=j+1)  xx(j) = -2.d0 + 4.d0 * dfloat (j-1) / dfloat (ngrid-1)

#   OUTPUT INFO FROM dsidr, N*LAMBDA, AND SIGMA HAT
write (*,*)  'Info from dsidr =', infosv, 'log10(n lambda) =', sngl (nlaht),_
             'Sigma hat =', sngl (sqrt (varht))
#   OUTPUT TEST FUNCTION AND ESTIMATE ON THE GRID
write (*,*)  'x1	x2	Truth	Estimate'
for (j=1;j<=ngrid*ngrid;j=j+1) {
    jj = (j - 1) / ngrid + 1   #   j-TH POINT HAS COORDINATES (xx(ii),xx(jj))
    ii = j - (jj-1) * ngrid    #
    #   TEST FUNCTION
    wk(1) = df (xx(ii), xx(jj))
    #   ESTIMATE
    wk(2) = d(1) + d(2) * xx(ii) + d(3) * xx(jj)
    for (i=1;i<=nobs;i=i+1)  wk(2) = wk(2) + c(i) * rt (x(i,1), x(i,2), xx(ii), xx(jj))
    write (*,*)  sngl (xx(ii)), sngl (xx(jj)), (sngl (wk(i)), i=1,2)
}

stop
end


#   TEST FUNCTION
double precision function  df (x1, x2)
double precision  x1, x2

df = x1 ** 2 + x2 ** 2
df = 2.d1 * dexp (-df)

return
end 


#   SEMI KERNEL FOR THIN PLATE SPLINE ON E^2 WITH m=2
double precision function  rt (x1, x2, y1, y2)
double precision  x1, x2, y1, y2

rt = (x1 - y1) ** 2 + (x2 - y2) ** 2
if ( rt > 0.d0 )  rt = rt * dlog (rt)

return
end
SHAR_EOF
cat << \SHAR_EOF > Makefile
FLAGS = -O
LIBS = ../rkpk/rkpk.a ../lib/lib.a

cubic : cubic.o
	f77 $(FLAGS) -o cubic cubic.o $(LIBS)

thin : thin.o
	f77 $(FLAGS) -o thin thin.o $(LIBS)

tensor : tensor.o
	f77 $(FLAGS) -o tensor tensor.o $(LIBS)

tensor1 : tensor1.o
	f77 $(FLAGS) -o tensor1 tensor1.o $(LIBS)

tptp : tptp.o
	f77 $(FLAGS) -o tptp tptp.o $(LIBS)

tptp1 : tptp1.o
	f77 $(FLAGS) -o tptp1 tptp1.o $(LIBS)

.SUFFIXES: .r .o

.r.o:
	f77 -c $(FLAGS) $*.r
SHAR_EOF
cat << \SHAR_EOF > input
2375
5732
3
SHAR_EOF
cat << \SHAR_EOF > cubic.r
#   THIS PROGRAM ILLUSTRATES THE USE OF RKPACK ROUTINES IN FITTING A MODEL
#        y = f(x) + e
#   ON [0,1] USING CUBIC SPLINES.  THE PROGRAM CALCULATES THE FIT BASED ON
#   IRREGULAR DATA AND CALCULATES POSTERIOR MEAN AND POSTERIOR STANDARD
#   DEVIATION ON A REGULAR GRID AS OUTPUT.

program  cubic

parameter  ( nobs = 100, nnull = 2, ngrid = 101 )

#   PARAMETERS:
#        nobs     number of observations. 
#        nnull    dimension of null space.
#        ngrid    number of grid points for output

double precision  x(nobs), s(nobs,nnull), qraux(nnull), q(nobs,nobs), y(nobs),_
                  nlaht, score, varht, b, c(nobs), d(nnull), wk(3*nobs), limnla(2),_
                  xx(ngrid), r(nobs,ngrid), cr(nobs,ngrid), dr(nnull,ngrid),_
                  sms(nnull,nnull), tmp, rc, df, ddot, nsize
real              uni, rnor
integer           info, i, j, jpvt(nnull), dseed, nseed, infosv


#   INPUT SIMULATION PARAMETERS
read (*,*) dseed, nseed, nsize    #SEED FOR DESIGN, SEED FOR NOISE, STD OF NOISE
write (*,*) 'Number of observations', nobs
write (*,*) 'Number of grid points', ngrid
write (*,*) 'Seed for uniform design', dseed
write (*,*) 'Seed for Gaussian noise', nseed
write (*,*) 'Standard deviation of noise', sngl (nsize)

#   GENERATE THE DESIGN
tmp = dble (uni (dseed))
for (j=1;j<=nobs;j=j+1)  x(j) = dble (uni (0))

#   GENERATE THE MATRIX S
call  dset (nobs, 1.d0, s(1,1), 1)
for (j=1;j<=nobs;j=j+1)  s(j,2) = x(j) - .5d0

#   GENERATE THE MATRIX Q
for (j=1;j<=nobs;j=j+1) {
    for (i=j;i<=nobs;i=i+1)  q(i,j) = rc (x(i), x(j))   # rc IS APPENDED AT THE END
}

#   GENERATE THE RESPONSE y
tmp = dble (rnor (nseed))
for (j=1;j<=nobs;j=j+1)  y(j) = df (x(j)) + dble (rnor (0)) * nsize   # df APPENDED AT THE END

#   CALL RKPACK DRIVER FOR MODEL FITTING
call  dsidr ('v', s, nobs, nobs, nnull, y, q, nobs, 0.d0, 0, limnla,_
             nlaht, score, varht, c, d, qraux, jpvt, wk, info)
infosv = info

#   CALCULATE b
b = varht / 10.d0**nlaht

#   SET GRID
for (j=1;j<=ngrid;j=j+1)  xx(j) = 0.d0 + dfloat (j-1) / dfloat (ngrid-1)

#   GENERATE R FOR CALCULATING c_r AND d_r
for (i=1;i<=nobs;i=i+1) {
    for (j=1;j<=ngrid;j=j+1)  r(i,j) = rc (x(i), xx(j))
}

#   CALCULATE c_r, d_r, AND sms
call  dcrdr (s, nobs, nobs, nnull, qraux, jpvt, q, nobs, nlaht,_
             r, nobs, ngrid, cr, nobs, dr, nnull, wk, info)
call  dsms (s, nobs, nobs, nnull, jpvt, q, nobs, nlaht,_
            sms, nnull, wk, info)

#   GENERATE R FOR ESTIMATE EVALUATION
for (i=1;i<=nobs;i=i+1) {
    for (j=1;j<=ngrid;j=j+1)  r(i,j) = rc (x(i), xx(j))
}

#   OUTPUT VAR ESTIMATE AND INFO FROM dsidr
write (*,*)  'Info from dsidr =', infosv, 'log10(n lambda) =', sngl (nlaht),_
             'Sigma hat =', sngl (sqrt (varht))
#   OUTPUT TEST FUNCTION, POSTERIOR MEAN, AND POSTERIOR STANDARD DEVIATION ON GRID
write (*,*)  'Grid	Truth	Estimate	Posterior std'
for (j=1;j<=ngrid;j=j+1) {
    #   TEST FUNCTION
    wk(1) = df (xx(j))
    #   POSTERIOR MEAN
    wk(2) = d(1) + d(2) * (xx(j) - .5d0) + ddot (nobs, r(1,j), 1, c, 1)
    #   POSTERIOR STANDARD DEVIATION
    wk(3) = sms(1,1) + 2.d0 * sms(2,1) * (xx(j) - .5d0) + sms(2,2) * (xx(j) - .5d0)**2_
           + rc (xx(j), xx(j)) - ddot (nobs, r(1,j), 1, cr(1,j), 1)_
           - 2.d0 * dr(1,j) - 2.d0 * (xx(j) - .5d0) * dr(2,j)
    wk(3) = dsqrt (b*wk(3))
    write (*,*)  sngl (xx(j)), (sngl (wk(i)), i=1,3)
}

stop
end


#   TEST FUNCTION
double precision function  df (x)
double precision  x

df = 1.d6 * (x ** 11 * (1 - x) ** 6) + 1.d4 * (x ** 3 * (1 - x) ** 10)

return
end 


#   REPRODUCING KERNEL FOR CUBIC SPLINE ON [0,1]
double precision function  rc (y,x)
double precision  y, x, dk2, dk4

rc = dk2 (y) * dk2 (x) - dk4 (x-y)

return
end


#   AUXILIARY FUNCTION FOR CALCULATING REPRODUCING KERNEL
double precision function  dk2 (x)
double precision  x

x = dabs (x)
dk2 = ( x - .5d0 ) ** 2 
dk2 = ( dk2 - 1.d0 / 12.d0 ) / 2.d0

return
end


#   AUXILIARY FUNCTION FOR CALCULATING REPRODUCING KERNEL
double precision function  dk4 (x)
double precision  x

x = dabs (x)
dk4 = ( x - .5d0 ) ** 2
dk4 = ( dk4 ** 2 - dk4 / 2.d0 + 7.d0 / 240.d0 ) / 24.d0

return
end
SHAR_EOF
cat << \SHAR_EOF > tptp.r
#   THIS PROGRAM ILLUSTRATES THE USE OF RKPACK ROUTINES IN FITTING A MODEL
#        y = C + f1(x1) + f2(x2,x3) + f12(x1,x2,x3) + e
#   ON E^1 x E^2 USING TENSOR-PRODUCT THIN-PLATE SPLINES WITH AVERAGING OPERATOR
#   THE SUMMATION OVER (MARGINAL) DESIGN POINTS.  THE PROGRAM CALCULATES THE FIT
#   AND THE COMPONENT-WISE BAYESIAN CONFIDENCE INTERVALS ON THE DESIGN POINTS,
#   AND COLLECTS COVERAGE PERCENTAGES FOR INTERVALS OF NOMINAL COVERAGES 95%, 90%,
#   75%, AND 50%.  THE SIMULATION RESULTS IN SECTION 7 OF GU AND WAHBA (1992,
#   UW-TR-881-REV) WERE GENERATED USING THIS PROGRAM WITH INTERACTION REMOVED.

program  tptp

parameter  ( nobs = 112, nnull = 6, nq = 5, nrep = 5 )

#   PARAMETERS:
#        nobs     number of observations. 
#        nnull    dimension of null space.
#        nq       number of smoothing parameters.
#        nrep     number of replications.

double precision  x(nobs,3), s1(nobs,2), swk1(nobs,2), qraux1(2), s2(nobs,3),_
                  swk2(nobs,3), qraux2(3), s(nobs,nnull), swk(nobs,nnull),_
                  qraux(nnull), q(nobs,nobs,nq), qwk(nobs,nobs,nq), y(nobs),_
                  ywk(nobs), prec, theta(nq), nlaht, score, varht, c(nobs),_
                  d(nnull), dwk(nobs*nobs*(nq+2)), limnla(2), cr(nobs,nobs,4),_
                  dr(nnull,nobs,4), sms(nnull,nnull), f(nobs,4), nsize, ddot, b, dum
real              rnor
integer           info, i, j, jjj, init, maxiter, jpvt(nnull), infosv, nnull1, nq1,_
                  ct(4,4)


#   SET ALGORITHMIC PARAMETERS
init = 0
prec = 1.d-6
maxiter = 15
nnull1 = nnull
nq1 = nq

#   UNBLOCK THE FOLLOWING SEGMENT FOR MAIN-EFFECT-ONLY MODEL
#nnull1 = 4
#nq1 = 2

#   INPUT DATA
read (*,*)  nsize           # SIZE OF THE NOISE
nsize = dsqrt (nsize)
for (j=1;j<=nobs;j=j+1) {
    read (*,*)  x(j,1), x(j,2), x(j,3), y(j), f(j,1), f(j,2), f(j,3), f(j,4)
}   #   f CONTAINS OVERALL f, MAIN EFFECTS f1, f2, AND INTERACTION f12

#   GENERATE THE MARGINAL SEMI-KERNEL K's
for (j=1;j<=nobs;j=j+1) {
    for (i=j;i<=nobs;i=i+1) {
        q(i,j,1) = dabs (x(i,1)-x(j,1)) ** 3                          # x1 SPACE
        q(i,j,2) = (x(i,2)-x(j,2)) ** 2 + (x(i,3)-x(j,3)) ** 2          # (x2,x3) SPACE
        if ( q(i,j,2) > 0.d0 )  q(i,j,2) = q(i,j,2) * dlog (q(i,j,2))   #
    }
}

#   GENERATE MARGINAL S and Q
#   x1 SPACE
call  dset (nobs, 1.d0, swk1(1,1), 1)                     #                         /R\
call  dcopy (nobs, x(1,1), 1, swk1(1,2), 1)               # QR-DECOM  S~ = (F1 F2) |   |
call  dqrdc (swk1, nobs, nobs, 2, qraux1, jpvt, dwk, 0)   #                         \O/ 
call  dset (nobs*2, 0.d0, s1, 1)                            #
call  dset (2, 1.d0, s1, nobs+1)                            #
for (i=1;i<=2;i=i+1) {                                      # S = F1
    call  dqrsl (swk1, nobs, nobs, 2, qraux1, s1(1,i),_     #
                 s1(1,i), dum, dum, dum, dum, 10000, info)  #
}
call  dqrslm (swk1, nobs, nobs, 2, qraux1, q(1,1,1), nobs, 0, info, dwk)  #
call  dset (nobs*2, 0.d0, q(1,1,1), 1)                                    # Q = F2F2'KF2F2'
call  dqrslm (swk1, nobs, nobs, 2, qraux1, q(1,1,1), nobs, 1, info, dwk)  #
#   (x2,x3) SPACE
call  dset (nobs, 1.d0, swk2(1,1), 1)                     #                         /R\ 
call  dcopy (nobs*2, x(1,2), 1, swk2(1,2), 1)             # QR-DECOM  S~ = (F1 F2) |   |
call  dqrdc (swk2, nobs, nobs, 3, qraux2, jpvt, dwk, 0)   #                         \O/ 
call  dset (nobs*3, 0.d0, s2, 1)                            #
call  dset (3, 1.d0, s2, nobs+1)                            #
for (i=1;i<=3;i=i+1) {                                      # S = F1
    call  dqrsl (swk2, nobs, nobs, 3, qraux2, s2(1,i),_     #
                 s2(1,i), dum, dum, dum, dum, 10000, info)  #
}
call  dqrslm (swk2, nobs, nobs, 3, qraux2, q(1,1,2), nobs, 0, info, dwk)  #                
call  dset (nobs*3, 0.d0, q(1,1,2), 1)					  # Q = F2F2'KF2F2'
call  dqrslm (swk2, nobs, nobs, 3, qraux2, q(1,1,2), nobs, 1, info, dwk)  #

#   GENERATE THE MATRIX S
for (j=1;j<=nobs;j=j+1) {
    s(j,1) = 1.d0                # CONSTANT TERM
    s(j,2) = s1(j,2)               # x1 MAIN EFFECT TERM
    s(j,3) = s2(j,2)                 # (x2,x3) MAIN EFFECT TERMS
    s(j,4) = s2(j,3)                 #
    s(j,5) = s1(j,2) * s2(j,2)         # INTERACTION TERMS
    s(j,6) = s1(j,2) * s2(j,3)         #
}

#   GENERATE THE MATRICES \tilde{Q}_{\beta}'s
for (j=1;j<=nobs;j=j+1) {
    for (i=j;i<=nobs;i=i+1) {
        q(i,j,5) = q(i,j,1) * q(i,j,2)                            #
        q(i,j,3) = q(i,j,1) * (s2(j,2)*s2(i,2)+s2(j,3)*s2(i,3))   # INTERACTION TERMS
        q(i,j,4) = q(i,j,2) * s1(j,2) * s1(i,2)                   #
        q(i,j,1) = q(i,j,1) * s2(j,1) * s2(i,1)                 # x1 MAIN EFFECT TERM
        q(i,j,2) = q(i,j,2) * s1(j,1) * s1(i,1)               # (x2,x3) MAIN EFFECT TERM
    }
}

#   START OF REPLICATION
dum = dble (rnor (5732))
for (jjj=1;jjj<=nrep;jjj=jjj+1) {

#   GENERATE DATA
for (j=1;j<=nobs;j=j+1)  y(j) = f(j,1) + nsize * dble (rnor (0))

#   UNBLOCK NEXT LINE IF ONLY REPLICATE #1 IS OF INTEREST
#if ( jjj != 1 )  next

#   CALL RKPACK DRIVER FOR MODEL FITTING
call  dcopy (nobs*nobs*nq, q, 1, qwk, 1)
call  dcopy (nobs*nnull, s, 1, swk, 1)
call  dcopy (nobs, y, 1, ywk, 1)
call  dmudr ('v',_
             swk, nobs, nobs, nnull1, qwk, nobs, nobs, nq1, ywk,_
             0.d0, init, prec, maxiter,_
             theta, nlaht, score, varht, c, d,_
             dwk, info)
infosv = info

#   GENERATE (\theta R)'s IN qwk FOR CALCULATING c_r AND d_r
for (j=1;j<=nobs;j=j+1)  call  dset (nobs-j+1, 0.d0, qwk(j,j,1), 1)
#   (\theta R) FOR OVERALL FUNCTION
for (i=1;i<=nq1;i=i+1) {
    for (j=1;j<=nobs;j=j+1)
        call  daxpy (nobs-j+1, 10.d0**theta(i), q(j,j,i), 1, qwk(j,j,1), 1)
}
#   (\theta R)'s FOR THE MAIN EFFECTS
for (i=1;i<=2;i=i+1) {
    for (j=1;j<=nobs;j=j+1) {
        call  dcopy (nobs-j+1, q(j,j,i), 1, qwk(j,j,i+1), 1)
        call  dscal (nobs-j+1, 10.d0**theta(i), qwk(j,j,i+1), 1)
    }
}
#   (\theta R) FOR THE INTERACTION
for (j=1;j<=nobs;j=j+1) {
    call  dset (nobs-j+1, 0.d0, qwk(j,j,4), 1)
    for (i=3;i<=5;i=i+1)
        call  daxpy (nobs-j+1, 10.d0**theta(i), q(j,j,i), 1, qwk(j,j,4), 1)
}
#   FILL THE UPPER TRIANGLES
for (i=1;i<=4;i=i+1) {
    for (j=1;j<=nobs;j=j+1)
        call  dcopy (nobs-j, qwk(j+1,j,i), 1, qwk(j,j+1,i), nobs)
}

#   MATRIX DECOMPOSITION FOR CALCULATING c_r, d_r, AND sms
for (j=1;j<=nobs;j=j+1)  call  dcopy (nobs-j+1, qwk(j,j,1), 1, qwk(j,j,5), 1)
call  dcopy (nobs*nnull, s, 1, swk, 1)
call  dcopy (nobs, y, 1, ywk, 1)
limnla(1) = nlaht - 1.d0
limnla(2) = nlaht + 1.d0
call  dsidr ('v',_
             swk, nobs, nobs, nnull1, ywk, qwk(1,1,5), nobs,_
             0.d0, -1, limnla,_
             nlaht, score, varht, c, d,_
             qraux, jpvt, dwk,_
             info)
if ( info != 0 )  stop
#   CALCULATE b
b = varht / 10.d0**nlaht

#   CALCULATE c_r, d_r, AND sms
for (i=1;i<=4;i=i+1) {
    call  dcrdr (swk, nobs, nobs, nnull1, qraux, jpvt, qwk(1,1,5), nobs, nlaht,_
                 qwk(1,1,i), nobs, nobs, cr(1,1,i), nobs, dr(1,1,i), nnull,_
                 dwk, info)
}
call  dsms (swk, nobs, nobs, nnull1, jpvt, qwk(1,1,5), nobs, nlaht,_
            sms, nnull, dwk, info)

#   GENERATE (\theta R)'s IN qwk FOR ESTIMATE EVALUATIONS
for (j=1;j<=nobs;j=j+1)  call  dset (nobs-j+1, 0.d0, qwk(j,j,1), 1)
#   (\theta R) FOR OVERALL FUNCTION
for (i=1;i<=nq1;i=i+1) {
    for (j=1;j<=nobs;j=j+1)
        call  daxpy (nobs-j+1, 10.d0**theta(i), q(j,j,i), 1, qwk(j,j,1), 1)
}
#   (\theta R)'s FOR THE MAIN EFFECTS
for (i=1;i<=2;i=i+1) {
    for (j=1;j<=nobs;j=j+1) {
        call  dcopy (nobs-j+1, q(j,j,i), 1, qwk(j,j,i+1), 1)
        call  dscal (nobs-j+1, 10.d0**theta(i), qwk(j,j,i+1), 1)
    }
}
#   (\theta R) FOR THE INTERACTION
for (j=1;j<=nobs;j=j+1) {
    call  dset (nobs-j+1, 0.d0, qwk(j,j,4), 1)
    for (i=3;i<=5;i=i+1)
        call  daxpy (nobs-j+1, 10.d0**theta(i), q(j,j,i), 1, qwk(j,j,4), 1)
}
#   FILL THE UPPER TRIANGLES
for (i=1;i<=4;i=i+1) {
    for (j=1;j<=nobs;j=j+1)
        call  dcopy (nobs-j, qwk(j+1,j,i), 1, qwk(j,j+1,i), nobs)
}

#   COLLECTING COVERAGE INFORMATION ON THE DESIGN POINTS
for (i=1;i<=4;i=i+1)
    for (j=1;j<=4;j=j+1)  ct(i,j) = 0
for (j=1;j<=nobs;j=j+1) {
    #   OVERALL ESTIMATE:  POSTERIOR MEAN
    dwk(1) = y(j) - 10.d0**nlaht * c(j)
    #   OVERALL ESTIMATE:  POSTERIOR STANDARD DEVIATION
    call  dsymv ('u', nnull1, 1.d0, sms, nnull, s(j,1), nobs, 0.d0, ywk, 1)
    dwk(2) = (qwk(j,j,1) - ddot (nobs, qwk(1,j,1), 1, cr(1,j,1), 1))_
            + ddot (nnull1, s(j,1), nobs, ywk, 1)_
            - 2.d0 * ddot (nnull1, s(j,1), nobs, dr(1,j,1), 1)
    dwk(2) = dsqrt (b*dwk(2))
    #   OVERALL ESTIMATE:  COVERAGE (NO. OF POINTS OUT)
    if ( dabs (f(j,1)-dwk(1)) > dwk(2)*1.9604d0 )  ct(1,1) = ct(1,1) + 1  # 95%
    if ( dabs (f(j,1)-dwk(1)) > dwk(2)*1.6452d0 )  ct(1,2) = ct(1,2) + 1  # 90%
    if ( dabs (f(j,1)-dwk(1)) > dwk(2)*1.1504d0 )  ct(1,3) = ct(1,3) + 1  # 75%
    if ( dabs (f(j,1)-dwk(1)) > dwk(2)*0.6742d0 )  ct(1,4) = ct(1,4) + 1  # 50%

    #   x1 MAIN EFFECT:  POSTERIOR MEAN
    dwk(3) = s(j,2) * d(2) + ddot (nobs, qwk(1,j,2), 1, c, 1)
    #   x1 MAIN EFFECT:  POSTERIOR STANDARD DEVIATION
    dwk(4) = (qwk(j,j,2) - ddot (nobs, qwk(1,j,2), 1, cr(1,j,2), 1))_
            + s(j,2) * s(j,2) * sms(2,2) - 2.d0 * s(j,2) * dr(2,j,2)
    dwk(4) = dsqrt (b*dwk(4))
    #   x1 MAIN EFFECT:  COVERAGE (NO. OF POINTS OUT)
    if ( dabs (f(j,2)-dwk(3)) > dwk(4)*1.9604d0 )  ct(2,1) = ct(2,1) + 1  # 95%
    if ( dabs (f(j,2)-dwk(3)) > dwk(4)*1.6452d0 )  ct(2,2) = ct(2,2) + 1  # 90%
    if ( dabs (f(j,2)-dwk(3)) > dwk(4)*1.1504d0 )  ct(2,3) = ct(2,3) + 1  # 75%
    if ( dabs (f(j,2)-dwk(3)) > dwk(4)*0.6742d0 )  ct(2,4) = ct(2,4) + 1  # 50%

    #   (x2,x3) MAIN EFFECT:  POSTERIOR MEAN
    dwk(5) = s(j,3) * d(3) + s(j,4) * d(4) + ddot (nobs, qwk(1,j,3), 1, c, 1)
    #   (x2,x3) MAIN EFFECT:  POSTERIOR STANDARD DEVIATION
    dwk(6) = (qwk(j,j,3) - ddot (nobs, qwk(1,j,3), 1, cr(1,j,3), 1))_
            + (s(j,3) * s(j,3) * sms(3,3) + s(j,4) * s(j,4) * sms(4,4)_
               + 2.d0 * s(j,3) * s(j,4) * sms(3,4))_
            - 2.d0 * (s(j,3) * dr(3,j,3) + s(j,4) * dr(4,j,3))
    dwk(6) = dsqrt (b*dwk(6))
    #   (x2,x3) MAIN EFFECT:  COVERAGE (NO. OF POINTS OUT)
    if ( dabs (f(j,3)-dwk(5)) > dwk(6)*1.9604d0 )  ct(3,1) = ct(3,1) + 1  # 95%
    if ( dabs (f(j,3)-dwk(5)) > dwk(6)*1.6452d0 )  ct(3,2) = ct(3,2) + 1  # 90%
    if ( dabs (f(j,3)-dwk(5)) > dwk(6)*1.1504d0 )  ct(3,3) = ct(3,3) + 1  # 75%
    if ( dabs (f(j,3)-dwk(5)) > dwk(6)*0.6742d0 )  ct(3,4) = ct(3,4) + 1  # 50%

    #   INTERACTION:  POSTERIOR MEAN
    dwk(7) = s(j,5) * d(5) + s(j,6) * d(6) + ddot (nobs, qwk(1,j,4), 1, c, 1)
    #   INTERACTION:  POSTERIOR STANDARD DEVIATION
    dwk(8) = (qwk(j,j,4) - ddot (nobs, qwk(1,j,4), 1, cr(1,j,4), 1))_
            + (s(j,5) * s(j,5) * sms(5,5) + s(j,6) * s(j,6) * sms(6,6)_
               + 2.d0 * s(j,5) * s(j,6) * sms(5,6))_
            - 2.d0 * (s(j,5) * dr(5,j,4) + s(j,6) * dr(6,j,4))
    dwk(8) = dsqrt (b*dwk(8))
    #   INTERACTION:  COVERAGE (NO. OF POINTS OUT)
    if ( dabs (f(j,4)-dwk(7)) > dwk(8)*1.9604d0 )  ct(4,1) = ct(4,1) + 1  # 95%
    if ( dabs (f(j,4)-dwk(7)) > dwk(8)*1.6452d0 )  ct(4,2) = ct(4,2) + 1  # 90%
    if ( dabs (f(j,4)-dwk(7)) > dwk(8)*1.1504d0 )  ct(4,3) = ct(4,3) + 1  # 75%
    if ( dabs (f(j,4)-dwk(7)) > dwk(8)*0.6742d0 )  ct(4,4) = ct(4,4) + 1  # 50%

#   UNBLOCK THE FOLLOWING SEGMENT TO OUTPUT MARGINAL DESIGNS, RESPONSE y, 
#   POSTERIOR MEANS, AND POSTERIOR STANDARD DEVIATIONS
#    write (*,*)  (sngl (x(j,i)),i=1,3),_    #   marginal designs
#                 sngl (y(j)),_              #   response
#                 (sngl (dwk(i*2-1)),i=1,4),_#   posterior means
#                 (sngl (dwk(i*2)),i=1,4)    #   posterior stds
#    write (*,*)

}

#   OUTPUT COVERAGE INFORMATION, VAR ESTIMATE, AND ERROR CHECK (from dmudr)
                                                     # NO. OF UNCOVERED DATA POINTS
for (j=1;j<=4;j=j+1)  write (*,*)  (ct(i,j), i=1,4)  # ROWS:    95%, 90%, 75%, 50%
                                                     # COLUMNS: f, f1, f2, f12
write (*,*)  sngl (varht), infosv   # SIGMA HAT, info FROM dmudr

}   #   END OF REPLICATION

stop
end
SHAR_EOF
cat << \SHAR_EOF > README

This directory collects six simulation programs illustrating the usage
of RKPACK drivers DSIDR, DMUDR, and utility routines DCRDR and DSMS.
The programs are all briefly commented.

Please note that these programs are intended as sample programs but
NOT black boxes with which one may trade data for smooth functions.
The RK calculation in tptp.r and tptp1.r for tensor product thin plate
splines is technically involved.

To compile the programs under standard UNIX system, simply type `make
<name>', where <name> is to be replaced by cubic, thin, tensor,
tensor1, tptp, or tptp1.  You will need ../lib/lib.a and
../rkpk/rkpk.a in the compilation.  Also included are a sample input
file `in.tptp' which feeds tptp and tptp1 and a sample input file
`input' which feeds the other four.

Chong Gu
April 18, 1992
SHAR_EOF
cat << \SHAR_EOF > tptp1.r
#   THIS PROGRAM ILLUSTRATES THE USE OF RKPACK ROUTINES IN FITTING A MODEL
#        y = C + f1(x1) + f2(x2,x3) + f12(x1,x2,x3) + e
#   ON E^1 x E^2 USING TENSOR-PRODUCT THIN-PLATE SPLINES WITH AVERAGING OPERATOR
#   THE SUMMATION OVER (MARGINAL) DESIGN POINTS.  THE PROGRAM CALCULATES THE FIT
#   BASED ON IRREGULAR DATA AND EVALUATES THE ESTIMATE AND THE COMPONENT-WISE
#   BAYESIAN CONFIDENCE INTERVALS OF THE (x2,x3) MAIN EFFECT ON REGULAR GRIDS.
#   THE PLOTS IN SECTION 7 OF GU AND WAHBA (1992, UW-TR-881-REV) WERE BASED ON
#   DATA GENERATED USING THIS PROGRAM WITH INTERACTION REMOVED.

program  tptp1

parameter  ( nobs = 112, nnull = 6, nq = 5, ngrid = 41 )

#   PARAMETERS:
#        nobs     number of observations. 
#        nnull    dimension of null space.
#        nq       number of smoothing parameters.
#        ngrid    number of grid points on each margin of (x2,x3) plane.

double precision  x(nobs,3), s1(nobs,2), swk1(nobs,2), qraux1(2), s2(nobs,3),_
                  swk2(nobs,3), qraux2(3), s(nobs,nnull), swk(nobs,nnull),_
                  qraux(nnull), q(nobs,nobs,nq), qwk(nobs,nobs,nq), y(nobs),_
                  ywk(nobs), prec, theta(nq), nlaht, score, varht, c(nobs),_
                  d(nnull), dwk(nobs*nobs*(nq+2)), limnla(2), xx(ngrid),_
                  r(nobs,ngrid*ngrid), cr(nobs,ngrid*ngrid), dr(nnull,ngrid*ngrid),_
                  sms(nnull,nnull), ddot, b, dum
integer           info, i, j, ii, jj, init, maxiter, jpvt(nnull), infosv, nnull1, nq1


#   SET ALGORITHMIC PARAMETERS
init = 0
prec = 1.d-6
maxiter = 15
nnull1 = nnull
nq1 = nq

#   UNBLOCK THE FOLLOWING SEGMENT FOR MAIN-EFFECT-ONLY MODEL
#nnull1 = 4
#nq1 = 2

#   INPUT DATA
read (*,*)  dum
for (j=1;j<=nobs;j=j+1) {
    read (*,*)  x(j,1), x(j,2), x(j,3), y(j), dum, dum, dum, dum
}

#   GENERATE THE MARGINAL SEMI-KERNEL K's
for (j=1;j<=nobs;j=j+1) {
    for (i=j;i<=nobs;i=i+1) {
        q(i,j,1) = dabs (x(i,1)-x(j,1)) ** 3                          # x1 SPACE
        q(i,j,2) = (x(i,2)-x(j,2)) ** 2 + (x(i,3)-x(j,3)) ** 2          # (x2,x3) SPACE
        if ( q(i,j,2) > 0.d0 )  q(i,j,2) = q(i,j,2) * dlog (q(i,j,2))   #
    }
}

#   GENERATE MARGINAL S and Q
#   x1 SPACE
call  dset (nobs, 1.d0, swk1(1,1), 1)                     #                         /R\ 
call  dcopy (nobs, x(1,1), 1, swk1(1,2), 1)               # QR-DECOM  S~ = (F1 F2) |   |
call  dqrdc (swk1, nobs, nobs, 2, qraux1, jpvt, dwk, 0)   #                         \O/ 
call  dset (nobs*2, 0.d0, s1, 1)                            #
call  dset (2, 1.d0, s1, nobs+1)                            #
for (i=1;i<=2;i=i+1) {                                      # S = F1
    call  dqrsl (swk1, nobs, nobs, 2, qraux1, s1(1,i),_     #
                 s1(1,i), dum, dum, dum, dum, 10000, info)  #
}
call  dqrslm (swk1, nobs, nobs, 2, qraux1, q(1,1,1), nobs, 0, info, dwk)  #
call  dset (nobs*2, 0.d0, q(1,1,1), 1)                                    # Q = F2F2'KF2F2'
call  dqrslm (swk1, nobs, nobs, 2, qraux1, q(1,1,1), nobs, 1, info, dwk)  #
#   (x2,x3) SPACE
call  dset (nobs, 1.d0, swk2(1,1), 1)                     #                         /R\ 
call  dcopy (nobs*2, x(1,2), 1, swk2(1,2), 1)             # QR-DECOM  S~ = (F1 F2) |   |
call  dqrdc (swk2, nobs, nobs, 3, qraux2, jpvt, dwk, 0)   #                         \O/ 
call  dset (nobs*3, 0.d0, s2, 1)                            #
call  dset (3, 1.d0, s2, nobs+1)                            #
for (i=1;i<=3;i=i+1) {                                      # S = F1
    call  dqrsl (swk2, nobs, nobs, 3, qraux2, s2(1,i),_     #
                 s2(1,i), dum, dum, dum, dum, 10000, info)  #
}
call  dqrslm (swk2, nobs, nobs, 3, qraux2, q(1,1,2), nobs, 0, info, dwk)  #                
call  dset (nobs*3, 0.d0, q(1,1,2), 1)					  # Q = F2F2'KF2F2'
call  dqrslm (swk2, nobs, nobs, 3, qraux2, q(1,1,2), nobs, 1, info, dwk)  #

#   GENERATE THE MATRIX S
for (j=1;j<=nobs;j=j+1) {
    s(j,1) = 1.d0                  # CONSTANT TERM
    s(j,2) = s1(j,2)                 # x1 MAIN EFFECT TERM
    s(j,3) = s2(j,2)                 # (x2,x3) MAIN EFFECT TERMS
    s(j,4) = s2(j,3)                 #
    s(j,5) = s1(j,2) * s2(j,2)         # INTERACTION TERMS
    s(j,6) = s1(j,2) * s2(j,3)         #
}

#   GENERATE THE MATRICES \tilde{Q}_{\beta}'s
for (j=1;j<=nobs;j=j+1) {
    for (i=j;i<=nobs;i=i+1) {
        q(i,j,5) = q(i,j,1) * q(i,j,2)                            #
        q(i,j,3) = q(i,j,1) * (s2(j,2)*s2(i,2)+s2(j,3)*s2(i,3))   # INTERACTION TERMS
        q(i,j,4) = q(i,j,2) * s1(j,2) * s1(i,2)                   #
        q(i,j,1) = q(i,j,1) * s2(j,1) * s2(i,1)                 # x1 MAIN EFFECT TERM
        q(i,j,2) = q(i,j,2) * s1(j,1) * s1(i,1)               # (x2,x3) MAIN EFFECT TERM
    }
}

#   CALL RKPACK DRIVER FOR MODEL FITTING
call  dcopy (nobs*nobs*nq, q, 1, qwk, 1)
call  dcopy (nobs*nnull, s, 1, swk, 1)
call  dcopy (nobs, y, 1, ywk, 1)
call  dmudr ('v',_
             swk, nobs, nobs, nnull1, qwk, nobs, nobs, nq1, ywk,_
             0.d0, init, prec, maxiter,_
             theta, nlaht, score, varht, c, d,_
             dwk, info)
infosv = info

#   SET MARGINAL GRID
for (i=1;i<=ngrid;i=i+1)  xx(i) = -.04d0 + dfloat (i-1) * .08d0 / dfloat (ngrid-1)

#   K S~ R^{-1} R^{-T} = KSR^{-T}
for (j=1;j<=nobs;j=j+1) {
    for (i=j;i<=nobs;i=i+1) {                                                 #
        qwk(i,j,2) = (x(i,2)-x(j,2)) ** 2 + (x(i,3)-x(j,3)) ** 2              # K IN qwk(,,2)
        if ( qwk(i,j,2) > 0.d0 )  qwk(i,j,2) = qwk(i,j,2) * dlog (qwk(i,j,2)) #
    }
}
for (i=1;i<=3;i=i+1) {                                #
    call  dsymv ('l', nobs, 1.d0, qwk(1,1,2), nobs,_  #
                 s2(1,i), 1, 0.d0, qwk(1,i,3), 1)     # 
}                                                     # KSR^{-T} in qwk(,1:3,3)
for (j=1;j<=nobs;j=j+1) {                             #
    call  dcopy (3, qwk(j,1,3), nobs, dwk, 1)         #
    call  dtrsl (swk2, nobs, 3, dwk, 01, info)        #
    call  dcopy (3, dwk, 1, qwk(j,1,3), nobs)         #
}

#   GENERATE (\theta R) FOR CALCULATING c_r AND d_r
for (j=1;j<=ngrid*ngrid;j=j+1) {
    jj = (j - 1) / ngrid + 1   #   j-TH POINT u HAS COORDINATES (xx(ii),xx(jj))
    ii = j - (jj-1) * ngrid    #
    for (i=1;i<=nobs;i=i+1) {
        r(i,j) = (x(i,2)-xx(ii)) ** 2 + (x(i,3)-xx(jj)) ** 2          #       
        if ( r(i,j) > 0.d0 )  r(i,j) = r(i,j) * dlog (r(i,j))         #       
        r(i,j) = r(i,j) - (qwk(i,1,3)_			              #
                 + qwk(i,2,3) * xx(ii) + qwk(i,3,3) * xx(jj))         # R(t,u) IN r(,j) 
    }                                                                 #
    call  dqrsl (swk2, nobs, nobs, 3, qraux2, r(1,j), dum, r(1,j),_   #
                 dum, r(1,j), dum, 00010, info)                       #
    call  dscal (nobs, s1(1,1)*s1(1,1)*10.d0**theta(2), r(1,j), 1)    #
}

#   MATRIX DECOMPOSITION FOR CALCULATING c_r, d_r, AND sms
for (j=1;j<=nobs;j=j+1)  call  dset (nobs-j+1, 0.d0, qwk(j,j,1), 1)
for (i=1;i<=nq1;i=i+1) {
    for (j=1;j<=nobs;j=j+1)
        call  daxpy (nobs-j+1, 10.d0**theta(i), q(j,j,i), 1, qwk(j,j,1), 1)
}
call  dcopy (nobs*nnull, s, 1, swk, 1)
call  dcopy (nobs, y, 1, ywk, 1)
limnla(1) = nlaht - 1.d0
limnla(2) = nlaht + 1.d0
call  dsidr ('v',_
             swk, nobs, nobs, nnull1, ywk, qwk(1,1,1), nobs,_
             0.d0, -1, limnla,_
             nlaht, score, varht, c, d,_
             qraux, jpvt, dwk,_
             info)
if ( info != 0 )  stop

#   CALCULATE b
b = varht / 10.d0**nlaht

#   CALCULATE c_r, d_r, AND sms
call  dcrdr (swk, nobs, nobs, nnull1, qraux, jpvt, qwk(1,1,1), nobs, nlaht,_
             r, nobs, ngrid*ngrid, cr, nobs, dr, nnull, dwk, info)
call  dsms (swk, nobs, nobs, nnull1, jpvt, qwk(1,1,1), nobs, nlaht,_
            sms, nnull, dwk, info)

#   CALCULATE POSTERIOR MEAN AND STANDARD DEVIATION ON GRID
write (*,*)  'x2	x3	Estimate	Posterior std'
call  dqrslm (swk2, nobs, nobs, 3, qraux2, qwk(1,1,2), nobs, 0, info, dwk)   # FKF'
for (j=1;j<=ngrid*ngrid;j=j+1) {
    jj = (j - 1) / ngrid + 1    # j-TH POINT u HAS COORDINATES (xx(ii),xx(jj))
    ii = j - (jj-1) * ngrid     #
    dwk(1) = 1.d0                                  #
    dwk(2) = xx(ii)                                # NULL SPACE BASIS \phi IN dwk(1:3)
    dwk(3) = xx(jj)                                #
    call  dtrsl (swk2, nobs, 3, dwk, 11, info)     #
    call  dsymv ('l', 3, 1.d0, qwk(1,1,2), nobs,_    #
                 dwk, 1, 0.d0, dwk(4), 1)            # SKS'\phi IN dwk(4:6)
    for (i=1;i<=nobs;i=i+1) {                                         #
        r(i,j) = (x(i,2)-xx(ii)) ** 2 + (x(i,3)-xx(jj)) ** 2          # K_{t,u} in r(,j)
        if ( r(i,j) > 0.d0 )  r(i,j) = r(i,j) * dlog (r(i,j))         #
    }
    for (i=1;i<=3;i=i+1)
        dwk(i+6) = ddot (nobs, r(1,j), 1, s2(1,i), 1)                 # K_{u,t}S IN dwk(7:9)
    for (i=1;i<=nobs;i=i+1) {
        r(i,j) = r(i,j) - (qwk(i,1,3)_                                #
                + qwk(i,2,3) * xx(ii) + qwk(i,3,3) * xx(jj))          #
    }                                                                 # R(t,u) IN r(,j)
    call  dqrsl (swk2, nobs, nobs, 3, qraux2, r(1,j), dum, r(1,j),_   #
                 dum, r(1,j), dum, 00010, info)                       #
    #   SCALING
    call  dscal (6, 10.d0**theta(2)*s1(1,1)*s1(1,1), dwk(4), 1)
    call  dscal (nobs, s1(1,1)*s1(1,1)*10.d0**theta(2), r(1,j), 1)
    #   POSTERIOR MEAN
    dwk(10) = dwk(2) * d(3) + dwk(3) * d(4) + ddot (nobs, r(1,j), 1, c, 1)
    #   POSTERIOR STANDARD DEVIATION
    dwk(11) = ddot (3, dwk, 1, dwk(4), 1) - 2.d0 * ddot (3, dwk, 1, dwk(7), 1)
    dwk(11) = (dwk(11) - ddot (nobs, r(1,j), 1, cr(1,j), 1))_
             + (dwk(2) * dwk(2) * sms(3,3) + dwk(3) * dwk(3) * sms(4,4)_
                + 2.d0 * dwk(2) * dwk(3) * sms(3,4))_
             - 2.d0 * (dwk(2) * dr(3,j) + dwk(3) * dr(4,j))
    dwk(11) = dsqrt (b*dwk(11))
    write (*,*)  sngl (xx(ii)), sngl (xx(jj)), sngl (dwk(10)), sngl (dwk(11))
}

stop
end
SHAR_EOF
cat << \SHAR_EOF > in.tptp
   6.5504573E-02

   0.4700036     -2.8572975E-02  3.8292429E-03   6.702679       6.523018    
  -5.7611395E-02 -0.1795486      0.0000000E+00           
					                 
   0.4762342     -2.8038979E-02  3.0298871E-03   6.834330       6.532366    
  -5.5924140E-02 -0.1718880      0.0000000E+00           
					                 
    1.337629     -3.0668963E-02  2.1188280E-03   6.820076       6.765297    
   0.1773328     -0.1722143      0.0000000E+00           
					                 
   0.9202827     -9.8960120E-03  4.4837361E-03   6.303720       6.695732    
   6.4323246E-02 -0.1287703      0.0000000E+00           
					                 
    1.071584     -8.5709924E-03  4.6198699E-03   6.277339       6.740992    
   0.1052937     -0.1244808      0.0000000E+00           
					                 
  -0.1625189     -1.3775451E-02  2.4486941E-03   6.393313       6.383916    
  -0.2289117     -0.1473510      0.0000000E+00           
					                 
  -0.2231435     -1.3973476E-02 -2.4975641E-03   6.559896       6.401677    
  -0.2453313     -0.1131701      0.0000000E+00           
					                 
   0.5766134     -1.6240014E-02 -2.9583289E-03   6.196429       6.622203    
  -2.8741239E-02 -0.1092338      0.0000000E+00           
					                 
  -8.3381608E-02 -1.5932309E-02 -3.0543210E-03   6.629720       6.444622    
  -0.2074783     -0.1080787      0.0000000E+00           
					                 
  -4.0821992E-02 -1.5100349E-02 -2.8605910E-03   6.202922       6.453421    
  -0.1959517     -0.1108062      0.0000000E+00           
					                 
   -1.108663     -1.4863865E-02 -2.9775270E-03   6.339253       6.166106    
  -0.4851819     -0.1088906      0.0000000E+00           
					                 
    1.222304      2.6364293E-02  2.6607890E-02   6.731449       6.903258    
   0.1461057     -3.0260347E-03  0.0000000E+00           
					                 
  -0.4620354      1.4688638E-02  2.0389270E-02   6.388750       6.416108    
  -0.3100347     -3.4036163E-02  0.0000000E+00           
					                 
   7.6961040E-02  1.3949257E-02  1.9633690E-02   6.341170       6.559956    
  -0.1640526     -3.6169983E-02  0.0000000E+00           
					                 
    1.532557      4.1142199E-03  1.5033700E-02   6.707127       6.950183    
   0.2301123     -4.0107228E-02  0.0000000E+00           
					                 
   0.7975072     -2.7734241E-02  6.5938062E-03   6.267028       6.601869    
   3.1076441E-02 -0.1893861      0.0000000E+00           
					                 
    3.157851     -1.5456498E-02  7.2727231E-03   7.656587       7.273330    
   0.6701397     -0.1569886      0.0000000E+00           
					                 
   0.3852624     -2.6080895E-02  3.0298871E-03   6.833274       6.515103    
  -8.0559760E-02 -0.1645162      0.0000000E+00           
					                 
    1.118415     -2.4125511E-02  1.3962630E-05   7.346690       6.749997    
   0.1179748     -0.1281562      0.0000000E+00           
					                 
   0.0000000E+00 -2.4300689E-02 -1.6249010E-03   6.416225       6.469045    
  -0.1848958     -0.1062376      0.0000000E+00           
					                 
   0.4574248     -3.2724887E-02 -1.8517930E-03   6.510863       6.566745    
  -6.1017778E-02 -0.1324153      0.0000000E+00           
					                 
    2.733068     -3.7822478E-02 -3.1660220E-03   7.249128       7.182249    
   0.5551364     -0.1330657      0.0000000E+00           
					                 
   0.3220835     -3.8316362E-02 -7.2727231E-03   6.825534       6.603970    
  -9.7669132E-02 -5.8539424E-02  0.0000000E+00           
					                 
    1.007958     -3.4733623E-02 -8.5432827E-03   6.956055       6.832749    
   8.8064738E-02 -1.5494409E-02  0.0000000E+00           
					                 
   0.3293037     -3.0122202E-02 -6.0073868E-03   6.478281       6.623165    
  -9.5713817E-02 -4.1300174E-02  0.0000000E+00           
					                 
   9.5310181E-02 -2.2709798E-02 -7.8399386E-03   6.489791       6.603145    
  -0.1590832      2.0499860E-03  0.0000000E+00           
					                 
   2.9558800E-02 -2.0913711E-02 -8.8329958E-03   6.519984       6.598383    
  -0.1768904      1.5094976E-02  0.0000000E+00           
					                 
   0.8285518     -3.8078673E-02 -1.1679480E-02   6.606997       6.826431    
   3.9483167E-02  2.6769413E-02  0.0000000E+00           
					                 
   0.2851789     -2.6550850E-02 -1.1426420E-02   6.807120       6.738097    
  -0.1076633      8.5581772E-02  0.0000000E+00           
					                 
   0.6881346     -2.6488189E-02 -1.1412460E-02   6.906674       6.846984    
   1.4586849E-03  8.5347295E-02  0.0000000E+00           
					                 
   0.6097656     -3.4339294E-02 -1.3730070E-02   6.920940       6.826474    
  -1.9763602E-02  8.6058594E-02  0.0000000E+00           
					                 
   0.3001046     -3.4283776E-02 -1.2770230E-02   6.345798       6.725660    
  -0.1036213      6.9102339E-02  0.0000000E+00           
					                 
   9.5310181E-02 -3.3149146E-02 -1.4384510E-02   7.191700       6.707186    
  -0.1590832      0.1060911      0.0000000E+00           
					                 
   0.1397619     -3.3290084E-02 -1.4839990E-02   6.986866       6.726413    
  -0.1470447      0.1132789      0.0000000E+00           
					                 
  -0.6161861     -3.0142136E-02 -1.3342650E-02   6.286397       6.516064    
  -0.3517874      0.1076732      0.0000000E+00           
					                 
   0.6259384     -3.4417009E-03  2.3020869E-03   6.454190       6.602108    
  -1.5383991E-02 -0.1426871      0.0000000E+00           
					                 
   0.6575200     -3.4659230E-03  1.0751230E-03   6.806765       6.606887    
  -6.8317205E-03 -0.1464593      0.0000000E+00           
					                 
  -0.2231435     -1.6168786E-02 -3.8746311E-04   6.978184       6.376817    
  -0.2453313     -0.1380308      0.0000000E+00           
					                 
  -0.3424903     -1.5683006E-02 -6.3006382E-04   6.283511       6.346907    
  -0.2776558     -0.1356162      0.0000000E+00           
					                 
  -0.5447272     -1.4804031E-02 -1.1065390E-03   6.517201       6.297860    
  -0.3324322     -0.1298867      0.0000000E+00           
					                 
  -8.3381608E-02 -7.9383943E-03 -2.4260080E-04   6.178396       6.431004    
  -0.2074783     -0.1216969      0.0000000E+00           
					                 
   0.5877867     -6.4936592E-03 -8.9709909E-04   6.592211       6.609035    
  -2.5715500E-02 -0.1254280      0.0000000E+00           
					                 
   0.2231435     -1.7949453E-02 -3.7332510E-03   6.460427       6.542559    
  -0.1244634     -9.3156703E-02  0.0000000E+00           
					                 
   8.6177699E-02 -1.3917915E-02 -5.2551618E-03   6.504828       6.526649    
  -0.1615565     -7.1972646E-02  0.0000000E+00           
					                 
   0.1043600     -1.0490126E-02 -4.3144408E-03   6.553641       6.516614    
  -0.1566323     -8.6931825E-02  0.0000000E+00           
					                 
   6.7658648E-02 -8.3458787E-03 -3.0543210E-03   6.804101       6.490137    
  -0.1665720     -0.1034694      0.0000000E+00           
					                 
  -0.4385050     -1.6271355E-02 -7.1610250E-03   6.239214       6.423556    
  -0.3036614     -3.2961197E-02  0.0000000E+00           
					                 
  -0.1508229     -1.9627474E-02 -1.0258865E-02   6.204000       6.572187    
  -0.2257439      3.7752576E-02  0.0000000E+00           
					                 
   5.8268908E-02 -1.9923754E-02 -1.1562549E-02   6.444702       6.655363    
  -0.1691149      6.4299382E-02  0.0000000E+00           
					                 
    1.327075     -1.8828359E-02 -1.1756267E-02   7.022972       6.995304    
   0.1744750      6.0650095E-02  0.0000000E+00           
					                 
   0.8628899     -1.7526373E-02 -1.0869697E-02   6.997017       6.846593    
   4.8781727E-02  3.7632480E-02  0.0000000E+00           
					                 
   0.3506569     -2.1882271E-02 -1.3730074E-02   6.831537       6.787021    
  -8.9931197E-02  0.1167735      0.0000000E+00           
					                 
   0.0000000E+00 -1.6940894E-02 -1.7697630E-03   6.342434       6.451334    
  -0.1848958     -0.1239493      0.0000000E+00           
					                 
   0.5068176     -3.1420738E-02 -1.9879290E-03   6.845943       6.588174    
  -4.7642056E-02 -0.1243626      0.0000000E+00           
					                 
  -1.0050340E-02 -3.2844476E-02 -7.2238548E-03   6.470811       6.541976    
  -0.1876178     -3.0584749E-02  0.0000000E+00           
					                 
  -0.3856625     -3.3022437E-02 -1.3026769E-02   6.529196       6.553283    
  -0.2893490      8.2453020E-02  0.0000000E+00           
					                 
    1.208960     -3.4702305E-02 -1.3520654E-02   6.892715       6.982590    
   0.1424926      7.9918355E-02  0.0000000E+00           
					                 
    3.433019     -2.6167765E-02  6.8835239E-03   7.700999       7.320774    
   0.7446379     -0.1840422      0.0000000E+00           
					                 
   0.4946962     -1.7099015E-02 -3.3353181E-03   5.973235       6.607126    
  -5.0924554E-02 -0.1021284      0.0000000E+00           
					                 
   0.5247285     -2.7267156E-02  1.5760320E-03   6.603404       6.561932    
  -4.2791732E-02 -0.1554551      0.0000000E+00           
					                 
   -1.237874     -1.0035637E-02 -2.5446869E-03   6.125936       6.134462    
  -0.5201812     -0.1055355      0.0000000E+00           
					                 
   0.6151856     -2.8671227E-02 -9.3757706E-03   6.850204       6.779947    
  -1.8295847E-02  3.8064636E-02  0.0000000E+00           
					                 
   0.4446858      3.2436491E-03  1.5028464E-02   6.909156       6.656170    
  -6.4467564E-02 -3.9541088E-02  0.0000000E+00           
					                 
   0.1133287     -7.6007210E-03  1.1658535E-02   6.405915       6.520586    
  -0.1542034     -8.5388541E-02  0.0000000E+00           
					                 
    1.975469      3.8316362E-02  3.5602428E-02   6.927082       7.074042    
   0.3500295     -3.6166053E-02  0.0000000E+00           
					                 
   0.8064759      8.6844229E-04  8.1209280E-03   6.255124       6.692550    
   3.3505116E-02 -0.1011335      0.0000000E+00           
					                 
  -6.1875399E-02 -5.3851595E-03  6.8119671E-03   6.468435       6.447364    
  -0.2016537     -0.1111608      0.0000000E+00           
					                 
   0.5364934     -3.9004958E-03 -1.4311690E-03   6.322824       6.578411    
  -3.9605789E-02 -0.1421618      0.0000000E+00           
					                 
   0.7323679      3.1054402E-03 -3.2969210E-03   6.573042       6.588926    
   1.3436952E-02 -0.1846899      0.0000000E+00           
					                 
  -0.5276327     -6.1645294E-03 -5.6722900E-03   5.995416       6.337548    
  -0.3278021     -9.4828568E-02  0.0000000E+00           
					                 
   0.5007753     -2.0917984E-02 -1.1335671E-02   6.409370       6.776803    
  -4.9278330E-02  6.5902881E-02  0.0000000E+00           
					                 
    1.492904     -3.0206211E-02 -1.7201116E-02   7.319273       7.152666    
   0.2193759      0.1731116      0.0000000E+00           
					                 
   0.5933269     -2.5391614E-02 -2.2071388E-02   7.404623       6.984800    
  -2.4215214E-02  0.2488363      0.0000000E+00           
					                 
   0.3920421     -1.9868203E-02 -2.0563764E-02   6.533486       6.877417    
  -7.8723773E-02  0.1959623      0.0000000E+00           
					                 
   0.7202759     -2.6238972E-02 -2.2275539E-02   7.142084       7.019626    
   1.0162459E-02  0.2492851      0.0000000E+00           
					                 
   0.5187938     -1.8721523E-02 -2.3147980E-02   7.348371       6.918883    
  -4.4398874E-02  0.2031029      0.0000000E+00           
					                 
    1.160021     -3.1004986E-02 -2.7654706E-02   6.770537       7.155087    
   0.1292409      0.2656681      0.0000000E+00           
					                 
    1.658228     -2.9818915E-02 -2.7218537E-02   7.424509       7.289629    
   0.2641384      0.2653118      0.0000000E+00           
					                 
    1.763017     -1.0490126E-02 -2.7558750E-02   6.667977       7.131439    
   0.2925098      7.8750402E-02  0.0000000E+00           
					                 
    1.621367     -3.3318561E-02 -3.1328768E-02   7.420779       7.306509    
   0.2541580      0.2921731      0.0000000E+00           
					                 
    1.327075     -2.0422297E-02 -3.2452188E-02   7.624606       7.165191    
   0.1744750      0.2305373      0.0000000E+00           
					                 
   0.5877867     -3.2499939E-02 -3.2841191E-02   7.119359       7.032610    
  -2.5715500E-02  0.2981471      0.0000000E+00           
					                 
   0.3920421     -1.2370735E-02 -3.5602428E-02   7.035511       6.780488    
  -7.8723773E-02  9.9033393E-02  0.0000000E+00           
					                 
    1.463255     -3.1752475E-02 -2.8940499E-02   6.962271       7.246269    
   0.2113482      0.2747425      0.0000000E+00           
					                 
   0.4187103     -1.2548820E-02 -1.5731748E-02   7.174058       6.749037    
  -7.1501851E-02  6.0359865E-02  0.0000000E+00           
					                 
    1.004302      7.1618869E-03  8.8713923E-03   6.380754       6.753075    
   8.7074652E-02 -9.4178297E-02  0.0000000E+00           
					                 
    1.321756     -3.3200394E-02 -3.0747853E-02   7.451348       7.221478    
   0.1730347      0.2882643      0.0000000E+00           
					                 
    1.733424     -2.2377148E-03  1.8527355E-02   6.990656       7.004251    
   0.2844976     -4.0425252E-02  0.0000000E+00           
					                 
    1.706565     -1.6010659E-02 -2.8570643E-02   7.524852       7.206444    
   0.2772255      0.1690402      0.0000000E+00           
					                 
   0.7929925     -3.2262180E-02 -3.1072330E-02   7.139350       7.079945    
   2.9853886E-02  0.2899120      0.0000000E+00           
					                 
    1.214913     -3.3754192E-02 -2.2586130E-02   7.360447       7.115140    
   0.1441044      0.2108575      0.0000000E+00           
					                 
   0.4187103     -2.2222685E-02 -2.1073291E-02   6.949375       6.911248    
  -7.1501851E-02  0.2225709      0.0000000E+00           
					                 
   0.7975072      1.1648097E-03  1.5323980E-03   6.611337       6.625384    
   3.1076441E-02 -0.1658708      0.0000000E+00           
					                 
   0.4317824     -1.7432953E-03  7.8294668E-03   6.495232       6.588479    
  -6.7961864E-02 -0.1037376      0.0000000E+00           
					                 
    1.730770     -3.1091839E-02 -2.3681903E-02   7.009170       7.279696    
   0.2837791      0.2357382      0.0000000E+00           
					                 
   0.6097656      3.3020671E-03 -9.3026040E-04   6.940802       6.556676    
  -1.9763594E-02 -0.1837386      0.0000000E+00           
					                 
   0.8020016     -1.2688230E-03 -2.9338943E-03   7.058734       6.635450    
   3.2293506E-02 -0.1570218      0.0000000E+00           
					                 
    1.686399     -2.9699307E-02 -2.7801258E-02   7.385159       7.300848    
   0.2717657      0.2689044      0.0000000E+00           
					                 
    1.675226     -3.5243409E-03 -1.4278053E-02   6.672657       6.985912    
   0.2687405     -4.3006923E-02  0.0000000E+00           
					                 
    1.829376     -2.9640928E-02 -2.8130995E-02   7.595140       7.341594    
   0.3104763      0.2709390      0.0000000E+00           
					                 
   0.8754687     -6.0177748E-03 -1.2969178E-02   6.750093       6.783907    
   5.2187964E-02 -2.8459791E-02  0.0000000E+00           
					                 
   0.7080358      3.1638581E-03 -3.1660220E-03   6.471524       6.581868    
   6.8478812E-03 -0.1851581      0.0000000E+00           
					                 
   0.3074847     -2.4497230E-02 -2.2498885E-02   7.129153       6.910567    
  -0.1016226      0.2520109      0.0000000E+00           
					                 
    2.214846      1.9271364E-02  3.1168276E-02   7.359655       7.165607    
   0.4148374     -9.4089983E-03  0.0000000E+00           
					                 
    1.627278     -3.1360935E-02 -1.7670538E-02   7.539319       7.189115    
   0.2557585      0.1731780      0.0000000E+00           
					                 
    2.761907      3.3868082E-02  3.0805422E-02   7.448471       7.304985    
   0.5629440     -1.8138114E-02  0.0000000E+00           
					                 
   0.7929925     -2.9047150E-02 -1.8930456E-02   6.921924       6.991223    
   2.9853880E-02  0.2011903      0.0000000E+00           
					                 
    1.442202     -3.3532105E-02 -1.8848440E-02   7.214940       7.140189    
   0.2056477      0.1743630      0.0000000E+00           
					                 
   0.2468601     -2.6021082E-02 -4.6059075E-03   6.511280       6.585474    
  -0.1180406     -5.6664012E-02  0.0000000E+00           
					                 
    1.018847     -2.7848519E-03  1.5740475E-02   6.514668       6.802907    
   9.1013439E-02 -4.8285153E-02  0.0000000E+00           
					                 
    1.749200     -1.7432950E-03  1.8349363E-02   6.994211       7.010152    
   0.2887689     -3.8795166E-02  0.0000000E+00           
					                 
  -0.3424903     -1.4368099E-02 -1.4486229E-04   6.479924       6.346223    
  -0.2776558     -0.1363000      0.0000000E+00
SHAR_EOF
cd ..
mkdir rkpk
cd rkpk
cat << \SHAR_EOF > dmudr.r
#:::::::::::
#   dmudr
#:::::::::::

subroutine  dmudr (vmu,_
                   s, lds, nobs, nnull, q, ldqr, ldqc, nq, y,_     # inputs
                   tol, init, prec, maxite,_                       # tune para
                   theta, nlaht, score, varht, c, d,_              # outputs
                   wk, info)

#  Acronym:  Double precision MUltiple smoothing parameter DRiver.
 
#  Purpose:  This routine implements the iterative algorithm for minimizing
#      GCV/GML scores with multiple smoothing parameters described in 
#      Gu and Wahba (1991, SISSC).

#  WARNING:  Please be sure that you understand what this routine does before 
#      you call it.  Pilot runs with small problems are recommended.  This
#      routine performs VERY INTENSIVE numerical calculations for big nobs.

integer           lds, nobs, nnull, ldqr, ldqc, nq, init, maxite,_
                  info

double precision  s(lds,*), q(ldqr,ldqc,*), y(*), tol, prec,_
                  theta(*), nlaht, score, varht, c(*), d(*),_
                  wk(*)

character*1       vmu

#  On entry:
#      vmu        'v':  GCV criterion.
#                 'm':  GML criterion.
#                 'u':  unbiased risk estimate.
#      s          the matrix S, of size (lds,nnull).
#      nobs       the number of observations.
#      nnull      the dimension of the null space.
#      q          the matrices Q_{i}'s, of dimension (ldqr,ldqc,nq).
#      nq         the number of Q_{i}'s.
#      y          the response vector of size (nobs)
#      tol        the tolerance for truncation in the tridiagonalization; usually set to 0.d0.
#      init       0 :  no initial values provided for the theta.
#                 1 :  initial values provided for the theta.
#      theta      initial values of theta if init = 1.
#      prec       precision requested for the minimum score value, where precision
#                   is the weaker of the absolute and relative precisions.
#      maxite     maximum number of iterations allowed; usually 20 is enough.
#      varht      known variance if vmu=='u'.
 
#  On exit:
#      theta      the vector of parameter log10(theta) used in the final model,
#                 of dimension (nq).  -25 indicates effective minus infinity.
#      nlaht      the estimated  log10(n*lambda)|theta  in the final model.
#      score      the minimum GCV/GML/URE score found at (theta, nlaht).
#      varht      the variance estimate.
#      c,d        the coefficient estimates.
#      info        0 :  normal termination.
#                 -1 :  dimension error.
#                 -2 :  F_{2}^{T} Q_{*}^{theta} F_{2} !>= 0.
#                 -3 :  tuning parameters are out of scope.
#                 -4 :  fails to converge within maxite steps.
#                 -5 :  fails to find a reasonable descent direction.
#                 >0 :  the matrix S is rank deficient: rank(S)+1.
#      s,q,y      destroyed.
#      others     intact.

#  Work arrays:
#      wk         of size (nobs*nobs*(nq+2))

#  Routines called directly:
#      Rkpack  -- dmudr1

#  Routines called indirectly:
#      Blas    -- dasum, daxpy, dcopy, ddot, dnrm2, dscal, dswap, idamax
#      Blas2   -- dgemv, dsymv, dsyr2
#      Fortran -- dabs, dexp, dfloat, dlog, dlog10, dmax1, dsqrt
#      Linpack -- dpbfa, dpbsl, dpofa, dposl, dqrdc, dqrsl, dtrsl
#      Rkpack  -- dcoef, dcore, ddeev, deval, dgold, dmcdc, dqrslm,
#                 dstup, dsytr, dtrev
#      Other   -- dprmut, dset

#  Written:  Chong Gu, Statistics, Purdue, latest version 3/9/91.

integer  n, n0
integer  iqraux, itraux, itwk, iqwk, iywk, ithewk, ihes, igra, ihwk1, ihwk2,_
         igwk1, igwk2, ikwk, iwork1, iwork2, ijpvt, ipvtwk


n = nobs
n0 = nnull

iqraux = 1
itraux = iqraux + n0
itwk = itraux + (n-n0-2)
iqwk = itwk + 2 * (n-n0)
iywk = iqwk + n * n
ithewk = iywk + n
ihes = ithewk + nq
igra = ihes + nq * nq
ihwk1 = igra + nq
ihwk2 = ihwk1 + nq * nq
igwk1 = ihwk2 + nq * nq
igwk2 = igwk1 + nq
ikwk = igwk2 + nq
iwork1 = ikwk + (n-n0) * (n-n0) * nq
iwork2 = iwork1 + n
ijpvt = iwork2 + n
ipvtwk = ijpvt + n0

call  dmudr1 (vmu,_
             s, lds, nobs, nnull, q, ldqr, ldqc, nq, y,_     # inputs
             tol, init, prec, maxite,_                       # tune para
             theta, nlaht, score, varht, c, d,_              # outputs
             wk(iqraux), wk(ijpvt), wk(itwk), wk(itraux), wk(iqwk),_
             wk(iywk), wk(ithewk), wk(ihes), wk(igra), wk(ihwk1),_
             wk(ihwk2), wk(igwk1), wk(igwk2), wk(ipvtwk), wk(ikwk),_
             wk(iwork1), wk(iwork2),_
             info)

return
end
SHAR_EOF
cat << \SHAR_EOF > dcoef.r

#:::::::::::
#   dcoef
#:::::::::::

subroutine  dcoef (s, lds, nobs, nnull, qraux, jpvt, z, q, ldq, nlaht,_
                   c, d, info, twk)

#  Purpose:  To compute the estimated coefficients of the model.

integer           lds, nobs, nnull, jpvt(*), ldq, info
double precision  s(lds,*), qraux(*), z(*), q(ldq,*), nlaht, c(*), d(*),_
                  twk(2,*)

#  On entry:
#      s,qraux,jpvt
#                 QR-decomposition of  S = F R.
#      lds        leading dimension of s.
#      nobs       number of observations.
#      nnull      dimension of null space.
#      z          diag(I, U^{T}) F^{T} y.
#      q          U^{T} F_{2}^{T} Q F_{2} U in BOTTOM-RIGHT corner's
#                     LOWER triangle and SUPER DIAGONAL;
#                 F_{2}^{T} Q F_{1} in BOTTOM-LEFT corner.
#      ldq        leading dimension of q.
#      nlaht      estimated log10(n*lambda).

#  On exit:
#      c          parameters c.
#      d          parameters d.
#      info        0: normal termination.
#                 >0: S is not of full rank: rank(S)+1 .
#                 -1: dimension error.
#                 -2: F_{2}^{T} Q F_{2} is not non-negative definite.

#  Work array:
#      twk        of size at least (2,nobs-nnull).

#  Routines called directly:
#      Blas    -- daxpy, dcopy, ddot
#      Linpack -- dpbfa, dpbsl, dqrsl, dtrsl
#      Other   -- dprmut, dset

#  Written:  Chong Gu, Statistics, UW-Madison, 5/4/88 at Yale.

double precision  dum, ddot
integer           n, n0

info = 0

#   check dimension
if ( nnull < 1 | nnull >= nobs | nobs > lds | nobs > ldq ) {
    info = -1
    return
}

#   set working parameters
n0 = nnull
n = nobs - nnull

#   compute  U ( T + n*lambdahat I )^{-1} z
call  dset (n, 10.d0 ** nlaht, twk(2,1), 2)
call  daxpy (n, 1.d0, q(n0+1,n0+1), ldq+1, twk(2,1), 2)
call  dcopy (n-1, q(n0+1,n0+2), ldq+1, twk(1,2), 2)
call  dpbfa (twk, 2, n, 1, info)
if ( info != 0 ) {
    info = -2
    return
}
call  dpbsl (twk, 2, n, 1, z(n0+1))
call  dcopy (n-2, q(n0+2,n0+1), ldq+1, twk, 1)
call  dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, twk, z(n0+2), z(n0+2), dum,_
             dum, dum, dum, 10000, info)

#   compute  c
call  dset (n0, 0.d0, c, 1)
call  dcopy (n, z(n0+1), 1, c(n0+1), 1)
call  dqrsl (s, lds, nobs, nnull, qraux, c, c, dum, dum, dum, dum, 10000,_
             info)

#   compute  d
for (j=1;j<=n0;j=j+1) {
    d(j) = z(j) - ddot (n, z(n0+1), 1, q(n0+1,j), 1)
}
call  dtrsl (s, lds, n0, d, 01, info)
call  dprmut (d, n0, jpvt, 1)

return
end

#...............................................................................

SHAR_EOF
cat << \SHAR_EOF > Makefile
OBJECTS = dcoef.o dcore.o dcrdr.o ddeev.o deval.o dgold.o dmcdc.o dmudr.o dmudr1.o dqrslm.o dsidr.o dsms.o dstup.o dsytr.o dtrev.o
FLAGS = -O

.SUFFIXES: .f .o

.f.o:
	f77 -c $(FLAGS) $*.f

rkpk.a :: $(OBJECTS)
	ar rv  rkpk.a $(OBJECTS)
	rm *.o
	ranlib  rkpk.a
SHAR_EOF
cat << \SHAR_EOF > dcore.r

#:::::::::::
#   dcore
#:::::::::::

subroutine  dcore (vmu, q, ldq, nobs, nnull, tol, z, job, limnla, nlaht,_
                   score, varht, info, twk, work)

#  Purpose:  To evaluate the GCV/GML score function at various trial values
#      of n*lambda using the tridiagonalization GCV/GML algorithm.  Perform
#      either golden section search or regular grid search for minimizing
#      n*lambda.

character*1       vmu
integer           ldq, nobs, nnull, job, info
double precision  q(ldq,*), tol, z(*), limnla(2), nlaht, score(*), varht,_
                  twk(2,*), work(*)

#  On entry:
#      vmu        'v':  GCV criterion.
#                 'm':  GML criterion.
#                 'u':  unbiased risk estimate.
#      q          F^{T} Q F, only refer the LOWER triangle of the BOTTOM-
#                 RIGHT corner, i.e., F_{2}^{T} Q F_{2}.
#      ldq        leading dimension of Q.
#      nobs       number of observations.
#      nnull      dimension of null space.
#      tol        tolerance of truncation.
#      z          F^{T} y.
#      job         0:  searching interval for nlaht chosen automatically.
#                 -1:  searching interval for nlaht provided by limnla.
#                 >0:  search regular grid points on [limnla(1),limnla(2)]:
#                        #(grids) = job + 1.
#      limnla     searching interval in log10 scale, see job.
#      varht      known variance if vmu=='u'.

#  On exit:
#      q          tridiagonal form in diagonal and superdiagonal of the
#                 corner, Householder factors in strict lower triangle of 
#                 the corner.
#      z          diag(I, U^{T}) F^{T} y.
#      limnla     see limnla of entry.
#      nlaht      the estimated log10(n*lambda).
#      score      job <= 0 :  the GCV/GML/URE score at nlaht.
#                 job  > 0 :  the GCV/GML/URE score at the regular grid points.
#      varht      variance estimate.
#      info        0 :  normal termination.
#                 -1 :  dimension error.
#                 -2 :  F_{2}^{T}QF_{2} is not non-negative definite.
#                 -3 :  vmu is none of 'v', 'm', or 'u'.

#  Work arrays:
#      twk        of size at least (2,nobs-nnull).
#      work       of size at least (nobs-nnull).

#  Routines called directly:
#      Fortran -- dfloat, dlog10
#      Blas    -- dasum, dcopy
#      Linpack -- dqrsl
#      Rkpack  -- deval, dgold, dsytr

#  Written:  Chong Gu, Statistics, Purdue, latest version 3/24/92.

double precision  dum, low, upp, dasum, mchpr
integer           n0, n, j

info = 0

#   check vmu
if ( vmu != 'v' & vmu != 'm' & vmu != 'u' ) {
    info = -3
    return
}

#   check dimension
if ( nnull < 1 | nobs <= nnull | nobs > ldq ) {
    info = -1
    return
}

#   set working parameters
n0 = nnull
n = nobs - nnull

#   tridiagonalization  U^{T} ( F_{2}^{T} Q F_{2} ) U = T
call  dsytr (q(n0+1,n0+1), ldq, n, tol, info, work)
             if ( info != 0 )  return

#   U^{T} z_{2}
call  dcopy (n-2, q(n0+2,n0+1), ldq+1, work, 1)
call  dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, work, z(n0+2), dum, z(n0+2),_
             dum, dum, dum, 01000, info)

#   set searching range
if ( job == 0 ) {
    mchpr = 1.d0
    while ( 1.d0 + mchpr > 1.d0 )  mchpr = mchpr / 2.d0
    mchpr = mchpr * 2.d0
    limnla(2) = dmax1 (dasum (n, q(n0+1,n0+1), ldq+1) * 1.d2, mchpr)
    limnla(1) = limnla(2) * mchpr
    limnla(2) = dlog10 (limnla(2))
    limnla(1) = dlog10 (limnla(1))
}

low = limnla(1)
upp = limnla(2)
if ( job <= 0 ) {
    #   compute score and estimate nlaht thru golden-section search
    call dgold (vmu, q(n0+1,n0+1), ldq, n, z(n0+1), low, upp, nlaht,_
                score(1), varht, info, twk, work)
    if ( vmu == 'v' )  score(1) = score(1) * dfloat (nobs) / dfloat (n)
    if ( vmu == 'm' )  score(1) = score(1) * dfloat (n) / dfloat (nobs)
    if ( vmu == 'u' )  score(1) = score(1) * dfloat (n) / dfloat (nobs) + 2.d0 * varht
}
else {
    #   regular grid evaluation
    call  deval (vmu, q(n0+1,n0+1), ldq, n, z(n0+1), job, low, upp, nlaht,_
                 score, varht, info, twk, work)
    dum = dfloat (nobs) / dfloat (n)
    for (j=1;j<=job+1;j=j+1) {
        if ( vmu == 'v' )  score(j) = score(j) * dum
        if ( vmu == 'm' )  score(j) = score(j) / dum
        if ( vmu == 'u' )  score(j) = score(j) / dum + 2.d0 * varht
    }
}

return
end

#...............................................................................

SHAR_EOF
cat << \SHAR_EOF > dcrdr.r

#:::::::::::
#   dcrdr
#:::::::::::

subroutine  dcrdr (s, lds, nobs, nnull, qraux, jpvt, q, ldq, nlaht,_
                   r, ldr, nr, cr, ldcr, dr, lddr, wk, info)

#  Purpose:  To compute auxiliary quantities cr and dr for posterior covariance

#  Usage:  Use s, qraux, jpvt, q, and nlaht returned by dsidr.

integer           lds, nobs, nnull, jpvt(*), ldq, ldr, nr, ldcr, lddr, info
double precision  s(lds,*), qraux(*), q(ldq,*), nlaht, r(ldr,*), cr(ldcr,*),_
                  dr(lddr,*), wk(2,*)

#  On entry:
#      s,qraux,jpvt
#                 QR-decomposition of  S = F R.
#      nobs       number of observations.
#      nnull      dimension of null space.
#      q          U^{T} F_{2}^{T} Q F_{2} U in BOTTOM-RIGHT corner's
#                     LOWER triangle and SUPER DIAGONAL;
#                 F_{2}^{T} Q F_{1} in BOTTOM-LEFT corner;
#      ldq        leading dimension of q.
#      nlaht      estimated log10(n*lambda).
#      r          R(t,s^{T}), distroyed on exit.
#      nr         length of s.

#  On exit:
#      cr         (M^{-1}-M^{-1}S(S^{T}M^{-1}S)^{-1}S^{T}M^{-1})R(t,s^{T})
#      dr         (S^{T}M^{-1}S)^{-1}S^{T}M^{-1}R(t,s^{T})
#      info        0: normal termination.
#                 >0: S is not of full rank: rank(S)+1 .
#                 -1: dimension error.
#                 -2: F_{2}^{T} Q F_{2} is not non-negative definite.
#      r          destroyed.
#      others     intact.

#  Work array:
#      wk         of size at least (2,nobs-nnull).

#  Routines called directly:
#      Blas    -- daxpy, dcopy, ddot
#      Linpack -- dpbfa, dpbsl, dqrsl, dtrsl
#      Other   -- dprmut, dset

#  Written:  Chong Gu, Statistics, Purdue, 1/31/91.

double precision  dum, ddot
integer           i, j, n, n0

info = 0

#   check dimension
if ( nnull < 1 | nnull >= nobs | nobs > lds | nobs > ldq | ldr < nobs |_
     nr < 1 | ldcr < nobs | lddr < nnull ) {
    info = -1
    return
}

#   set working parameters
n0 = nnull
n = nobs - nnull

#   compute  diag(I, U^{T}) F^{T} R(t,s^{T})
call  dcopy (n-2, q(n0+2,n0+1), ldq+1, wk, 1)
for (j=1;j<=nr;j=j+1) {
    call  dqrsl (s, lds, nobs, nnull, qraux, r(1,j), dum, r(1,j), dum,_
                 dum, dum, 01000, info)
    call  dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, wk, r(n0+2,j), dum,
                 r(n0+2,j), dum, dum, dum, 01000, info)
}

#   compute  U ( T + n*lambdahat I )^{-1} diag(I, U^{T}) F^{T} R(t,s^{T})
call  dset (n, 10.d0 ** nlaht, wk(2,1), 2)
call  daxpy (n, 1.d0, q(n0+1,n0+1), ldq+1, wk(2,1), 2)
call  dcopy (n-1, q(n0+1,n0+2), ldq+1, wk(1,2), 2)
call  dpbfa (wk, 2, n, 1, info)
if ( info != 0 ) {
    info = -2
    return
}
for (j=1;j<=nr;j=j+1)  call  dpbsl (wk, 2, n, 1, r(n0+1,j))
call  dcopy (n-2, q(n0+2,n0+1), ldq+1, wk, 1)
for (j=1;j<=nr;j=j+1)
    call  dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, wk, r(n0+2,j), r(n0+2,j),_
                 dum, dum, dum, dum, 10000, info)

#   compute  cr
for (j=1;j<=nr;j=j+1) {
    call  dset (n0, 0.d0, cr(1,j), 1)
    call  dcopy (n, r(n0+1,j), 1, cr(n0+1,j), 1)
    call  dqrsl (s, lds, nobs, nnull, qraux, cr(1,j), cr(1,j),_
                 dum, dum, dum, dum, 10000, info)
}

#   compute  dr
for (j=1;j<=nr;j=j+1) {
    for (i=1;i<=n0;i=i+1)
        dr(i,j) = r(i,j) - ddot (n, r(n0+1,j), 1, q(n0+1,i), 1)
    call  dtrsl (s, lds, n0, dr(1,j), 01, info)
    call  dprmut (dr(1,j), n0, jpvt, 1)
}

return
end

#..............................................................................
SHAR_EOF
cat << \SHAR_EOF > ddeev.r

#:::::::::::
#   ddeev
#:::::::::::

subroutine  ddeev (vmu, nobs,_
                   q, ldqr, ldqc, n, nq, u, ldu, uaux, t, x,_     # inputs
                   theta, nlaht, score, varht,_              
                   hes, ldh, gra,_                                # outputs
                   hwk1, hwk2, gwk1, gwk2,_                       # work arrays
                   kwk, ldk, work1, work2, work3,_
                   info)

#  Acronym:  Double precision DErivative EValuation.

#  Purpose:  This routine calculates the gradient and the Hessian of
#      V(theta|lambda) or M(theta|lambda).
 
character*1       vmu
integer           nobs, ldqr, ldqc, n, nq, ldu, ldh, ldk, info
double precision  q(ldqr,ldqc,*), u(ldu,*), uaux(*), t(2,*), x(*),_
                  theta(*), nlaht, score, varht,_
                  hes(ldh,*), gra(*), hwk1(nq,*), hwk2(nq,*), gwk1(*), gwk2(*),_
                  kwk(ldk,ldk,*), work1(*), work2(*), work3(*)

#  On entry:
#      vmu        'v':  GCV criterion.
#                 'm':  GML criterion.
#                 'u':  unbiased risk estimate.
#      nobs       the number of observations.
#      q          F_{2}^{T} Q_{i} F_{2}, of size (n,n,nq).
#      n          the size of q.
#      nq         the number of Q_{i}'s.
#      u,uaux     Householder vectors of U, of size (n-1,n-2),
#                 where U^{T}DU is tridiagonal.
#      t          U^{T} (D-n\lambda I) U in packed form, of size (2,n).
#      x          U^{T}z = U^{T}F_{2}^{T}y, of size (n).
#      theta      the current log(theta) for the D matrix, of dimension (nq).
#      nlaht      the estimated  log10(n*lambda)  in the current model.
#      score      the minimum GCV/GML score found at (theta, nlaht).
#      varht      the variance estimate at (theta, nlaht).
 
#  On exit:
#      hes        Hessian at point (theta, nlaht), of size (nq,nq).
#      gra        gradient at point (theta, nlaht), of size (nq).
#      info        0 :  normal termination.
#                 -1 :  dimension error.
#                 -2 :  D !>= 0.
#                 -3 :  tuning parameters are out of scope.

#  Work arrays:
#      hwk1,2     of sizes at least (nq,nq).
#      gwk1,2     of sizes at least (nq).
#      kwk        of size at least (n,n,nq).
#      work1-3    of sizes at least (n).

#  Routines called directly:
#      Fortran -- dfloat
#      Blas    -- daxpy, dcopy, ddot, dscal
#      Blas2   -- dgemv
#      Linpack -- dpbfa, dpbsl, dqrsl
#      Rkpack  -- dqrslm
#      Other   -- dset

#  Written:  Chong Gu, Statistics, Purdue, latest version 12/29/91.

double precision  trc, det, dum, ddot
integer           i, j, m

info = 0
call  dset (nq, 0.d0, gra, 1)
call  dset (nq*nq, 0.d0, hes, 1)

#   check tuning parameters
if ( vmu != 'v' & vmu != 'm' & vmu != 'u' ) {
    info = -3
    return
}

#   check dimension
if ( nobs < n | ldqr < n | ldqc < n | nq <= 0 | ldu < n-1 | ldh < nq | ldk < n ) {
    info = -1
    return
}

#   compute  K_{i} = U^{T}(\theta_{i}Q_{i})U
for (i=2;i<=nq;i=i+1) {
#   from i=2 to nq
    if ( theta(i) <= -25.d0 ) next
    for (j=1;j<=n;j=j+1) {
        call  dcopy (n-j+1, q(j,j,i), 1, kwk(j,j,i), 1)
        call  dscal (n-j+1, 10.d0 ** theta(i), kwk(j,j,i), 1)
    }
    call  dqrslm (u, ldu, n-1, n-2, uaux, kwk(2,2,i), n, 0, info, work1)
    call  dqrsl (u, ldu, n-1, n-2, uaux, kwk(2,1,i), dum, kwk(2,1,i),_
                 dum, dum, dum, 01000, info)
}
#   compute K_{1} through the identity:  U^{T}(\sum K_{i})U = T
call  dcopy (n, t(2,1), 2, kwk(1,1,1), n+1)
call  dcopy (n-1, t(1,2), 2, kwk(2,1,1), n+1)
for (j=1;j<n-1;j=j+1)  call  dset (n-j-1, 0.d0, kwk(j+2,j,1), 1)
for (i=2;i<=nq;i=i+1) {
    if ( theta(i) <= -25.d0 )  next
    for (j=1;j<=n;j=j+1)
        call  daxpy (n-j+1, -1.d0, kwk(j,j,i), 1, kwk(j,j,1), 1)
}
#   fill the upper triangles of K_{i}
for (i=1;i<=nq;i=i+1) {
    if ( theta(i) <= -25.d0 )  next
    for (j=1;j<n;j=j+1)  call  dcopy (n-j, kwk(j+1,j,i), 1, kwk(j,j+1,i), n)
}

#   decompose the tridiagonal matrix  U^{T}DU
call  dset (n, 10.d0 ** nlaht, work1, 1)
call  daxpy (n, 1.d0, work1, 1, t(2,1), 2)
call  dpbfa (t, 2, n, 1, info)
             if ( info != 0 ) {
                 info = -2
                 return
             }

#   compute  T^{-1}K_{i}
for (i=1;i<=nq;i=i+1) {
    if ( theta(i) <= -25.d0 )  next
    for (j=1;j<=n;j=j+1)  call  dpbsl (t, 2, n, 1, kwk(1,j,i))
}

#::::::::::  Compute the gradient and the Hessian  ::::::::::

#   compute  -m x^{-T}T^{-m}K_{i}T^{-1}x:  m = 2('v') or 1('m')
call  dcopy (n, x, 1, work1, 1)
call  dpbsl (t, 2, n, 1, work1)
if ( vmu != 'm' ) {
    call  dcopy (n, work1, 1, work2, 1)
    call  dscal (n, 2.d0, work2, 1)
}
else  call  dcopy (n, x, 1, work2, 1)
for (i=1;i<=nq;i=i+1) {
    if ( theta(i) <= -25.d0 )  next
    call  dgemv ('t', n, n, 1.d0, kwk(1,1,i), n, work2, 1, 0.d0, work3, 1)
    gwk1(i) = - ddot (n, work1, 1, work3, 1)
}

#   compute  - tr[T^{-m}K_{i}]:  m = 2('v') or 1('m')
for (i=1;i<=nq;i=i+1) {
    gwk2(i) = 0.d0
    if ( theta(i) <= -25.d0 )  next
    for (j=1;j<=n;j=j+1) {
        if ( vmu != 'm' ) {
            call  dcopy (n, kwk(1,j,i), 1, work1, 1)
            call  dpbsl (t, 2, n, 1, work1)
            gwk2(i) = gwk2(i) - work1(j)
        }
        else  gwk2(i) = gwk2(i) - kwk(j,j,i)
    }
}
    
if ( vmu != 'm' ) {
    #   compute  2 x^{T}T^{-1} [K_{i}T^{-2}K_{j}+T^{-1}K_{i}T^{-1}K_{j}
    #                           +K_{i}T^{-1}K_{j}T^{-1}]T^{-1}x  for 'v'
    call  dcopy (n, x, 1, work1, 1)
    call  dpbsl (t, 2, n, 1, work1)
    for (i=1;i<=nq;i=i+1) {
        if ( theta(i) <= -25.d0 )  next
        call  dgemv ('n', n, n, 1.d0, kwk(1,1,i), n, work1, 1, 0.d0, work2, 1)
        for (j=1;j<=i;j=j+1) {
            if ( theta(j) <= -25.d0 )  next
            call  dgemv ('n', n, n, 1.d0, kwk(1,1,j), n, work1, 1, 0.d0, work3, 1)
            hwk1(i,j) = 2.d0 * ddot (n, work2, 1, work3, 1)
            call  dgemv ('t', n, n, 1.d0, kwk(1,1,j), n, work1, 1, 0.d0, work3, 1)
            hwk1(i,j) = hwk1(i,j) + 2.d0 * ddot (n, work2, 1, work3, 1)
        }
        call  dgemv ('t', n, n, 1.d0, kwk(1,1,i), n, work1, 1, 0.d0, work2, 1)
        for (j=1;j<=i;j=j+1) {
            if ( theta(j) <= -25.d0 )  next
            call  dgemv ('n', n, n, 1.d0, kwk(1,1,j), n, work1, 1, 0.d0, work3, 1)
            hwk1(i,j) = hwk1(i,j) + 2.d0 * ddot (n, work2, 1, work3, 1)
        }
    }
}
else {
    #   compute  2 x^{T} T^{-1}K_{i}T^{-1}K_{j}T^{-1}x  for 'm'
    call  dcopy (n, x, 1, work1, 1)
    call  dpbsl (t, 2, n, 1, work1)
    for (i=1;i<=nq;i=i+1) {
        if ( theta(i) <= -25.d0 )  next
        call  dgemv ('n', n, n, 1.d0, kwk(1,1,i), n, work1, 1, 0.d0, work2, 1)
        for (j=1;j<=i;j=j+1) {
            if ( theta(j) <= -25.d0 )  next
            call  dgemv ('t', n, n, 1.d0, kwk(1,1,j), n, x, 1, 0.d0, work3, 1)
            hwk1(i,j) = 2.d0 * ddot (n, work2, 1, work3, 1)
        }
    }
}
#   adjust diagonal
for (i=1;i<=nq;i=i+1) {
    if ( theta(i) <= -25.d0 )  next
    hwk1(i,i) = hwk1(i,i) + gwk1(i)
}
   
#   compute  m tr[T^{-m}K_{i}T^{-1}K_{j}]:  m = 2('v') or 1('m')
for (i=1;i<=nq;i=i+1) {
    if ( theta(i) <= -25.d0 )  next
    for (m=1;m<=i;m=m+1) {
        hwk2(i,m) = 0.d0
        if ( theta(m) <= -25.d0 )  next
        for (j=1;j<=n;j=j+1) {
            if ( vmu != 'm' ) {
                call  dcopy (n, kwk(1,j,m), 1, work1, 1)
                call  dpbsl (t, 2, n, 1, work1)
                hwk2(i,m) = hwk2(i,m) + 2.d0 * ddot (n, kwk(j,1,i), n, work1, 1)
            }
            else  hwk2(i,m) = hwk2(i,m) + ddot (n, kwk(j,1,i), n, kwk(1,j,m), 1)
        }
    }
}
#   adjust diagonal
for (i=1;i<=nq;i=i+1) {
    if ( theta(i) <= -25.d0 )  next
    hwk2(i,i) = hwk2(i,i) + gwk2(i)
}
    
#   compute the gradient
if ( vmu == 'v' ) {
    trc = dfloat (nobs) * 10.d0 ** (-nlaht) * varht / score
    for (i=1;i<=nq;i=i+1) {
        if ( theta(i) <= -25.d0 )  next
        gra(i) = gwk1(i) / trc / trc - 2.d0 * score * gwk2(i) / trc / dfloat(nobs)
    }
    call  dscal (nq, dfloat (nobs), gra, 1)
}
if ( vmu == 'u' ) {
    dum = 10.d0 ** nlaht
    for (i=1;i<=nq;i=i+1) {
        if ( theta(i) <= -25.d0 )  next
        gra(i) = dum * dum * gwk1(i) - 2.d0 * varht * dum * gwk2(i)
    }
    call  dscal (nq, 1.d0/dfloat (n), gra, 1)
}
if ( vmu == 'm' ) {
    det = 10.d0 ** (-nlaht) * varht / score
    for (i=1;i<=nq;i=i+1) {
        if ( theta(i) <= -25.d0 )  next
        gra(i) = gwk1(i) / det - dfloat (nobs) / dfloat (n) * score * gwk2(i)
    }
    call  dscal (nq, 1.d0 / dfloat (nobs), gra, 1)
}

#   compute the Hessian
if ( vmu == 'v' ) {
    for (i=1;i<=nq;i=i+1) {
        if ( theta(i) <= -25.d0 )  next
        for (j=1;j<=i;j=j+1) {
            if ( theta(j) <= -25.d0 )  next
            hes(i,j) = hwk1(i,j) / trc / trc - 2.d0 * gwk1(i) * gwk2(j) / trc ** 3_
                      - 2.d0 * gwk1(j) * gwk2(i) / trc ** 3 - 2.d0 * score * hwk2(i,j)_
                      / trc / dfloat (nobs) + 6.d0 * score * gwk2(i) * gwk2(j)_
                      / trc / trc / dfloat (nobs)
        }
        call  dscal (i, dfloat (nobs), hes(i,1), ldh)
    }
}
if ( vmu == 'u' ) {
    for (i=1;i<=nq;i=i+1) {
        if ( theta(i) <= -25.d0 )  next
        for (j=1;j<=i;j=j+1) {
            if ( theta(j) <= -25.d0 )  next
            hes(i,j) = dum * dum * hwk1(i,j) - 2.d0 * varht * dum * hwk2(i,j)
        }
        call  dscal (i, 1.d0/dfloat (n), hes(i,1), ldh)
    }
}
if ( vmu == 'm' ) {
    for (i=1;i<=nq;i=i+1) {
        if ( theta(i) <= -25.d0 )  next
        for (j=1;j<=i;j=j+1) {
            if ( theta(j) <= -25.d0 )  next
            hes(i,j) = hwk1(i,j) / det - gwk1(i) * gwk2(j) / det / dfloat (n)_
                      - gwk1(j) * gwk2(i) / det / dfloat (n) - dfloat (nobs)_
                      / dfloat (n) * score * hwk2(i,j) + dfloat (nobs)_
                      / dfloat (n) ** 2 * score * gwk2(i) * gwk2(j)
        }
        call  dscal (i, 1.d0 / dfloat (nobs), hes(i,1), ldh)
    }
}

return
end

#....................................................................................
SHAR_EOF
cat << \SHAR_EOF > deval.r

#:::::::::::
#   deval
#:::::::::::

subroutine  deval (vmu, q, ldq, n, z, nint, low, upp, nlaht, score, varht,_
                   info, twk, work)

#  Purpose:  To evaluate GCV/GML function based on tridiagonal form and to
#      search minimum on an interval by equally spaced (in log10 scale) grid
#      search.

character*1       vmu
integer           ldq, n, nint, info
double precision  q(ldq,*), z(*), low, upp, nlaht, score(*), varht,_
                  twk(2,*), work(*)

#  On entry:
#      vmu        'v':  GCV criterion.
#                 'm':  GML criterion.
#                 'u':  unbiased risk estimate.
#      q          tidiagonal matrix in diagonal and super diagonal.
#      ldq        leading dimension of Q.
#      n          size of the matrix.
#      z          U^{T} F_{2}^{T} y.
#      nint       number of intervals (number of grids minus 1).
#      low        lower limit of log10(n*lambda).
#      upp        upper limit of log10(n*lambda).
#      varht      known variance if vmu=='u'.

#  On exit:
#      nlaht      the estimated log10(n*lambda).
#      score      the GCV/GML/URE score vector on grid points.
#      varht      the variance estimate at the estimated n*lambda.
#      info        0: normal termination.
#                 -1: dimension error.
#                 -2: tridiagonal form is not non-negative definite.
#                 -3: vmu or nint is out of scope.

#  Work arrays:
#      twk        array of length at least (2,n).
#      work       array of length at least (n).

#  Routines called directly:
#      Fortran -- dfloat
#      Blas    -- daxpy, dcopy
#      Rkpack  -- dtrev
#      Other   -- dset

#  Written:  Chong Gu, Statistics, Purdue, 12/29/91 latest version.

double precision  tmp, minscr, mlo, varhtwk
integer           j

info = 0

#   interchange boundaries if necessary
if ( upp < low ) {
    mlo = low
    low = upp
    upp = mlo
}

#   check job requests
if ( (vmu != 'v' & vmu != 'm' & vmu != 'u') | nint < 1 ) {
    info = -3
    return
}

#   check dimension
if ( 1 > n | n > ldq ) {
    info = -1
    return
}

#   evaluation
for (j=1;j<=nint+1;j=j+1) {
    tmp = low + dfloat (j-1) * ( upp - low ) / dfloat (nint)
    call  dset (n, 10.d0 ** (tmp), twk(2,1), 2)
    call  daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
    call  dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
    twk(1,1) = 10.d0**tmp
    call  dtrev (vmu, twk, 2, n, z, score(j), varht, info, work)
    if ( info != 0 ) {
        info = -2
        return
    }
    if ( score(j) <= minscr | j == 1 ) {
        minscr = score(j)
        nlaht = tmp
        varhtwk = varht
    }
}
varht = varhtwk

return
end

#...............................................................................

SHAR_EOF
cat << \SHAR_EOF > dgold.r

#:::::::::::
#   dgold
#:::::::::::

subroutine  dgold (vmu, q, ldq, n, z, low, upp, nlaht, score, varht,_
                   info, twk, work)

#  Purpose:  To evaluate GCV/GML function based on tridiagonal form and to
#      search minimum on an interval by golden section search.

character*1       vmu
integer           ldq, n, info
double precision  q(ldq,*), z(*), low, upp, nlaht, score, varht, twk(2,*),_
                  work(*)

#  On entry:
#      vmu        'v':  GCV criterion.
#                 'm':  GML criterion.
#                 'u':  unbiased risk estimate.
#      q          tidiagonal matrix in diagonal and super diagonal.
#      ldq        leading dimension of Q.
#      n          size of the matrix.
#      z          U^{T} F_{2}^{T} y.
#      low        lower limit of log10(n*lambda).
#      upp        upper limit of log10(n*lambda).
#      varht      known variance if vmu=='u'.

#  On exit:
#      nlaht      the estimated log(n*lambda).
#      score      the GCV/GML/URE score at the estimated lambda.
#      varht      the variance estimate at the estimated lambda.
#      info        0: normal termination.
#                 -1: dimension error.
#                 -2: tridiagonal form is not non-negative definite.
#                 -3: vmu is none of 'v', 'm', or 'u'.

#  Work arrays:
#      twk        of size at least (2,n).
#      work       of size at least (n).

#  Routines called directly:
#      Fortran -- dsqrt
#      Blas    -- daxpy, dcopy
#      Rkpack  -- dtrev
#      Other   -- dset

#  Written:  Chong Gu, Statistics, Purdue, latest version 12/29/91.

double precision  ratio, mlo, mup, tmpl, tmpu

ratio = ( dsqrt (5.d0) - 1.d0 ) / 2.d0

info = 0

#   interchange the boundaries if necessary
if ( upp < low ) {
    mlo = low
    low = upp
    upp = mlo
}

#   check vmu
if ( vmu != 'v' & vmu != 'm' & vmu != 'u' ) {
    info = -3
    return
}

#   check dimension
if ( n < 1 | n > ldq ) {
    info = -1
    return
}

#   initialize golden section search for scrht
mlo = upp - ratio * (upp - low)
call  dset (n, 10.d0 ** (mlo), twk(2,1), 2)
call  daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
call  dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
twk(1,1) = 10.d0**mlo
call  dtrev (vmu, twk, 2, n, z, tmpl, varht, info, work)
if ( info != 0 ) {
    info = -2
    return
}
mup = low + ratio * (upp - low)
call  dset (n, 10.d0 ** (mup), twk(2,1), 2)
call  daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
call  dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
twk(1,1) = 10.d0**mup
call  dtrev (vmu, twk, 2, n, z, tmpu, varht, info, work)
if ( info != 0 ) {
    info = -2
    return
}

#   golden section search for estimate of lambda
repeat {
    if ( mup - mlo < 1.d-7 )  break
    if ( tmpl < tmpu ) {
        upp = mup
        mup = mlo
        tmpu = tmpl
        mlo = upp - ratio * (upp - low)
        call  dset (n, 10.d0 ** (mlo), twk(2,1), 2)
        call  daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
        call  dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
        twk(1,1) = 10.d0**mlo
        call  dtrev (vmu, twk, 2, n, z, tmpl, varht, info, work)
        if ( info != 0 ) {
            info = -2
            return
        }
    }    
    else {
        low = mlo
        mlo = mup
        tmpl = tmpu
        mup = low + ratio * (upp - low)
        call  dset (n, 10.d0 ** (mup), twk(2,1), 2)
        call  daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
        call  dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
        twk(1,1) = 10.d0**mup
        call  dtrev (vmu, twk, 2, n, z, tmpu, varht, info, work)
        if ( info != 0 ) {
            info = -2
            return
        }
    }    
}

#   compute the return value
nlaht = ( mup + mlo ) / 2.d0
call  dset (n, 10.d0 ** (nlaht), twk(2,1), 2)
call  daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
call  dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
twk(1,1) = 10.d0**nlaht
call  dtrev (vmu, twk, 2, n, z, score, varht, info, work)
if ( info != 0 ) {
    info = -2
    return
}

return
end

#...............................................................................

SHAR_EOF
cat << \SHAR_EOF > dmcdc.r

#:::::::::::
#   dmcdc
#:::::::::::

subroutine  dmcdc (a, lda, p, e, jpvt, info)

#  Acronym:  Double precision Modified Cholesky DeComposition.

#  Purpose:  This routine implements the modified Cholesky decomposition as
#      described by Gill, Murray, and Wright (p.111, Practical Optimization,
#      Academic Press, 1981).  The parameter delta is set to the maximum of
#      1.d-7 * (average diag) and 1.d-10.  Pivoting is enforced.  The result
#      is compatible with the Linpack routine `dposl'.

integer           lda, p, jpvt(*), info
double precision  a(lda,*), e(*)

#  On entry:
#      a          a symmetric matrix in the UPPER triangle.
#      lda        the leading dimension of a.
#      p          the size of a.

#  On exit:
#      a          the Cholesky factor  R  of  P^{T}AP + E = R^{T} R, where P 
#                 is a permutation matrix.
#      e          the amount of diagonal modification, of size (p).
#      jpvt       the permutation P, jpvt(j) contains the index of diagonal 
#                 element moved to j-th position, of size (p).
#      info        0:  normal termination.
#                 -1:  dimension error.

#  Routines called directly:
#      Blas    -- dasum, ddot, dscal, dswap, idamax
#      Fortran -- dabs, dmax1, dfloat, dsqrt

#   Written:  Chong Gu, Statistics, UW-Madison, latest version 9/16/88.

double precision  beta, delta, theta, tmp, dasum, ddot
integer           i, j, jmax, jtmp, idamax

info = 0

#   check dimension
if ( lda < p | p < 1 ) {
    info = -1
    return
}

#   compute constants
tmp = 1.d0
while ( 1.d0 + tmp > 1.d0 )  tmp = tmp / 2.d0
jmax = idamax (p, a, lda+1)
beta = dmax1 (2.d0 * tmp, dabs (a(jmax,jmax)))
tmp = dsqrt (dfloat (p*p-1))
if ( tmp < 1.d0 )  tmp = 1.d0
for (j=2;j<=p;j=j+1) {
    jmax = idamax (j-1, a(1,j), 1)
    beta = dmax1 (beta, dabs (a(jmax,j)) / tmp)
}
delta = dasum (p, a, lda+1) / dfloat (p) * 1.d-7
delta = dmax1 (delta, 1.d-10)
for (j=1;j<=p;j=j+1)  jpvt(j) = j

#   compute  P^{T}AP + E = LDL^{T}
for (j=1;j<=p;j=j+1) {
    #   pivoting
    jmax = idamax (p-j+1, a(j,j), lda+1) + j - 1
    if ( jmax != j ) {
        call  dswap (j-1, a(1,j), 1, a(1,jmax), 1)
        call  dswap (jmax-j-1, a(j,j+1), lda, a(j+1,jmax), 1)
        call  dswap (p-jmax, a(j,jmax+1), lda, a(jmax,jmax+1), lda)
        tmp = a(j,j)
        a(j,j) = a(jmax,jmax)
        a(jmax,jmax) = tmp
        jtmp = jpvt(j)
        jpvt(j) = jpvt(jmax)
        jpvt(jmax) = jtmp
    }
    #   compute j-th column of L^{T}
    for (i=1;i<j;i=i+1)  a(i,j) = a(i,j) / a(i,i)
    #   update j-th row and determine the parameter theta
    for (i=j+1;i<=p;i=i+1)
        a(j,i) = a(j,i) - ddot (j-1, a(1,j), 1, a(1,i), 1)
    #   specify theta
    if ( j == p )  theta = 0.d0
    else {
        jmax = idamax (p-j, a(j,j+1), lda) + j
        theta = dabs (a(j,jmax))
    }
    #   compute diagonal d(j,j)
    tmp = dmax1 (delta, dabs (a(j,j)), theta ** 2 / beta)
    e(j) = tmp - a(j,j)
    a(j,j) = tmp
    #   update remaining diagonals
    for (i=j+1;i<=p;i=i+1)  a(i,i) = a(i,i) - a(j,i) ** 2 / a(j,j)
}

#   scale
for (j=1;j<=p;j=j+1) {
    a(j,j) = dsqrt (a(j,j))
    call  dscal (p-j, a(j,j), a(j,j+1), lda)
}

return
end

#...............................................................................
SHAR_EOF
cat << \SHAR_EOF > dmudr1.r
#:::::::::::
#   dmudr1
#:::::::::::

subroutine  dmudr1 (vmu,_
                   s, lds, nobs, nnull, q, ldqr, ldqc, nq, y,_     # inputs
                   tol, init, prec, maxite,_                       # tune para
                   theta, nlaht, score, varht, c, d,_              # outputs
                   qraux, jpvt, twk, traux, qwk, ywk, thewk,_      # work arrays
                   hes, gra, hwk1, hwk2, gwk1, gwk2, pvtwk,_
                   kwk, work1, work2,_
                   info)

#  Acronym:  Double precision MUltiple smoothing parameter DRiver.
 
#  Purpose:  This routine implements the iterative algorithm for minimizing
#      GCV/GML scores with multiple smoothing parameters described in 
#      Gu and Wahba(1988, Minimizing GCV/GML scores with multiple smoothing
#      parameters via the Newton method).

#  WARNING:  Please be sure that you understand what this routine does before 
#      you call it.  Pilot runs with small problems are recommended.  This
#      routine performs VERY INTENSIVE numerical calculations for big nobs.

integer           lds, nobs, nnull, ldqr, ldqc, nq, init, maxite,_
                  jpvt(*), pvtwk(*), info

double precision  s(lds,*), q(ldqr,ldqc,*), y(*), tol, prec,_
                  theta(*), nlaht, score, varht, c(*), d(*),_
                  qraux(*), traux(*), twk(2,*), qwk(ldqr,*), ywk(*),_
                  thewk(*), hes(nq,*), gra(*), hwk1(nq,*), hwk2(nq,*),_
                  gwk1(*), gwk2(*), kwk(nobs-nnull,nobs-nnull,*),_
                  work1(*), work2(*)

character*1       vmu

#  On entry:
#      vmu        'v':  GCV criterion.
#                 'm':  GML criterion.
#                 'u':  unbiased risk estimate.
#      s          the matrix S, of size (lds,nnull).
#      nobs       the number of observations.
#      nnull      the dimension of the null space.
#      q          the matrices Q_{i}'s, of dimension (ldqr,ldqc,nq).
#      nq         the number of Q_{i}'s.
#      y          the response vector of size (nobs)
#      tol        the tolerance for truncation in the tridiagonalization.
#      init       0 :  No initial values provided for the theta.
#                 1 :  Initial values provided for the theta.
#      theta      initial values of theta if init = 1.
#      prec       precision requested for the minimum score value.
#      maxite     maximum number of iterations allowed.
#      varht      known variance if vmu=='u'.
 
#  On exit:
#      theta      the vector of parameter log10(theta) used in the final model,
#                 of dimension (nq).  -25 indicates effective minus infinity.
#      nlaht      the estimated  log10(n*lambda)|theta  in the final model.
#      score      the minimum GCV/GML/URE score found at (theta, nlaht).
#      varht      the variance estimate.
#      c,d        the coefficients estimates.
#      info        0 :  normal termination.
#                 -1 :  dimension error.
#                 -2 :  F_{2}^{T} Q_{*}^{theta} F_{2} !>= 0.
#                 -3 :  tuning parameters are out of scope.
#                 -4 :  fails to converge within maxite steps.
#                 -5 :  fails to find a reasonable descent direction.
#                 >0 :  the matrix S is rank deficient: rank(S)+1.

#  Work arrays:
#      qraux      of size at least (nnull).
#      jpvt       of size at least (nnull).
#      twk        of size at least (2,nobs-nnull).
#      traux      of size at least (nobs-nnull-2).
#      qwk        of size at least (nobs,nobs).
#      ywk        of size at least (nobs).
#      thewk      of size at least (nq).
#      hes        of size at least (nq,nq).
#      gra        of size at least (nq).
#      hwk1-2     of sizes at least (nq,nq).
#      gwk1-2     of sizes at least (nq).
#      pvtwk      of size at least (nq).
#      kwk        of size at least (nobs-nnull,nobs-nnull,nq).
#      work1-2    of sizes at least (nobs).

#  Routines called directly:
#      Blas    -- dasum, daxpy, dcopy, ddot, dscal, idamax
#      Blas2   -- dsymv
#      Fortran -- dfloat, dlog, dlog10
#      Linpack -- dpofa, dposl, sqrsl
#      Rkpack  -- dcoef, dcore, ddeev, dmcdc, dstup
#      Other   -- dprmut, dset

#  Routines called indirectly:
#      Blas    -- dasum, daxpy, dcopy, ddot, dnrm2, dscal, dswap, idamax
#      Blas2   -- dgemv, dsymv, dsyr2
#      Fortran -- dabs, dexp, dfloat, dlog, dlog10, dsqrt
#      Linpack -- dpbfa, dpbsl, dqrdc, dqrsl, dtrsl
#      Rkpack  -- deval, dgold, dqrslm, dsytr, dtrev
#      Other   -- dprmut, dset

#  Written:  Chong Gu, Statistics, Purdue, latest version 1/6/92.

double precision  alph, scrold, scrwk, nlawk, limnla(2),_
                  tmp, dasum, ddot
integer           n, n0, i, j, iwk, maxitwk, idamax, job

info = 0

#   set working parameters
n0 = nnull
n = nobs - nnull
maxitwk = maxite

#   check tuning parameters
if ( (vmu != 'v' & vmu != 'm' & vmu != 'u') | (init != 0 & init != 1) |_
     (maxitwk <=0) | (prec <= 0.d0) ) {
    info = -3
    return
}

#   check dimension
if ( lds < nobs | nobs <= n0 | n0 < 1 | ldqr < nobs | ldqc < nobs |_
     nq <= 0 ) {
    info = -1
    return
}

#   initialize
call  dstup (s, lds, nobs, n0, qraux, jpvt, y, q, ldqr, ldqc, nq, info,_
             work1)
if ( info != 0 )  return
if ( init == 1 )  call  dcopy (nq, theta, 1, thewk, 1)
else {
#   use the "plug-in" weights as the starting theta
    for (i=1;i<=nq;i=i+1) {
        thewk(i) = dasum (n, q(n0+1,n0+1,i), ldqr+1)
        if ( thewk(i) > 0.d0 )  thewk(i) = 1.d0 / thewk(i)
    }
    #   fit an initial model
    for (j=1;j<=nobs;j=j+1)  call  dset (nobs-j+1, 0.d0, qwk(j,j), 1)
    for (i=1;i<=nq;i=i+1) {
        for (j=1;j<=nobs;j=j+1)
            call  daxpy (nobs-j+1, thewk(i), q(j,j,i), 1, qwk(j,j), 1)
    }
    call  dcopy (nobs, y, 1, ywk, 1)
    call  dcore (vmu, qwk, ldqr, nobs, n0, tol, ywk, 0, limnla, nlawk,_
                 scrwk, varht, info, twk, work1)
    if (info != 0 )  return
    call  dcoef (s, lds, nobs, n0, qraux, jpvt, ywk, qwk, ldqr, nlawk,_
                 c, d, info, twk)
    #   assign weights due to norm  \theta^{2}c^{T}(Q_{i})c
    call  dqrsl (s, lds, nobs, n0, qraux, c, tmp, c, tmp, tmp, tmp,_
                 01000, info)
    for (i=1;i<=nq;i=i+1) {
        call  dsymv('l', n, thewk(i), q(n0+1,n0+1,i), ldqr, c(n0+1), 1,_
                    0.d0, work1, 1)
        thewk(i) = ddot (n, c(n0+1), 1, work1, 1) * thewk(i)
        if ( thewk(i) > 0.d0 )  thewk(i) = dlog10 (thewk(i))
        else  thewk(i) = -25.d0
    }
}
scrold = 1.d10

#   main process

job = 0
repeat {
    #   nq == 1
    if ( nq == 1 ) {
        theta(1) = 0.d0
        break
    }
    #   form  Qwk = \sum_{i=1}^{nq} \thewk_{i} Q_{i}
    for (j=1;j<=nobs;j=j+1)  call  dset (nobs-j+1, 0.d0, qwk(j,j), 1)
    for (i=1;i<=nq;i=i+1) {
        if ( thewk(i) <= -25.d0 )  next
        for (j=1;j<=nobs;j=j+1)
            call  daxpy (nobs-j+1, 10.d0 ** thewk(i), q(j,j,i), 1,_
                         qwk(j,j), 1)
    }
    #   main calculation
    call  dcopy (nobs, y, 1, ywk, 1)
    call  dcore (vmu, qwk, ldqr, nobs, n0, tol, ywk, job, limnla, nlawk,_
                 scrwk, varht, info, twk, work1)
    if (info != 0 )  return

    #   half the increment if no gain
    if ( scrold < scrwk ) {
        #   algorithm halts
        tmp = dabs (gwk1(idamax (nq, gwk1, 1)))
        if ( alph * tmp > - prec ) {
            info = -5
            return
        }
        alph = alph / 2.d0
        for (i=1;i<=nq;i=i+1)  thewk(i) = theta(i) + alph * gwk1(i)
        next
    }
    #   count for one iteration
    maxitwk = maxitwk - 1

    #   compute the gradient and the Hessian
    call  dcopy (n-2, qwk(n0+2,n0+1), ldqr+1, traux, 1)
    call  dcopy (n, qwk(n0+1,n0+1), ldqr+1, twk(2,1), 2)
    call  dcopy (n-1, qwk(n0+1,n0+2), ldqr+1, twk(1,2), 2)
    call  ddeev (vmu, nobs,_
                 q(n0+1,n0+1,1), ldqr, ldqc, n, nq, qwk(n0+2,n0+1),_
                 ldqr, traux, twk, ywk(n0+1),_                 
                 thewk, nlawk, scrwk, varht,_            # inputs
                 hes, nq, gra,_                          # outputs
                 hwk1, hwk2, gwk1, gwk2,_
                 kwk, n, work1, work2, c,_
                 info)

    #   get the active subset
    iwk = 0
    for (i=1;i<=nq;i=i+1) {
        if ( thewk(i) <= -25.d0 )  next
        iwk = iwk + 1
        call  dcopy (nq, hes(1,i), 1, hes(1,iwk), 1)
    }
    iwk = 0
    for (i=1;i<=nq;i=i+1) {
        if ( thewk(i) <= -25.d0 )  next
        iwk = iwk + 1
        call  dcopy (nq, hes(i,1), nq, hes(iwk,1), nq)
        gwk1(iwk) = gra(i)
        work2(iwk) = gra(i)
    }

    #   compute the Newton direction
    for (i=1;i<iwk;i=i+1)  
        call  dcopy (iwk-i, hes(i+1,i), 1, hes(i,i+1), nq)
    call  dmcdc (hes, nq, iwk, gwk2, pvtwk, info)
    call  dprmut (gwk1, iwk, pvtwk, 0)
    call  dposl (hes, nq, iwk, gwk1)
    call  dprmut (gwk1, iwk, pvtwk, 1)

    #   specify the stepsize
    alph = -1.d0

    #   set the update direction in the original index
    j = iwk
    for (i=nq;i>=1;i=i-1) {
        if ( thewk(i) <= -25.0 )  gwk1(i) = 0.d0
        else {
            gwk1(i) = gwk1(iwk)
            iwk = iwk - 1     
        }
    }
    call  dscal (nq, 1.d0/dlog(1.d1), gwk1, 1)
    tmp = dabs (gwk1(idamax (nq, gwk1, 1)))
    if ( tmp > 1.d0 )  call  dscal (nq, 1.d0/tmp, gwk1, 1)

    #   set thewk such that  nlawk = 0.d0
    for (i=1;i<=nq;i=i+1) {
        if ( thewk(i) <= -25.d0 )  next
        thewk(i) = thewk(i) - nlawk
    }
    call  dcopy (nq, thewk, 1, theta, 1)

    #   check convergence
    tmp = gra(idamax (nq, gra, 1)) ** 2
    if ( tmp < prec ** 2_                          #  zero gradient
        | scrold - scrwk < prec * (scrwk + 1.d0)_  #  small change
        & tmp < prec * (scrwk + 1.d0) ** 2 ) {     #  small gradient
        break
    }

    #   fail to converge
    if ( maxitwk < 1 ) {
        info = -4
        return
    }

    #   update records
    scrold = scrwk

    #   increment thewk
    for (i=1;i<=nq;i=i+1)  thewk(i) = thewk(i) + alph * gwk1(i)

    job = -1
    limnla(1) = -1.d0
    limnla(2) = 1.d0
}   #   the end of the main loop

#   compute the model to be returned
for (j=1;j<=nobs;j=j+1)  call  dset (nobs-j+1, 0.d0, qwk(j,j), 1)
for (i=1;i<=nq;i=i+1) {
    if ( theta(i) <= -25.d0 )  next
    for (j=1;j<=nobs;j=j+1)
        call  daxpy (nobs-j+1, 10.d0 ** theta(i), q(j,j,i), 1,_
                     qwk(j,j), 1)
}

call  dcopy (nobs, y, 1, ywk, 1)
call  dcore (vmu, qwk, ldqr, nobs, n0, tol, ywk, job, limnla, nlaht,_
             score, varht, info, twk, work1)
if (info != 0 )  return
call  dcoef (s, lds, nobs, n0, qraux, jpvt, ywk, qwk, ldqr, nlaht,_
             c, d, info, twk)

return
end

#....................................................................................
SHAR_EOF
cat << \SHAR_EOF > dsidr.r

#::::::::::::
#   dsidr
#::::::::::::

subroutine  dsidr (vmu,_
                   s, lds, nobs, nnull, y, q, ldq,_       # data
                   tol, job, limnla,_                     # job requests
                   nlaht, score, varht, c, d,_            # output
                   qraux, jpvt, wk,_                      # work arrays
                   info)                                  # error message

#  Acronym:  Double precision SIngle smoothing parameter DRiver.
 
#  Purpose:  
#   
#      This routine is the double precision single smoothing parameter
#      driver of the RKPACK -- a minipackage for solving the equations
#      
#              ( n lambda I + Sigma ) c + S d  =  y
#                                        S' c  =  0  ,
#
#      where Sigma is n-by-n and S is n-by-M, and lambda is the so-called
#      smoothing parameter chosen to minimize the GCV criterion
#
#                            (1/n) || ( I - A(lambda) ) y || ** 2
#            V(lambda)  =   --------------------------------------   ,
#                            [ (1/n) tr ( I - A(lambda) ) ] ** 2
#
#      where A(lambda), satisfying
#
#                      A(lambda) y  =  Sigma c + S d  ,
#
#      is the so-called influence matrix, OR to minimize the GML criterion
#            
#                               (1/n) y' ( I - A(lambda) ) y
#            M(lambda)  =   ------------------------------------   ,
#                            det [ (I - A(lambda))+ ]^{1/(n-M)}
#
#      where det[(...)+] is the product of nonzero eigenvalues of (...).
#
#      The general theory behind this is described in Kimeldorf and Wahba
#      (1971), which seeks the minimizer of certain variational problem in 
#      reproducing kernel hilbert space.  The generalized cross validation
#      (GCV) method for choosing the smoothing parameter lambda is propos-
#      ed by Craven and Wahba (1979).  The GML criterion is described and
#      compared with the GCV by Wahba (1985).  An example of this general
#      scheme is the thin plate smoothing spline model, as described by
#      Wahba and Wendelberger (1980), and Bates et al. (1987).
#
#      RKPACK is the implementation of the GCV/GML algorithm based on the
#      Householder tridiagonalization, as proposed by Gu et al. (1988). 
#      It does not assume any structure of Sigma and S, except that S is
#      of full rank, Sigma is symmetric, and
#
#                  S' c  =  0   ===>   c' Sigma c  >=  0            (*)
#
#      The Sigma matrix is the reproducing kernel (or semi-kernel) evalu-
#      ated at the data points, and the matrix S is a set of null space 
#      basis evaluated at the data points.
#
#      Dsidr will do either golden-section search or regular grid search
#      for the minimizing lambda of V/M(lambda).  In the goden-section 
#      search case, it does assume bowl-shaped V/M(lambda) curve.  If this 
#      is not true, the user may specify shorter searching intervals on
#      which the curve may be bowl-shaped.  The precision of n*lambda is
#      1.d-7 in the log10 scale.  In the regular grid search case, it 
#      provides a "GCV/GML curve" on the searching interval.  (For the
#      later case user should provide `score' as a vector, though in the
#      golden section search case only minimum GCV/GML value is recorded.)
#
#      RKPACK is a cubic order package.  In fitting univariate smoothing
#      spline models, a linear order algorithm developed independently
#      by Hutchinson and deHoog (1985) and by O'Sullivan (1985) is recommended.
#      Code by Woltring (1986) and O'Sullivan is available from NETLIB.


character*1       vmu
integer           lds, nobs, nnull, ldq, job, jpvt(*), info
double precision  s(lds,*), y(*), q(ldq,*), tol, limnla(2), nlaht, score(*),_
                  varht, c(*), d(*), qraux(*), wk(*)


#  On entry:
#      vmu        'v':  GCV criterion.
#                 'm':  GML criterion.
#                 'u':  unbiased risk estimate.
#      s          the matrix S of size (nobs,nnull).
#      lds        the leading dimension of s.
#      nobs       the number of observations.
#      nnull      the dimension of the null space.
#      y          the observations.
#      q          the matrix Q, only the lower triangle referred.
#      tol        tolerance for truncation in `dsytr'.  If 0.d0, set to 
#                 square of machine precision.
#      job        <=0 : golden-section search
#                     0 --  searching interval specified automatically.
#                    -1 --  search on (limnla(1), limnla(2)).
#                  >0 : regular grid search on [limnla(1), limnla(2)]
#                     #(grids) = job + 1.
#      limnla     the searching interval (in log10 scale), see job.
#      varht      known variance if vmu=='u'.

#  On exit:
#      nlaht      the GCV/GML/URE estimate of log10(nobs*lambda).
#      limnla     searching range for nlaht.
#      score      job <= 0 :  GCV/GML/URE value at nlaht.
#                 job >  0 :  GCV/GML/URE vector on the regular grid points.
#      varht      the variance estimate.
#      c          the parameters c.
#      d          the parameters d.
#      s,qraux,jpvt
#                 QR decomposition of S=FR, as from Linpack `dqrdc'.
#      q          first nnull columns: F^{T} Q F_{1}.
#                 BOTTOM-RIGHT corner: tridiagonalization of 
#                                      F_{2}^{T} Q F_{2}.
#      info        0: normal termination.
#                 -1: dimension error.
#                 -2: F_{2}^{T} Q F_{2} !>= 0.
#                 -3: vmu is out of scope.
#                 >0: the matrix S is rank deficient: rank(S)+1.
#      others     intact.

#  Work arrays:
#      wk         of size at least (3*nobs).

#  Routines called directly:
#      Rkpack  -- dcoef, dcore, dstup

#  Routines called indirectly:
#      Fortran -- dexp, dfloat, dlog, dlog10, dsqrt
#      Blas    -- dasum, daxpy, dcopy, ddot, dnrm2, dscal
#      Blas2   -- dsymv, dsyr2
#      Linpack -- dpbfa, dpbsl, dqrdc, dqrsl, dtrsl
#      Rkpack  -- deval, dgold, dqrslm, dsytr, dtrev
#      Other   -- dprmut, dset

#  Written:  Chong Gu, Statistics, Purdue, latest version 12/29/91.


info = 0

#   check dimension
if ( nnull < 1 | nnull >= nobs | nobs > lds | nobs > ldq ) {
    info = -1
    return
}

#   check vmu
if ( vmu != 'v' & vmu != 'm' & vmu != 'u' ) {
    info = -3
    return
}

#   main process

call  dstup (s, lds, nobs, nnull, qraux, jpvt, y, q, ldq, nobs, 1, info,_
             wk)
if ( info != 0 )  return

call  dcore (vmu, q, ldq, nobs, nnull, tol, y, job, limnla, nlaht, score,_
             varht, info, wk, wk(2*nobs+1))
if ( info != 0 )  return

call  dcoef (s, lds, nobs, nnull, qraux, jpvt, y, q, ldq, nlaht, c, d,_
             info, wk)

return
end

#...............................................................................
SHAR_EOF
cat << \SHAR_EOF > dqrslm.r

#::::::::::::
#   dqrslm
#::::::::::::

subroutine  dqrslm (x, ldx, n, k, qraux, a, lda, job, info, work)

#  Acronym:  `dqrsl' Matrix version

#  Purpose:  This routine generates the matrix Q^{T}AQ or QAQ^{T}, where 
#      Q is the products of Householder matrix stored in factored form in 
#      the LOWER triangle of `x' and `qraux', and A is assumed to be 
#      symmetric.  This routine is designed to be compatible with LINPACK's 
#      `dqrdc' subroutine.
 
#  References:  1. Dongarra et al. (1979) LINPACK Users' Guide. (chap. 9)
#               2. Golud and Van Loan (1983) Matrix Computation. (pp.276-7)
 
integer           ldx, n, k, lda, job, info
double precision  x(ldx,*), qraux(*), a(lda,*), work(*)

#  On entry:
#      x          output from `dqrdc', of size (ldx,k).
#      ldx        leading dimension of x.
#      n          size of matrix A and Q.
#      k          number of factors in Q.
#      qraux      output from `dqrdc'.
#      a          matrix A (of size (lda,n)), only LOWER triangle refered.
#      lda        leading dimension of a.
#      job        0:  Q^{T} A Q.
#                 1:  Q A Q^{T}.
 
#  On Exit:
#      a          matrix Q^{T}AQ or QAQ^{T} in LOWER triangle.
#      info        0:  normal termination.
#                  1:  `job' is out of scope.
#                 -1:  dimension error.
#      others     unchanged.

#  Work array:
#      work       of size at least (n).

#  Routines called:
#      Blas    -- ddot, daxpy
#      Blas2   -- dsymv, dsyr2

#  Written:  Chong Gu, Statistics, UW-Madison, latest version 8/29/88.

double precision  tmp, alph, ddot
integer           i, j, step

info = 0

#   check input
if ( lda < n | n < k | k < 1 ) {
    info = -1
    return
}
if ( job != 0 & job != 1 ) {
    info = 1
    return
}

#   set operation sequence
if ( job == 0 ) {
    j = 1
    step = 1
}
else {
    j = k
    step = -1
}

#   main process
while ( j >= 1 & j <= k ) {
    if ( qraux(j) == 0.0d0 ) {
        j = j + step
        next
    }

    tmp = x(j,j)
    x(j,j) = qraux(j)

    #   update the columns 1 thru j-1
    for (i=1;i<j;i=i+1) {
        alph = - ddot (n-j+1, x(j,j), 1, a(j,i), 1) / x(j,j)
        call  daxpy (n-j+1, alph, x(j,j), 1, a(j,i), 1)
    }

    #   update the submatrix at bottom-right corner

    #   compute  p = D v / v_{1} 
    alph = 1.d0 / x(j,j)
    call  dsymv ('l', n-j+1, alph, a(j,j), lda, x(j,j), 1, 0.d0, work(j), 1)

    #   compute  w = p - ( p^{T} v / 2 v_{1} ) v 
    alph = - ddot (n-j+1, work(j), 1, x(j,j), 1) / 2.d0 / x(j,j)
    call  daxpy (n-j+1, alph, x(j,j), 1, work(j), 1)

    #   compute  D = D - v w^{T} - w v^{T} 
    call  dsyr2 ('l', n-j+1, -1.d0, x(j,j), 1, work(j), 1, a(j,j), lda)

    x(j,j) = tmp
    j = j + step
}

return
end

#...............................................................................

SHAR_EOF
cat << \SHAR_EOF > dsms.r
 
#:::::::::::
#   dsms
#:::::::::::

subroutine  dsms (s, lds, nobs, nnull, jpvt, q, ldq, nlaht,_
                  sms, ldsms, wk, info)

#  Purpose:  To compute the auxiliary quantity sms for posterior covariance

#  Usage:  Use s, qraux, jpvt, q, and nlaht returned by dsidr.

integer           lds, nobs, nnull, jpvt(*), ldq, ldsms, info
double precision  s(lds,*), q(ldq,*), nlaht, sms(ldsms,*), wk(2,*)

#  On entry:
#      s,jpvt     QR-decomposition of  S = F R.
#      nobs       number of observations.
#      nnull      dimension of null space.
#      q          U^{T} F_{2}^{T} Q F_{2} U in BOTTOM-RIGHT corner's
#                     LOWER triangle and SUPER DIAGONAL;
#                 F_{2}^{T} Q F_{1} in BOTTOM-LEFT corner;
#                 F_{1}^{T} Q F_{1} in UPPER-LEFT corner's LOWER triangle.
#      ldq        leading dimension of q.
#      nlaht      estimated log10(n*lambda).

#  On exit:
#      sms        (S^{T}M^{-1}S)^{-1}.
#      info        0: normal termination.
#                 >0: S is not of full rank: rank(S)+1 .
#                 -1: dimension error.
#                 -2: F_{2}^{T} Q F_{2} is not non-negative definite.
#      inputs     intact but UPPER-RIGHT corner of q was used as work array.

#  Work array:
#      wk         of size at least (2,nobs-nnull).

#  Routines called directly:
#      Blas    -- daxpy, dcopy, ddot
#      Linpack -- dpbfa, dpbsl, dqrsl, dtrsl
#      Other   -- dprmut, dset

#  Written:  Chong Gu, Statistics, Purdue, latest version 4/17/92.

double precision  dum, ddot
integer           i, j, n, n0

info = 0

#   check dimension
if ( nnull < 1 | nnull >= nobs | nobs > lds | nobs > ldq | ldsms < nnull ) {
    info = -1
    return
}

#   set working parameters
n0 = nnull
n = nobs - nnull

#   compute  sms

#   U^{T} (F_{2}^{T} Q F_{1})
call  dcopy (n-2, q(n0+2,n0+1), ldq+1, wk, 1)
for (j=1;j<=n0;j=j+1) {
    call  dcopy (n, q(n0+1,j), 1, q(j,n0+1), ldq)
    call  dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, wk, q(n0+2,j), dum,
                 q(n0+2,j), dum, dum, dum, 01000, info)
}
#   U^{T} (F_{2}^{T}QF_{2} + n lambda I)^{-1} (F_{2}^{T}QF_{1})
call  dset (n, 10.d0 ** nlaht, wk(2,1), 2)
call  daxpy (n, 1.d0, q(n0+1,n0+1), ldq+1, wk(2,1), 2)
call  dcopy (n-1, q(n0+1,n0+2), ldq+1, wk(1,2), 2)
call  dpbfa (wk, 2, n, 1, info)
if ( info != 0 ) {
    info = -2
    return
}
for (j=1;j<=n0;j=j+1)  call  dpbsl (wk, 2, n, 1, q(n0+1,j))
#   (F_{2}^{T}QF_{2} + n lambda I)^{-1} (F_{2}^{T}QF_{1})
call  dcopy (n-2, q(n0+2,n0+1), ldq+1, wk, 1)
for (j=1;j<=n0;j=j+1) {
    call  dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, wk, q(n0+2,j), q(n0+2,j),_
                 dum, dum, dum, dum, 10000, info)
}
#   (F_{1}^{T}QF_{1} + n lambda I) -
#   (F_{1}^{T}QF_{2}^{T}) (F_{2}^{T}QF_{2} + n lambda I)^{-1} (F_{2}^{T}QF_{1})
for (i=1;i<=n0;i=i+1) {
    for (j=1;j<i;j=j+1)  sms(i,j) = sms(j,i)
    for (j=i;j<=n0;j=j+1)
        sms(i,j) = q(j,i) - ddot (n, q(n0+1,j), 1, q(i,n0+1), ldq)
    sms(i,i) = sms(i,i) + 10.d0**nlaht
}
#   R^{-1} ... R^{-T} and permutation
for (j=1;j<=n0;j=j+1)  call  dtrsl (s, lds, n0, sms(1,j), 01, info)
for (i=1;i<=n0;i=i+1) {
    call  dcopy (n0, sms(i,1), ldsms, wk, 1)
    call  dtrsl (s, lds, n0, wk, 01, info)
    call  dprmut (wk, n0, jpvt, 1)
    call  dcopy (n0, wk, 1, sms(i,1), ldsms)
}
for (j=1;j<=n0;j=j+1)  call  dprmut (sms(1,j), n0, jpvt, 1)

#   restore  F_{2}^{T} Q F_{1} to the BOTTOM-LEFT corner of q
for (j=1;j<=n0;j=j+1)  call  dcopy (n, q(j,n0+1), ldq, q(n0+1,j), 1)

return
end

#..............................................................................
SHAR_EOF
cat << \SHAR_EOF > README

This directory collects RKPACK routines in RATFOR and FORTRAN.  The
RATFOR routines are self-documented and the FORTRAN routines were
translated from the corresponding RATFOR routines using `ratfor' under
standard UNIX system.  The user interface is via two drivers DSIDR,
DMUDR, and two utility routines DCRDR and DSMS, and is illustrated in
a few simulation programs in ../demo/.  The routines are based on a
set of public domain routines from BLAS, BLAS2, and LINPACK collected
in ../lib/.  Run `make' under standard UNIX system to compile and
archive the *.o files in rkpk.a.

Chong Gu
April 18, 1992
SHAR_EOF
cat << \SHAR_EOF > dstup.r

#:::::::::::
#   dstup
#:::::::::::

subroutine  dstup (s, lds, nobs, nnull, qraux, jpvt, y, q, ldqr, ldqc, nq,_
                   info, work)

#  Purpose:  To perform QR decomposition of S=FR and to form F^{T}y, F^{T}QF's.

integer           lds, nobs, nnull, jpvt(*), ldqr, ldqc, nq, info
double precision  s(lds,*), y(*), qraux(*), q(ldqr,ldqc,*), work(*) 

#  On entry:
#      s          the S matrix spanning null space, of size (lds,nnull).
#      lds        leading dimension of s.
#      nobs       number of observations.
#      nnull      dimension of null space.
#      y          observations, of size (nobs).
#      q          the reproducing kernels, of size (ldqr,ldqc,nq).
#      ldqr       leading dimension for rows of q.
#      ldqc       leading dimension for columns of q.
#      nq         number of Q's.

#  On exit:
#      s,qraux,jpvt
#                 QR decomposition of S=FR.
#      y          F^{T} y.
#      q          F^{T}QF's.
#      info        0: normal termination.
#                 -1: dimension error.
#                 >0: rank(S)+1.

#  Work array:
#      work       of size at least (nobs).

#  Routines called directly:
#      Linpack -- dqrdc, dqrsl
#      Rkpack  -- dqrslm

#  Written:  Chong Gu, Statistics, Purdue, latest version 3/7/91.

double precision  dum
integer           j

info = 0

#   check dimension
if ( nobs < 1 | nobs > lds | nobs > ldqr | nobs > ldqc ) {
    info = -1
    return
}

#   QR decomposition of S=FR
    #   The indented line below is added on Mar 7, 1991, 
    #   with credit to Dick Franke
    for (j=1;j<=nnull;j=j+1)  jpvt(j) = 0
call  dqrdc (s, lds, nobs, nnull, qraux, jpvt, work, 1)

#   F^{T} y;  test rank of R
call  dqrsl (s, lds, nobs, nnull, qraux, y, dum, y, work, dum, dum, 01100,_
             info)
if ( info != 0 )  return

#   F^{T} Q_{*} F
for (j=1;j<=nq;j=j+1) {
    call  dqrslm (s, lds, nobs, nnull, qraux, q(1,1,j), ldqr, 0, info,_
                  work)
}

return
end

#...............................................................................

SHAR_EOF
cat << \SHAR_EOF > dsytr.r

#:::::::::::
#   dsytr
#:::::::::::

subroutine  dsytr (x, ldx, n, tol, info, work)

#  Acronym:  Double-precision SYmmetric matrix TRidiagonalization.

#  Purpose:  This routine performs the Householder tridiagonalization
#      algorithm on symmetric matrix `x', with truncation strategy as 
#      described in Gu, Bates, Chen, and Wahba (1988).
 
#  References:  1. Golud and Van Loan (1983) Matrix Computation. (pp.276-7)
#               2. Gu, Bates, Chen, and Wahba(1988), TR#823, Stat, UW-M.
#               3. Dongarra et al.(1979) LINPACK User's Guide. (Chap. 9)

#  Relation with LINPACK:  This routine computes the tridiagonalization
#      U^{T}XU=T, where X is symmetric, T is tridiagonal, and U is an 
#      orthogonal matrix as the product of Housholder matrices.  To compute 
#      U^{T}y or Uy for vector y, we can use routine `dqrsl' of LINPACK.  
#      The calling procedure is:
#
#        1. Create vector `qraux' by  
#             call  dcopy(n-2, x(2,1), ldx+1, qraux, 1)
#        2. Call `dqrsl' as
#             call  dqrsl (x(2,1), ldx, n-1, n-2, qraux, y(2), ... )
 
integer           ldx, n, info
double precision  x(ldx,*), tol, work(*)

#  On entry:
#      x          symmetric matrix, only LOWER triangle refered.
#      ldx        leading dimension of x.
#      n          size of matrix `x'.
#      tol        truncation tolarence; if zero, set to square machine
#                 precision.
 
#  On Exit:
#      x          diagonal:  diagonal elements of tridiag. transf.
#                 upper triangle:  off-diagonal of tridiag. transf.
#                 lower triangle:  overwritten by Householder factors.
#      info        0 :  normal termination.
#                 -1 :  dimension error.

#  Work array:
#      work       of size at least (n).

#  Routines called directly:
#      Fortran -- dfloat, dsqrt
#      Blas    -- daxpy, ddot, dscal
#      Blas2   -- dsymv, dsyr2

#  Written:  Chong Gu, Statistics, UW-Madison, latest version 8/29/88.

double precision  nrmtot, nrmxj, alph, toltot, tolcum, toluni, dn, ddot
integer           j

info = 0

#   check dimension
if ( ldx < n | n <= 2 ) {
    info = -1
    return
}

#   total Frobenius norm
nrmtot = ddot (n, x, ldx+1, x, ldx+1)
for ( j=1 ; j<n ; j=j+1 )  
    nrmtot = nrmtot + 2.d0 * ddot (n-j, x(j+1,j), 1, x(j+1,j), 1)

#   compute machine precision
toltot = 1.d0
while ( 1.d0 + toltot > 1.d0 )  toltot = toltot / 2.d0
toltot = 4.d0 * toltot ** 2

#   set truncation criterion
if ( toltot < tol )  toltot = tol
toltot = toltot * nrmtot
dn = dfloat (n)
toluni = toltot * 6.d0 / dn / ( dn - 1.d0 ) / ( 2.d0 * dn - 1.d0 )

#   initialization
tolcum = 0.d0

#   main process

for ( j=1 ; j<n-1 ; j=j+1 ) {
    #   deduct the F-norm of new diagonal element to update the remainder
    nrmtot = nrmtot - x(j,j) * x(j,j)

    #   compute norm of `b'
    nrmxj = ddot (n-j, x(j+1,j), 1, x(j+1,j), 1)

    #   cumulate the tolarence
    dn = dfloat (n-j)
    tolcum = tolcum + toluni * dn * dn

    #   set diagonal separation if truncation applicable
    if ( 2.d0 * nrmxj <= tolcum ) {     
        x(j,j+1) = 0.d0
        call  dscal (n-j, 0.d0, x(j+1,j), 1)
        #   deduct the norm truncated from the tolerance
        tolcum = tolcum - 2.d0 * nrmxj 
        toltot = toltot - 2.d0 * nrmxj
        next
    }

    #   Householder transform
    if ( x(j+1,j) < 0.d0 )  x(j,j+1) = dsqrt (nrmxj)
    else  x(j,j+1) = - dsqrt (nrmxj)
    nrmtot = nrmtot - 2.d0 * nrmxj

    #   b = sign(b_{1}) b / nrm(b) 
    call  dscal (n-j, -1.d0/x(j,j+1), x(j+1,j), 1)

    #   v = b + e_{1} 
    x(j+1,j) = 1.d0 + x(j+1,j)

    #   p = D v / v_{1} 
    alph = 1.d0 / x(j+1,j)
    call  dsymv ('l', n-j, alph, x(j+1,j+1), ldx, x(j+1,j), 1,_
                 0.d0, work(j+1), 1)

    #   w = p - (p^{T}v) v / (2 v_{1}) 
    alph = - ddot (n-j, work(j+1), 1, x(j+1,j), 1) / 2.d0 / x(j+1,j)
    call  daxpy (n-j, alph, x(j+1,j), 1, work(j+1), 1)

    #   D = D - v w^{T} - w v^{T} 
    call  dsyr2 ('l', n-j, -1.d0, x(j+1,j), 1, work(j+1), 1, x(j+1,j+1),_
                 ldx)

}

x(n-1,n) = x(n,n-1)

return
end

#...............................................................................

SHAR_EOF
cat << \SHAR_EOF > dtrev.r

#:::::::::::
#   dtrev
#:::::::::::

subroutine  dtrev (vmu, t, ldt, n, z, score, varht, info, work)

#  Acronym:  Double-precision TRidiagonal EValuation.

#  Purpose:  To compute the GCV/GML function and the related variance
#      estimate from the tridiagonal matrix `t' and data vector `z'.

#  References:  1. Gu, Bates, Chen, and Wahba(1988), TR#823, Stat, UW-M.
#               2. Dongarra et al. (1979) LINPACK User's Guide. (Chap. 4)

character*1       vmu
integer           n, info
double precision  t(ldt,*), z(*), score, varht, work(*)

#  On entry:
#      vmu        'v':  GCV.
#                 'm':  GML.
#                 'u':  unbiased risk estimate.
#      t          the positive definite tridiagonal matrix  T,
#                 stored in packed form:
#                     t(1,2:n):  off-diagonal
#                     t(2,1:n):  diagonal.
#      ldt        leading dimension of t.
#      n          the dimension of the matrix.
#      z          the appropriately transformed data vector.
#      varht      known variance if vmu=='u'.

#  On exit:
#      score      the GCV/GML/URE score.
#      varht      \hat\sigma^{2}.
#      info         -3:  vmu is none of 'v', 'm', or 'u'.
#                 > -3:  as from LINPACK's `dpbfa'.

#  Work array:
#      work       of size at least (n).

#  Routines called directly:
#      Fortran -- dexp, dfloat, dlog
#      Blas    -- dasum, dcopy, ddot, dscal
#      Linpack -- dpbfa, dpbsl

#  Written:  Chong Gu, Statistics, UW-Madison, latest version 12/29/91.

double precision  nume, deno, tmp, alph, la, dasum, ddot
integer           j

info = 0

#   check vmu
if ( vmu != 'v' & vmu != 'm' & vmu != 'u' ) {
    info = -3
    return
}

la = t(1,1)

#   standardize the matrix for numerical stability
alph = dfloat (n) / dasum (n, t(2,1), ldt)
call  dscal (n, alph, t(2,1), ldt)
call  dscal (n-1, alph, t(1,2), ldt)

#   decomposition
call  dpbfa (t, ldt, n, 1, info)
if ( info != 0 )  return

call  dcopy (n, z, 1, work, 1)
call  dpbsl (t, ldt, n, 1, work)
    
#   GCV computation
if ( vmu == 'v' ) {
    tmp = 1.d0 / t(2,n) / t(2,n)
    deno = tmp
    for (j=n-1;j>0;j=j-1) {
        tmp = ( 1.d0 + t(1,j+1) * t(1,j+1) * tmp ) / t(2,j) / t(2,j)
        deno = deno + tmp
    }
    nume = ddot (n, work, 1, work, 1) / dfloat (n)
    deno = deno / dfloat (n)
    varht = alph * la * nume / deno
    score = nume / deno / deno
}

#   GML computation
if ( vmu == 'm' ) {
    deno = dlog (t(2,n))
    for (j=n-1;j>0;j=j-1)  deno = deno + dlog (t(2,j))
    nume = ddot (n, z, 1, work, 1) / dfloat (n)
    varht = alph * la * nume
    score = nume * dexp (2.d0 * deno / dfloat (n))
}

#   unbiased risk computation
if ( vmu == 'u' ) {
    nume = ddot (n, work, 1, work, 1) / dfloat (n)
    tmp = 1.d0 / t(2,n) / t(2,n)
    deno = tmp
    for (j=n-1;j>0;j=j-1) {
        tmp = ( 1.d0 + t(1,j+1) * t(1,j+1) * tmp ) / t(2,j) / t(2,j)
        deno = deno + tmp
    }
    deno = deno / dfloat (n)
    score = alph * alph * la * la * nume - 2.d0 * varht * alph * la * deno
}

return
end

#...............................................................................
SHAR_EOF
cat << \SHAR_EOF > dcoef.f
      subroutine dcoef (s, lds, nobs, nnull, qraux, jpvt, z, q, ldq, 
     *nlaht, c, d, info, twk)
      integer lds, nobs, nnull, jpvt(*), ldq, info
      double precision s(lds,*), qraux(*), z(*), q(ldq,*), nlaht, c(*), 
     *d(*), twk(2,*)
      double precision dum, ddot
      integer n, n0
      info = 0
      if(.not.( nnull .lt. 1 .or. nnull .ge. nobs .or. nobs .gt. lds 
     *.or. nobs .gt. ldq ))goto 23000
      info = -1
      return
23000 continue
      n0 = nnull
      n = nobs - nnull
      call dset (n, 10.d0 ** nlaht, twk(2,1), 2)
      call daxpy (n, 1.d0, q(n0+1,n0+1), ldq+1, twk(2,1), 2)
      call dcopy (n-1, q(n0+1,n0+2), ldq+1, twk(1,2), 2)
      call dpbfa (twk, 2, n, 1, info)
      if(.not.( info .ne. 0 ))goto 23002
      info = -2
      return
23002 continue
      call dpbsl (twk, 2, n, 1, z(n0+1))
      call dcopy (n-2, q(n0+2,n0+1), ldq+1, twk, 1)
      call dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, twk, z(n0+2), z(n0+2), 
     *dum, dum, dum, dum, 10000, info)
      call dset (n0, 0.d0, c, 1)
      call dcopy (n, z(n0+1), 1, c(n0+1), 1)
      call dqrsl (s, lds, nobs, nnull, qraux, c, c, dum, dum, dum, dum, 
     *10000, info)
      j=1
23004 if(.not.(j.le.n0))goto 23006
      d(j) = z(j) - ddot (n, z(n0+1), 1, q(n0+1,j), 1)
      j=j+1
      goto 23004
23006 continue
      call dtrsl (s, lds, n0, d, 01, info)
      call dprmut (d, n0, jpvt, 1)
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dcore.f
      subroutine dcore (vmu, q, ldq, nobs, nnull, tol, z, job, limnla, 
     *nlaht, score, varht, info, twk, work)
      character*1 vmu
      integer ldq, nobs, nnull, job, info
      double precision q(ldq,*), tol, z(*), limnla(2), nlaht, score(*), 
     *varht, twk(2,*), work(*)
      double precision dum, low, upp, dasum, mchpr
      integer n0, n, j
      info = 0
      if(.not.( vmu .ne. 'v' .and. vmu .ne. 'm' .and. vmu .ne. 'u' ))
     *goto 23000
      info = -3
      return
23000 continue
      if(.not.( nnull .lt. 1 .or. nobs .le. nnull .or. nobs .gt. ldq ))
     *goto 23002
      info = -1
      return
23002 continue
      n0 = nnull
      n = nobs - nnull
      call dsytr (q(n0+1,n0+1), ldq, n, tol, info, work)
      if(.not.( info .ne. 0 ))goto 23004
      return
23004 continue
      call dcopy (n-2, q(n0+2,n0+1), ldq+1, work, 1)
      call dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, work, z(n0+2), dum, z(n0+
     *2), dum, dum, dum, 01000, info)
      if(.not.( job .eq. 0 ))goto 23006
      mchpr = 1.d0
23008 if(.not.( 1.d0 + mchpr .gt. 1.d0 ))goto 23009
      mchpr = mchpr / 2.d0
      goto 23008
23009 continue
      mchpr = mchpr * 2.d0
      limnla(2) = dmax1 (dasum (n, q(n0+1,n0+1), ldq+1) * 1.d2, mchpr)
      limnla(1) = limnla(2) * mchpr
      limnla(2) = dlog10 (limnla(2))
      limnla(1) = dlog10 (limnla(1))
23006 continue
      low = limnla(1)
      upp = limnla(2)
      if(.not.( job .le. 0 ))goto 23010
      call dgold (vmu, q(n0+1,n0+1), ldq, n, z(n0+1), low, upp, nlaht, 
     *score(1), varht, info, twk, work)
      if(.not.( vmu .eq. 'v' ))goto 23012
      score(1) = score(1) * dfloat (nobs) / dfloat (n)
23012 continue
      if(.not.( vmu .eq. 'm' ))goto 23014
      score(1) = score(1) * dfloat (n) / dfloat (nobs)
23014 continue
      if(.not.( vmu .eq. 'u' ))goto 23016
      score(1) = score(1) * dfloat (n) / dfloat (nobs) + 2.d0 * varht
23016 continue
      goto 23011
23010 continue
      call deval (vmu, q(n0+1,n0+1), ldq, n, z(n0+1), job, low, upp, 
     *nlaht, score, varht, info, twk, work)
      dum = dfloat (nobs) / dfloat (n)
      j=1
23018 if(.not.(j.le.job+1))goto 23020
      if(.not.( vmu .eq. 'v' ))goto 23021
      score(j) = score(j) * dum
23021 continue
      if(.not.( vmu .eq. 'm' ))goto 23023
      score(j) = score(j) / dum
23023 continue
      if(.not.( vmu .eq. 'u' ))goto 23025
      score(j) = score(j) / dum + 2.d0 * varht
23025 continue
      j=j+1
      goto 23018
23020 continue
23011 continue
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dcrdr.f
      subroutine dcrdr (s, lds, nobs, nnull, qraux, jpvt, q, ldq, nlaht,
     * r, ldr, nr, cr, ldcr, dr, lddr, wk, info)
      integer lds, nobs, nnull, jpvt(*), ldq, ldr, nr, ldcr, lddr, info
      double precision s(lds,*), qraux(*), q(ldq,*), nlaht, r(ldr,*), 
     *cr(ldcr,*), dr(lddr,*), wk(2,*)
      double precision dum, ddot
      integer i, j, n, n0
      info = 0
      if(.not.( nnull .lt. 1 .or. nnull .ge. nobs .or. nobs .gt. lds 
     *.or. nobs .gt. ldq .or. ldr .lt. nobs .or. nr .lt. 1 .or. ldcr 
     *.lt. nobs .or. lddr .lt. nnull ))goto 23000
      info = -1
      return
23000 continue
      n0 = nnull
      n = nobs - nnull
      call dcopy (n-2, q(n0+2,n0+1), ldq+1, wk, 1)
      j=1
23002 if(.not.(j.le.nr))goto 23004
      call dqrsl (s, lds, nobs, nnull, qraux, r(1,j), dum, r(1,j), dum, 
     *dum, dum, 01000, info)
      call dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, wk, r(n0+2,j), dum, r(n0+
     *2,j), dum, dum, dum, 01000, info)
      j=j+1
      goto 23002
23004 continue
      call dset (n, 10.d0 ** nlaht, wk(2,1), 2)
      call daxpy (n, 1.d0, q(n0+1,n0+1), ldq+1, wk(2,1), 2)
      call dcopy (n-1, q(n0+1,n0+2), ldq+1, wk(1,2), 2)
      call dpbfa (wk, 2, n, 1, info)
      if(.not.( info .ne. 0 ))goto 23005
      info = -2
      return
23005 continue
      j=1
23007 if(.not.(j.le.nr))goto 23009
      call dpbsl (wk, 2, n, 1, r(n0+1,j))
      j=j+1
      goto 23007
23009 continue
      call dcopy (n-2, q(n0+2,n0+1), ldq+1, wk, 1)
      j=1
23010 if(.not.(j.le.nr))goto 23012
      call dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, wk, r(n0+2,j), r(n0+2,j),
     * dum, dum, dum, dum, 10000, info)
      j=j+1
      goto 23010
23012 continue
      j=1
23013 if(.not.(j.le.nr))goto 23015
      call dset (n0, 0.d0, cr(1,j), 1)
      call dcopy (n, r(n0+1,j), 1, cr(n0+1,j), 1)
      call dqrsl (s, lds, nobs, nnull, qraux, cr(1,j), cr(1,j), dum, 
     *dum, dum, dum, 10000, info)
      j=j+1
      goto 23013
23015 continue
      j=1
23016 if(.not.(j.le.nr))goto 23018
      i=1
23019 if(.not.(i.le.n0))goto 23021
      dr(i,j) = r(i,j) - ddot (n, r(n0+1,j), 1, q(n0+1,i), 1)
      i=i+1
      goto 23019
23021 continue
      call dtrsl (s, lds, n0, dr(1,j), 01, info)
      call dprmut (dr(1,j), n0, jpvt, 1)
      j=j+1
      goto 23016
23018 continue
      return
      end
SHAR_EOF
cat << \SHAR_EOF > ddeev.f
      subroutine ddeev (vmu, nobs, q, ldqr, ldqc, n, nq, u, ldu, uaux, 
     *t, x, theta, nlaht, score, varht, hes, ldh, gra, hwk1, hwk2, gwk1,
     * gwk2, kwk, ldk, work1, work2, work3, info)
      character*1 vmu
      integer nobs, ldqr, ldqc, n, nq, ldu, ldh, ldk, info
      double precision q(ldqr,ldqc,*), u(ldu,*), uaux(*), t(2,*), x(*), 
     *theta(*), nlaht, score, varht, hes(ldh,*), gra(*), hwk1(nq,*), 
     *hwk2(nq,*), gwk1(*), gwk2(*), kwk(ldk,ldk,*), work1(*), work2(*), 
     *work3(*)
      double precision trc, det, dum, ddot
      integer i, j, m
      info = 0
      call dset (nq, 0.d0, gra, 1)
      call dset (nq*nq, 0.d0, hes, 1)
      if(.not.( vmu .ne. 'v' .and. vmu .ne. 'm' .and. vmu .ne. 'u' ))
     *goto 23000
      info = -3
      return
23000 continue
      if(.not.( nobs .lt. n .or. ldqr .lt. n .or. ldqc .lt. n .or. nq 
     *.le. 0 .or. ldu .lt. n-1 .or. ldh .lt. nq .or. ldk .lt. n ))goto 2
     *3002
      info = -1
      return
23002 continue
      i=2
23004 if(.not.(i.le.nq))goto 23006
      if(.not.( theta(i) .le. -25.d0 ))goto 23007
      goto 23005
23007 continue
      j=1
23009 if(.not.(j.le.n))goto 23011
      call dcopy (n-j+1, q(j,j,i), 1, kwk(j,j,i), 1)
      call dscal (n-j+1, 10.d0 ** theta(i), kwk(j,j,i), 1)
      j=j+1
      goto 23009
23011 continue
      call dqrslm (u, ldu, n-1, n-2, uaux, kwk(2,2,i), n, 0, info, 
     *work1)
      call dqrsl (u, ldu, n-1, n-2, uaux, kwk(2,1,i), dum, kwk(2,1,i), 
     *dum, dum, dum, 01000, info)
23005 i=i+1
      goto 23004
23006 continue
      call dcopy (n, t(2,1), 2, kwk(1,1,1), n+1)
      call dcopy (n-1, t(1,2), 2, kwk(2,1,1), n+1)
      j=1
23012 if(.not.(j.lt.n-1))goto 23014
      call dset (n-j-1, 0.d0, kwk(j+2,j,1), 1)
      j=j+1
      goto 23012
23014 continue
      i=2
23015 if(.not.(i.le.nq))goto 23017
      if(.not.( theta(i) .le. -25.d0 ))goto 23018
      goto 23016
23018 continue
      j=1
23020 if(.not.(j.le.n))goto 23022
      call daxpy (n-j+1, -1.d0, kwk(j,j,i), 1, kwk(j,j,1), 1)
      j=j+1
      goto 23020
23022 continue
23016 i=i+1
      goto 23015
23017 continue
      i=1
23023 if(.not.(i.le.nq))goto 23025
      if(.not.( theta(i) .le. -25.d0 ))goto 23026
      goto 23024
23026 continue
      j=1
23028 if(.not.(j.lt.n))goto 23030
      call dcopy (n-j, kwk(j+1,j,i), 1, kwk(j,j+1,i), n)
      j=j+1
      goto 23028
23030 continue
23024 i=i+1
      goto 23023
23025 continue
      call dset (n, 10.d0 ** nlaht, work1, 1)
      call daxpy (n, 1.d0, work1, 1, t(2,1), 2)
      call dpbfa (t, 2, n, 1, info)
      if(.not.( info .ne. 0 ))goto 23031
      info = -2
      return
23031 continue
      i=1
23033 if(.not.(i.le.nq))goto 23035
      if(.not.( theta(i) .le. -25.d0 ))goto 23036
      goto 23034
23036 continue
      j=1
23038 if(.not.(j.le.n))goto 23040
      call dpbsl (t, 2, n, 1, kwk(1,j,i))
      j=j+1
      goto 23038
23040 continue
23034 i=i+1
      goto 23033
23035 continue
      call dcopy (n, x, 1, work1, 1)
      call dpbsl (t, 2, n, 1, work1)
      if(.not.( vmu .ne. 'm' ))goto 23041
      call dcopy (n, work1, 1, work2, 1)
      call dscal (n, 2.d0, work2, 1)
      goto 23042
23041 continue
      call dcopy (n, x, 1, work2, 1)
23042 continue
      i=1
23043 if(.not.(i.le.nq))goto 23045
      if(.not.( theta(i) .le. -25.d0 ))goto 23046
      goto 23044
23046 continue
      call dgemv ('t', n, n, 1.d0, kwk(1,1,i), n, work2, 1, 0.d0, work3,
     * 1)
      gwk1(i) = - ddot (n, work1, 1, work3, 1)
23044 i=i+1
      goto 23043
23045 continue
      i=1
23048 if(.not.(i.le.nq))goto 23050
      gwk2(i) = 0.d0
      if(.not.( theta(i) .le. -25.d0 ))goto 23051
      goto 23049
23051 continue
      j=1
23053 if(.not.(j.le.n))goto 23055
      if(.not.( vmu .ne. 'm' ))goto 23056
      call dcopy (n, kwk(1,j,i), 1, work1, 1)
      call dpbsl (t, 2, n, 1, work1)
      gwk2(i) = gwk2(i) - work1(j)
      goto 23057
23056 continue
      gwk2(i) = gwk2(i) - kwk(j,j,i)
23057 continue
      j=j+1
      goto 23053
23055 continue
23049 i=i+1
      goto 23048
23050 continue
      if(.not.( vmu .ne. 'm' ))goto 23058
      call dcopy (n, x, 1, work1, 1)
      call dpbsl (t, 2, n, 1, work1)
      i=1
23060 if(.not.(i.le.nq))goto 23062
      if(.not.( theta(i) .le. -25.d0 ))goto 23063
      goto 23061
23063 continue
      call dgemv ('n', n, n, 1.d0, kwk(1,1,i), n, work1, 1, 0.d0, work2,
     * 1)
      j=1
23065 if(.not.(j.le.i))goto 23067
      if(.not.( theta(j) .le. -25.d0 ))goto 23068
      goto 23066
23068 continue
      call dgemv ('n', n, n, 1.d0, kwk(1,1,j), n, work1, 1, 0.d0, work3,
     * 1)
      hwk1(i,j) = 2.d0 * ddot (n, work2, 1, work3, 1)
      call dgemv ('t', n, n, 1.d0, kwk(1,1,j), n, work1, 1, 0.d0, work3,
     * 1)
      hwk1(i,j) = hwk1(i,j) + 2.d0 * ddot (n, work2, 1, work3, 1)
23066 j=j+1
      goto 23065
23067 continue
      call dgemv ('t', n, n, 1.d0, kwk(1,1,i), n, work1, 1, 0.d0, work2,
     * 1)
      j=1
23070 if(.not.(j.le.i))goto 23072
      if(.not.( theta(j) .le. -25.d0 ))goto 23073
      goto 23071
23073 continue
      call dgemv ('n', n, n, 1.d0, kwk(1,1,j), n, work1, 1, 0.d0, work3,
     * 1)
      hwk1(i,j) = hwk1(i,j) + 2.d0 * ddot (n, work2, 1, work3, 1)
23071 j=j+1
      goto 23070
23072 continue
23061 i=i+1
      goto 23060
23062 continue
      goto 23059
23058 continue
      call dcopy (n, x, 1, work1, 1)
      call dpbsl (t, 2, n, 1, work1)
      i=1
23075 if(.not.(i.le.nq))goto 23077
      if(.not.( theta(i) .le. -25.d0 ))goto 23078
      goto 23076
23078 continue
      call dgemv ('n', n, n, 1.d0, kwk(1,1,i), n, work1, 1, 0.d0, work2,
     * 1)
      j=1
23080 if(.not.(j.le.i))goto 23082
      if(.not.( theta(j) .le. -25.d0 ))goto 23083
      goto 23081
23083 continue
      call dgemv ('t', n, n, 1.d0, kwk(1,1,j), n, x, 1, 0.d0, work3, 1)
      hwk1(i,j) = 2.d0 * ddot (n, work2, 1, work3, 1)
23081 j=j+1
      goto 23080
23082 continue
23076 i=i+1
      goto 23075
23077 continue
23059 continue
      i=1
23085 if(.not.(i.le.nq))goto 23087
      if(.not.( theta(i) .le. -25.d0 ))goto 23088
      goto 23086
23088 continue
      hwk1(i,i) = hwk1(i,i) + gwk1(i)
23086 i=i+1
      goto 23085
23087 continue
      i=1
23090 if(.not.(i.le.nq))goto 23092
      if(.not.( theta(i) .le. -25.d0 ))goto 23093
      goto 23091
23093 continue
      m=1
23095 if(.not.(m.le.i))goto 23097
      hwk2(i,m) = 0.d0
      if(.not.( theta(m) .le. -25.d0 ))goto 23098
      goto 23096
23098 continue
      j=1
23100 if(.not.(j.le.n))goto 23102
      if(.not.( vmu .ne. 'm' ))goto 23103
      call dcopy (n, kwk(1,j,m), 1, work1, 1)
      call dpbsl (t, 2, n, 1, work1)
      hwk2(i,m) = hwk2(i,m) + 2.d0 * ddot (n, kwk(j,1,i), n, work1, 1)
      goto 23104
23103 continue
      hwk2(i,m) = hwk2(i,m) + ddot (n, kwk(j,1,i), n, kwk(1,j,m), 1)
23104 continue
      j=j+1
      goto 23100
23102 continue
23096 m=m+1
      goto 23095
23097 continue
23091 i=i+1
      goto 23090
23092 continue
      i=1
23105 if(.not.(i.le.nq))goto 23107
      if(.not.( theta(i) .le. -25.d0 ))goto 23108
      goto 23106
23108 continue
      hwk2(i,i) = hwk2(i,i) + gwk2(i)
23106 i=i+1
      goto 23105
23107 continue
      if(.not.( vmu .eq. 'v' ))goto 23110
      trc = dfloat (nobs) * 10.d0 ** (-nlaht) * varht / score
      i=1
23112 if(.not.(i.le.nq))goto 23114
      if(.not.( theta(i) .le. -25.d0 ))goto 23115
      goto 23113
23115 continue
      gra(i) = gwk1(i) / trc / trc - 2.d0 * score * gwk2(i) / trc / 
     *dfloat(nobs)
23113 i=i+1
      goto 23112
23114 continue
      call dscal (nq, dfloat (nobs), gra, 1)
23110 continue
      if(.not.( vmu .eq. 'u' ))goto 23117
      dum = 10.d0 ** nlaht
      i=1
23119 if(.not.(i.le.nq))goto 23121
      if(.not.( theta(i) .le. -25.d0 ))goto 23122
      goto 23120
23122 continue
      gra(i) = dum * dum * gwk1(i) - 2.d0 * varht * dum * gwk2(i)
23120 i=i+1
      goto 23119
23121 continue
      call dscal (nq, 1.d0/dfloat (n), gra, 1)
23117 continue
      if(.not.( vmu .eq. 'm' ))goto 23124
      det = 10.d0 ** (-nlaht) * varht / score
      i=1
23126 if(.not.(i.le.nq))goto 23128
      if(.not.( theta(i) .le. -25.d0 ))goto 23129
      goto 23127
23129 continue
      gra(i) = gwk1(i) / det - dfloat (nobs) / dfloat (n) * score * 
     *gwk2(i)
23127 i=i+1
      goto 23126
23128 continue
      call dscal (nq, 1.d0 / dfloat (nobs), gra, 1)
23124 continue
      if(.not.( vmu .eq. 'v' ))goto 23131
      i=1
23133 if(.not.(i.le.nq))goto 23135
      if(.not.( theta(i) .le. -25.d0 ))goto 23136
      goto 23134
23136 continue
      j=1
23138 if(.not.(j.le.i))goto 23140
      if(.not.( theta(j) .le. -25.d0 ))goto 23141
      goto 23139
23141 continue
      hes(i,j) = hwk1(i,j) / trc / trc - 2.d0 * gwk1(i) * gwk2(j) / trc 
     *** 3 - 2.d0 * gwk1(j) * gwk2(i) / trc ** 3 - 2.d0 * score * hwk2(
     *i,j) / trc / dfloat (nobs) + 6.d0 * score * gwk2(i) * gwk2(j) / 
     *trc / trc / dfloat (nobs)
23139 j=j+1
      goto 23138
23140 continue
      call dscal (i, dfloat (nobs), hes(i,1), ldh)
23134 i=i+1
      goto 23133
23135 continue
23131 continue
      if(.not.( vmu .eq. 'u' ))goto 23143
      i=1
23145 if(.not.(i.le.nq))goto 23147
      if(.not.( theta(i) .le. -25.d0 ))goto 23148
      goto 23146
23148 continue
      j=1
23150 if(.not.(j.le.i))goto 23152
      if(.not.( theta(j) .le. -25.d0 ))goto 23153
      goto 23151
23153 continue
      hes(i,j) = dum * dum * hwk1(i,j) - 2.d0 * varht * dum * hwk2(i,j)
23151 j=j+1
      goto 23150
23152 continue
      call dscal (i, 1.d0/dfloat (n), hes(i,1), ldh)
23146 i=i+1
      goto 23145
23147 continue
23143 continue
      if(.not.( vmu .eq. 'm' ))goto 23155
      i=1
23157 if(.not.(i.le.nq))goto 23159
      if(.not.( theta(i) .le. -25.d0 ))goto 23160
      goto 23158
23160 continue
      j=1
23162 if(.not.(j.le.i))goto 23164
      if(.not.( theta(j) .le. -25.d0 ))goto 23165
      goto 23163
23165 continue
      hes(i,j) = hwk1(i,j) / det - gwk1(i) * gwk2(j) / det / dfloat (n) 
     *- gwk1(j) * gwk2(i) / det / dfloat (n) - dfloat (nobs) / dfloat (
     *n) * score * hwk2(i,j) + dfloat (nobs) / dfloat (n) ** 2 * score *
     * gwk2(i) * gwk2(j)
23163 j=j+1
      goto 23162
23164 continue
      call dscal (i, 1.d0 / dfloat (nobs), hes(i,1), ldh)
23158 i=i+1
      goto 23157
23159 continue
23155 continue
      return
      end
SHAR_EOF
cat << \SHAR_EOF > deval.f
      subroutine deval (vmu, q, ldq, n, z, nint, low, upp, nlaht, score,
     * varht, info, twk, work)
      character*1 vmu
      integer ldq, n, nint, info
      double precision q(ldq,*), z(*), low, upp, nlaht, score(*), varht,
     * twk(2,*), work(*)
      double precision tmp, minscr, mlo, varhtwk
      integer j
      info = 0
      if(.not.( upp .lt. low ))goto 23000
      mlo = low
      low = upp
      upp = mlo
23000 continue
      if(.not.( (vmu .ne. 'v' .and. vmu .ne. 'm' .and. vmu .ne. 'u') 
     *.or. nint .lt. 1 ))goto 23002
      info = -3
      return
23002 continue
      if(.not.( 1 .gt. n .or. n .gt. ldq ))goto 23004
      info = -1
      return
23004 continue
      j=1
23006 if(.not.(j.le.nint+1))goto 23008
      tmp = low + dfloat (j-1) * ( upp - low ) / dfloat (nint)
      call dset (n, 10.d0 ** (tmp), twk(2,1), 2)
      call daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
      call dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
      twk(1,1) = 10.d0**tmp
      call dtrev (vmu, twk, 2, n, z, score(j), varht, info, work)
      if(.not.( info .ne. 0 ))goto 23009
      info = -2
      return
23009 continue
      if(.not.( score(j) .le. minscr .or. j .eq. 1 ))goto 23011
      minscr = score(j)
      nlaht = tmp
      varhtwk = varht
23011 continue
      j=j+1
      goto 23006
23008 continue
      varht = varhtwk
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dgold.f
      subroutine dgold (vmu, q, ldq, n, z, low, upp, nlaht, score, 
     *varht, info, twk, work)
      character*1 vmu
      integer ldq, n, info
      double precision q(ldq,*), z(*), low, upp, nlaht, score, varht, 
     *twk(2,*), work(*)
      double precision ratio, mlo, mup, tmpl, tmpu
      ratio = ( dsqrt (5.d0) - 1.d0 ) / 2.d0
      info = 0
      if(.not.( upp .lt. low ))goto 23000
      mlo = low
      low = upp
      upp = mlo
23000 continue
      if(.not.( vmu .ne. 'v' .and. vmu .ne. 'm' .and. vmu .ne. 'u' ))
     *goto 23002
      info = -3
      return
23002 continue
      if(.not.( n .lt. 1 .or. n .gt. ldq ))goto 23004
      info = -1
      return
23004 continue
      mlo = upp - ratio * (upp - low)
      call dset (n, 10.d0 ** (mlo), twk(2,1), 2)
      call daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
      call dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
      twk(1,1) = 10.d0**mlo
      call dtrev (vmu, twk, 2, n, z, tmpl, varht, info, work)
      if(.not.( info .ne. 0 ))goto 23006
      info = -2
      return
23006 continue
      mup = low + ratio * (upp - low)
      call dset (n, 10.d0 ** (mup), twk(2,1), 2)
      call daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
      call dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
      twk(1,1) = 10.d0**mup
      call dtrev (vmu, twk, 2, n, z, tmpu, varht, info, work)
      if(.not.( info .ne. 0 ))goto 23008
      info = -2
      return
23008 continue
23010 continue
      if(.not.( mup - mlo .lt. 1.d-7 ))goto 23013
      goto 23012
23013 continue
      if(.not.( tmpl .lt. tmpu ))goto 23015
      upp = mup
      mup = mlo
      tmpu = tmpl
      mlo = upp - ratio * (upp - low)
      call dset (n, 10.d0 ** (mlo), twk(2,1), 2)
      call daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
      call dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
      twk(1,1) = 10.d0**mlo
      call dtrev (vmu, twk, 2, n, z, tmpl, varht, info, work)
      if(.not.( info .ne. 0 ))goto 23017
      info = -2
      return
23017 continue
      goto 23016
23015 continue
      low = mlo
      mlo = mup
      tmpl = tmpu
      mup = low + ratio * (upp - low)
      call dset (n, 10.d0 ** (mup), twk(2,1), 2)
      call daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
      call dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
      twk(1,1) = 10.d0**mup
      call dtrev (vmu, twk, 2, n, z, tmpu, varht, info, work)
      if(.not.( info .ne. 0 ))goto 23019
      info = -2
      return
23019 continue
23016 continue
23011 goto 23010
23012 continue
      nlaht = ( mup + mlo ) / 2.d0
      call dset (n, 10.d0 ** (nlaht), twk(2,1), 2)
      call daxpy (n, 1.d0, q, ldq+1, twk(2,1), 2)
      call dcopy (n-1, q(1,2), ldq+1, twk(1,2), 2)
      twk(1,1) = 10.d0**nlaht
      call dtrev (vmu, twk, 2, n, z, score, varht, info, work)
      if(.not.( info .ne. 0 ))goto 23021
      info = -2
      return
23021 continue
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dmcdc.f
      subroutine dmcdc (a, lda, p, e, jpvt, info)
      integer lda, p, jpvt(*), info
      double precision a(lda,*), e(*)
      double precision beta, delta, theta, tmp, dasum, ddot
      integer i, j, jmax, jtmp, idamax
      info = 0
      if(.not.( lda .lt. p .or. p .lt. 1 ))goto 23000
      info = -1
      return
23000 continue
      tmp = 1.d0
23002 if(.not.( 1.d0 + tmp .gt. 1.d0 ))goto 23003
      tmp = tmp / 2.d0
      goto 23002
23003 continue
      jmax = idamax (p, a, lda+1)
      beta = dmax1 (2.d0 * tmp, dabs (a(jmax,jmax)))
      tmp = dsqrt (dfloat (p*p-1))
      if(.not.( tmp .lt. 1.d0 ))goto 23004
      tmp = 1.d0
23004 continue
      j=2
23006 if(.not.(j.le.p))goto 23008
      jmax = idamax (j-1, a(1,j), 1)
      beta = dmax1 (beta, dabs (a(jmax,j)) / tmp)
      j=j+1
      goto 23006
23008 continue
      delta = dasum (p, a, lda+1) / dfloat (p) * 1.d-7
      delta = dmax1 (delta, 1.d-10)
      j=1
23009 if(.not.(j.le.p))goto 23011
      jpvt(j) = j
      j=j+1
      goto 23009
23011 continue
      j=1
23012 if(.not.(j.le.p))goto 23014
      jmax = idamax (p-j+1, a(j,j), lda+1) + j - 1
      if(.not.( jmax .ne. j ))goto 23015
      call dswap (j-1, a(1,j), 1, a(1,jmax), 1)
      call dswap (jmax-j-1, a(j,j+1), lda, a(j+1,jmax), 1)
      call dswap (p-jmax, a(j,jmax+1), lda, a(jmax,jmax+1), lda)
      tmp = a(j,j)
      a(j,j) = a(jmax,jmax)
      a(jmax,jmax) = tmp
      jtmp = jpvt(j)
      jpvt(j) = jpvt(jmax)
      jpvt(jmax) = jtmp
23015 continue
      i=1
23017 if(.not.(i.lt.j))goto 23019
      a(i,j) = a(i,j) / a(i,i)
      i=i+1
      goto 23017
23019 continue
      i=j+1
23020 if(.not.(i.le.p))goto 23022
      a(j,i) = a(j,i) - ddot (j-1, a(1,j), 1, a(1,i), 1)
      i=i+1
      goto 23020
23022 continue
      if(.not.( j .eq. p ))goto 23023
      theta = 0.d0
      goto 23024
23023 continue
      jmax = idamax (p-j, a(j,j+1), lda) + j
      theta = dabs (a(j,jmax))
23024 continue
      tmp = dmax1 (delta, dabs (a(j,j)), theta ** 2 / beta)
      e(j) = tmp - a(j,j)
      a(j,j) = tmp
      i=j+1
23025 if(.not.(i.le.p))goto 23027
      a(i,i) = a(i,i) - a(j,i) ** 2 / a(j,j)
      i=i+1
      goto 23025
23027 continue
      j=j+1
      goto 23012
23014 continue
      j=1
23028 if(.not.(j.le.p))goto 23030
      a(j,j) = dsqrt (a(j,j))
      call dscal (p-j, a(j,j), a(j,j+1), lda)
      j=j+1
      goto 23028
23030 continue
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dmudr.f
      subroutine dmudr (vmu, s, lds, nobs, nnull, q, ldqr, ldqc, nq, y, 
     *tol, init, prec, maxite, theta, nlaht, score, varht, c, d, wk, 
     *info)
      integer lds, nobs, nnull, ldqr, ldqc, nq, init, maxite, info
      double precision s(lds,*), q(ldqr,ldqc,*), y(*), tol, prec, theta(
     **), nlaht, score, varht, c(*), d(*), wk(*)
      character*1 vmu
      integer n, n0
      integer iqraux, itraux, itwk, iqwk, iywk, ithewk, ihes, igra, 
     *ihwk1, ihwk2, igwk1, igwk2, ikwk, iwork1, iwork2, ijpvt, ipvtwk
      n = nobs
      n0 = nnull
      iqraux = 1
      itraux = iqraux + n0
      itwk = itraux + (n-n0-2)
      iqwk = itwk + 2 * (n-n0)
      iywk = iqwk + n * n
      ithewk = iywk + n
      ihes = ithewk + nq
      igra = ihes + nq * nq
      ihwk1 = igra + nq
      ihwk2 = ihwk1 + nq * nq
      igwk1 = ihwk2 + nq * nq
      igwk2 = igwk1 + nq
      ikwk = igwk2 + nq
      iwork1 = ikwk + (n-n0) * (n-n0) * nq
      iwork2 = iwork1 + n
      ijpvt = iwork2 + n
      ipvtwk = ijpvt + n0
      call dmudr1 (vmu, s, lds, nobs, nnull, q, ldqr, ldqc, nq, y, tol, 
     *init, prec, maxite, theta, nlaht, score, varht, c, d, wk(iqraux), 
     *wk(ijpvt), wk(itwk), wk(itraux), wk(iqwk), wk(iywk), wk(ithewk), 
     *wk(ihes), wk(igra), wk(ihwk1), wk(ihwk2), wk(igwk1), wk(igwk2), 
     *wk(ipvtwk), wk(ikwk), wk(iwork1), wk(iwork2), info)
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dmudr1.f
      subroutine dmudr1 (vmu, s, lds, nobs, nnull, q, ldqr, ldqc, nq, y,
     * tol, init, prec, maxite, theta, nlaht, score, varht, c, d, qraux,
     * jpvt, twk, traux, qwk, ywk, thewk, hes, gra, hwk1, hwk2, gwk1, 
     *gwk2, pvtwk, kwk, work1, work2, info)
      integer lds, nobs, nnull, ldqr, ldqc, nq, init, maxite, jpvt(*), 
     *pvtwk(*), info
      double precision s(lds,*), q(ldqr,ldqc,*), y(*), tol, prec, theta(
     **), nlaht, score, varht, c(*), d(*), qraux(*), traux(*), twk(2,*),
     * qwk(ldqr,*), ywk(*), thewk(*), hes(nq,*), gra(*), hwk1(nq,*), 
     *hwk2(nq,*), gwk1(*), gwk2(*), kwk(nobs-nnull,nobs-nnull,*), work1(
     **), work2(*)
      character*1 vmu
      double precision alph, scrold, scrwk, nlawk, limnla(2), tmp, 
     *dasum, ddot
      integer n, n0, i, j, iwk, maxitwk, idamax, job
      info = 0
      n0 = nnull
      n = nobs - nnull
      maxitwk = maxite
      if(.not.( (vmu .ne. 'v' .and. vmu .ne. 'm' .and. vmu .ne. 'u') 
     *.or. (init .ne. 0 .and. init .ne. 1) .or. (maxitwk .le.0) .or. (
     *prec .le. 0.d0) ))goto 23000
      info = -3
      return
23000 continue
      if(.not.( lds .lt. nobs .or. nobs .le. n0 .or. n0 .lt. 1 .or. 
     *ldqr .lt. nobs .or. ldqc .lt. nobs .or. nq .le. 0 ))goto 23002
      info = -1
      return
23002 continue
      call dstup (s, lds, nobs, n0, qraux, jpvt, y, q, ldqr, ldqc, nq, 
     *info, work1)
      if(.not.( info .ne. 0 ))goto 23004
      return
23004 continue
      if(.not.( init .eq. 1 ))goto 23006
      call dcopy (nq, theta, 1, thewk, 1)
      goto 23007
23006 continue
      i=1
23008 if(.not.(i.le.nq))goto 23010
      thewk(i) = dasum (n, q(n0+1,n0+1,i), ldqr+1)
      if(.not.( thewk(i) .gt. 0.d0 ))goto 23011
      thewk(i) = 1.d0 / thewk(i)
23011 continue
      i=i+1
      goto 23008
23010 continue
      j=1
23013 if(.not.(j.le.nobs))goto 23015
      call dset (nobs-j+1, 0.d0, qwk(j,j), 1)
      j=j+1
      goto 23013
23015 continue
      i=1
23016 if(.not.(i.le.nq))goto 23018
      j=1
23019 if(.not.(j.le.nobs))goto 23021
      call daxpy (nobs-j+1, thewk(i), q(j,j,i), 1, qwk(j,j), 1)
      j=j+1
      goto 23019
23021 continue
      i=i+1
      goto 23016
23018 continue
      call dcopy (nobs, y, 1, ywk, 1)
      call dcore (vmu, qwk, ldqr, nobs, n0, tol, ywk, 0, limnla, nlawk, 
     *scrwk, varht, info, twk, work1)
      if(.not.(info .ne. 0 ))goto 23022
      return
23022 continue
      call dcoef (s, lds, nobs, n0, qraux, jpvt, ywk, qwk, ldqr, nlawk, 
     *c, d, info, twk)
      call dqrsl (s, lds, nobs, n0, qraux, c, tmp, c, tmp, tmp, tmp, 
     *01000, info)
      i=1
23024 if(.not.(i.le.nq))goto 23026
      call dsymv('l', n, thewk(i), q(n0+1,n0+1,i), ldqr, c(n0+1), 1, 0.
     *d0, work1, 1)
      thewk(i) = ddot (n, c(n0+1), 1, work1, 1) * thewk(i)
      if(.not.( thewk(i) .gt. 0.d0 ))goto 23027
      thewk(i) = dlog10 (thewk(i))
      goto 23028
23027 continue
      thewk(i) = -25.d0
23028 continue
      i=i+1
      goto 23024
23026 continue
23007 continue
      scrold = 1.d10
      job = 0
23029 continue
      if(.not.( nq .eq. 1 ))goto 23032
      theta(1) = 0.d0
      goto 23031
23032 continue
      j=1
23034 if(.not.(j.le.nobs))goto 23036
      call dset (nobs-j+1, 0.d0, qwk(j,j), 1)
      j=j+1
      goto 23034
23036 continue
      i=1
23037 if(.not.(i.le.nq))goto 23039
      if(.not.( thewk(i) .le. -25.d0 ))goto 23040
      goto 23038
23040 continue
      j=1
23042 if(.not.(j.le.nobs))goto 23044
      call daxpy (nobs-j+1, 10.d0 ** thewk(i), q(j,j,i), 1, qwk(j,j), 1)
      j=j+1
      goto 23042
23044 continue
23038 i=i+1
      goto 23037
23039 continue
      call dcopy (nobs, y, 1, ywk, 1)
      call dcore (vmu, qwk, ldqr, nobs, n0, tol, ywk, job, limnla, 
     *nlawk, scrwk, varht, info, twk, work1)
      if(.not.(info .ne. 0 ))goto 23045
      return
23045 continue
      if(.not.( scrold .lt. scrwk ))goto 23047
      tmp = dabs (gwk1(idamax (nq, gwk1, 1)))
      if(.not.( alph * tmp .gt. - prec ))goto 23049
      info = -5
      return
23049 continue
      alph = alph / 2.d0
      i=1
23051 if(.not.(i.le.nq))goto 23053
      thewk(i) = theta(i) + alph * gwk1(i)
      i=i+1
      goto 23051
23053 continue
      goto 23030
23047 continue
      maxitwk = maxitwk - 1
      call dcopy (n-2, qwk(n0+2,n0+1), ldqr+1, traux, 1)
      call dcopy (n, qwk(n0+1,n0+1), ldqr+1, twk(2,1), 2)
      call dcopy (n-1, qwk(n0+1,n0+2), ldqr+1, twk(1,2), 2)
      call ddeev (vmu, nobs, q(n0+1,n0+1,1), ldqr, ldqc, n, nq, qwk(n0+
     *2,n0+1), ldqr, traux, twk, ywk(n0+1), thewk, nlawk, scrwk, varht, 
     *hes, nq, gra, hwk1, hwk2, gwk1, gwk2, kwk, n, work1, work2, c, 
     *info)
      iwk = 0
      i=1
23054 if(.not.(i.le.nq))goto 23056
      if(.not.( thewk(i) .le. -25.d0 ))goto 23057
      goto 23055
23057 continue
      iwk = iwk + 1
      call dcopy (nq, hes(1,i), 1, hes(1,iwk), 1)
23055 i=i+1
      goto 23054
23056 continue
      iwk = 0
      i=1
23059 if(.not.(i.le.nq))goto 23061
      if(.not.( thewk(i) .le. -25.d0 ))goto 23062
      goto 23060
23062 continue
      iwk = iwk + 1
      call dcopy (nq, hes(i,1), nq, hes(iwk,1), nq)
      gwk1(iwk) = gra(i)
      work2(iwk) = gra(i)
23060 i=i+1
      goto 23059
23061 continue
      i=1
23064 if(.not.(i.lt.iwk))goto 23066
      call dcopy (iwk-i, hes(i+1,i), 1, hes(i,i+1), nq)
      i=i+1
      goto 23064
23066 continue
      call dmcdc (hes, nq, iwk, gwk2, pvtwk, info)
      call dprmut (gwk1, iwk, pvtwk, 0)
      call dposl (hes, nq, iwk, gwk1)
      call dprmut (gwk1, iwk, pvtwk, 1)
      alph = -1.d0
      j = iwk
      i=nq
23067 if(.not.(i.ge.1))goto 23069
      if(.not.( thewk(i) .le. -25.0 ))goto 23070
      gwk1(i) = 0.d0
      goto 23071
23070 continue
      gwk1(i) = gwk1(iwk)
      iwk = iwk - 1
23071 continue
      i=i-1
      goto 23067
23069 continue
      call dscal (nq, 1.d0/dlog(1.d1), gwk1, 1)
      tmp = dabs (gwk1(idamax (nq, gwk1, 1)))
      if(.not.( tmp .gt. 1.d0 ))goto 23072
      call dscal (nq, 1.d0/tmp, gwk1, 1)
23072 continue
      i=1
23074 if(.not.(i.le.nq))goto 23076
      if(.not.( thewk(i) .le. -25.d0 ))goto 23077
      goto 23075
23077 continue
      thewk(i) = thewk(i) - nlawk
23075 i=i+1
      goto 23074
23076 continue
      call dcopy (nq, thewk, 1, theta, 1)
      tmp = gra(idamax (nq, gra, 1)) ** 2
      if(.not.( tmp .lt. prec ** 2 .or. scrold - scrwk .lt. prec * (
     *scrwk + 1.d0) .and. tmp .lt. prec * (scrwk + 1.d0) ** 2 ))goto 230
     *79
      goto 23031
23079 continue
      if(.not.( maxitwk .lt. 1 ))goto 23081
      info = -4
      return
23081 continue
      scrold = scrwk
      i=1
23083 if(.not.(i.le.nq))goto 23085
      thewk(i) = thewk(i) + alph * gwk1(i)
      i=i+1
      goto 23083
23085 continue
      job = -1
      limnla(1) = -1.d0
      limnla(2) = 1.d0
23030 goto 23029
23031 continue
      j=1
23086 if(.not.(j.le.nobs))goto 23088
      call dset (nobs-j+1, 0.d0, qwk(j,j), 1)
      j=j+1
      goto 23086
23088 continue
      i=1
23089 if(.not.(i.le.nq))goto 23091
      if(.not.( theta(i) .le. -25.d0 ))goto 23092
      goto 23090
23092 continue
      j=1
23094 if(.not.(j.le.nobs))goto 23096
      call daxpy (nobs-j+1, 10.d0 ** theta(i), q(j,j,i), 1, qwk(j,j), 1)
      j=j+1
      goto 23094
23096 continue
23090 i=i+1
      goto 23089
23091 continue
      call dcopy (nobs, y, 1, ywk, 1)
      call dcore (vmu, qwk, ldqr, nobs, n0, tol, ywk, job, limnla, 
     *nlaht, score, varht, info, twk, work1)
      if(.not.(info .ne. 0 ))goto 23097
      return
23097 continue
      call dcoef (s, lds, nobs, n0, qraux, jpvt, ywk, qwk, ldqr, nlaht, 
     *c, d, info, twk)
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dqrslm.f
      subroutine dqrslm (x, ldx, n, k, qraux, a, lda, job, info, work)
      integer ldx, n, k, lda, job, info
      double precision x(ldx,*), qraux(*), a(lda,*), work(*)
      double precision tmp, alph, ddot
      integer i, j, step
      info = 0
      if(.not.( lda .lt. n .or. n .lt. k .or. k .lt. 1 ))goto 23000
      info = -1
      return
23000 continue
      if(.not.( job .ne. 0 .and. job .ne. 1 ))goto 23002
      info = 1
      return
23002 continue
      if(.not.( job .eq. 0 ))goto 23004
      j = 1
      step = 1
      goto 23005
23004 continue
      j = k
      step = -1
23005 continue
23006 if(.not.( j .ge. 1 .and. j .le. k ))goto 23007
      if(.not.( qraux(j) .eq. 0.0d0 ))goto 23008
      j = j + step
      goto 23006
23008 continue
      tmp = x(j,j)
      x(j,j) = qraux(j)
      i=1
23010 if(.not.(i.lt.j))goto 23012
      alph = - ddot (n-j+1, x(j,j), 1, a(j,i), 1) / x(j,j)
      call daxpy (n-j+1, alph, x(j,j), 1, a(j,i), 1)
      i=i+1
      goto 23010
23012 continue
      alph = 1.d0 / x(j,j)
      call dsymv ('l', n-j+1, alph, a(j,j), lda, x(j,j), 1, 0.d0, work(
     *j), 1)
      alph = - ddot (n-j+1, work(j), 1, x(j,j), 1) / 2.d0 / x(j,j)
      call daxpy (n-j+1, alph, x(j,j), 1, work(j), 1)
      call dsyr2 ('l', n-j+1, -1.d0, x(j,j), 1, work(j), 1, a(j,j), lda)
      x(j,j) = tmp
      j = j + step
      goto 23006
23007 continue
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dsidr.f
      subroutine dsidr (vmu, s, lds, nobs, nnull, y, q, ldq, tol, job, 
     *limnla, nlaht, score, varht, c, d, qraux, jpvt, wk, info)
      character*1 vmu
      integer lds, nobs, nnull, ldq, job, jpvt(*), info
      double precision s(lds,*), y(*), q(ldq,*), tol, limnla(2), nlaht, 
     *score(*), varht, c(*), d(*), qraux(*), wk(*)
      info = 0
      if(.not.( nnull .lt. 1 .or. nnull .ge. nobs .or. nobs .gt. lds 
     *.or. nobs .gt. ldq ))goto 23000
      info = -1
      return
23000 continue
      if(.not.( vmu .ne. 'v' .and. vmu .ne. 'm' .and. vmu .ne. 'u' ))
     *goto 23002
      info = -3
      return
23002 continue
      call dstup (s, lds, nobs, nnull, qraux, jpvt, y, q, ldq, nobs, 1, 
     *info, wk)
      if(.not.( info .ne. 0 ))goto 23004
      return
23004 continue
      call dcore (vmu, q, ldq, nobs, nnull, tol, y, job, limnla, nlaht, 
     *score, varht, info, wk, wk(2*nobs+1))
      if(.not.( info .ne. 0 ))goto 23006
      return
23006 continue
      call dcoef (s, lds, nobs, nnull, qraux, jpvt, y, q, ldq, nlaht, c,
     * d, info, wk)
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dsms.f
      subroutine dsms (s, lds, nobs, nnull, jpvt, q, ldq, nlaht, sms, 
     *ldsms, wk, info)
      integer lds, nobs, nnull, jpvt(*), ldq, ldsms, info
      double precision s(lds,*), q(ldq,*), nlaht, sms(ldsms,*), wk(2,*)
      double precision dum, ddot
      integer i, j, n, n0
      info = 0
      if(.not.( nnull .lt. 1 .or. nnull .ge. nobs .or. nobs .gt. lds 
     *.or. nobs .gt. ldq .or. ldsms .lt. nnull ))goto 23000
      info = -1
      return
23000 continue
      n0 = nnull
      n = nobs - nnull
      call dcopy (n-2, q(n0+2,n0+1), ldq+1, wk, 1)
      j=1
23002 if(.not.(j.le.n0))goto 23004
      call dcopy (n, q(n0+1,j), 1, q(j,n0+1), ldq)
      call dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, wk, q(n0+2,j), dum, q(n0+
     *2,j), dum, dum, dum, 01000, info)
      j=j+1
      goto 23002
23004 continue
      call dset (n, 10.d0 ** nlaht, wk(2,1), 2)
      call daxpy (n, 1.d0, q(n0+1,n0+1), ldq+1, wk(2,1), 2)
      call dcopy (n-1, q(n0+1,n0+2), ldq+1, wk(1,2), 2)
      call dpbfa (wk, 2, n, 1, info)
      if(.not.( info .ne. 0 ))goto 23005
      info = -2
      return
23005 continue
      j=1
23007 if(.not.(j.le.n0))goto 23009
      call dpbsl (wk, 2, n, 1, q(n0+1,j))
      j=j+1
      goto 23007
23009 continue
      call dcopy (n-2, q(n0+2,n0+1), ldq+1, wk, 1)
      j=1
23010 if(.not.(j.le.n0))goto 23012
      call dqrsl (q(n0+2,n0+1), ldq, n-1, n-2, wk, q(n0+2,j), q(n0+2,j),
     * dum, dum, dum, dum, 10000, info)
      j=j+1
      goto 23010
23012 continue
      i=1
23013 if(.not.(i.le.n0))goto 23015
      j=1
23016 if(.not.(j.lt.i))goto 23018
      sms(i,j) = sms(j,i)
      j=j+1
      goto 23016
23018 continue
      j=i
23019 if(.not.(j.le.n0))goto 23021
      sms(i,j) = q(j,i) - ddot (n, q(n0+1,j), 1, q(i,n0+1), ldq)
      j=j+1
      goto 23019
23021 continue
      sms(i,i) = sms(i,i) + 10.d0**nlaht
      i=i+1
      goto 23013
23015 continue
      j=1
23022 if(.not.(j.le.n0))goto 23024
      call dtrsl (s, lds, n0, sms(1,j), 01, info)
      j=j+1
      goto 23022
23024 continue
      i=1
23025 if(.not.(i.le.n0))goto 23027
      call dcopy (n0, sms(i,1), ldsms, wk, 1)
      call dtrsl (s, lds, n0, wk, 01, info)
      call dprmut (wk, n0, jpvt, 1)
      call dcopy (n0, wk, 1, sms(i,1), ldsms)
      i=i+1
      goto 23025
23027 continue
      j=1
23028 if(.not.(j.le.n0))goto 23030
      call dprmut (sms(1,j), n0, jpvt, 1)
      j=j+1
      goto 23028
23030 continue
      j=1
23031 if(.not.(j.le.n0))goto 23033
      call dcopy (n, q(j,n0+1), ldq, q(n0+1,j), 1)
      j=j+1
      goto 23031
23033 continue
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dstup.f
      subroutine dstup (s, lds, nobs, nnull, qraux, jpvt, y, q, ldqr, 
     *ldqc, nq, info, work)
      integer lds, nobs, nnull, jpvt(*), ldqr, ldqc, nq, info
      double precision s(lds,*), y(*), qraux(*), q(ldqr,ldqc,*), work(*)
      double precision dum
      integer j
      info = 0
      if(.not.( nobs .lt. 1 .or. nobs .gt. lds .or. nobs .gt. ldqr .or. 
     *nobs .gt. ldqc ))goto 23000
      info = -1
      return
23000 continue
      j=1
23002 if(.not.(j.le.nnull))goto 23004
      jpvt(j) = 0
      j=j+1
      goto 23002
23004 continue
      call dqrdc (s, lds, nobs, nnull, qraux, jpvt, work, 1)
      call dqrsl (s, lds, nobs, nnull, qraux, y, dum, y, work, dum, dum,
     * 01100, info)
      if(.not.( info .ne. 0 ))goto 23005
      return
23005 continue
      j=1
23007 if(.not.(j.le.nq))goto 23009
      call dqrslm (s, lds, nobs, nnull, qraux, q(1,1,j), ldqr, 0, info, 
     *work)
      j=j+1
      goto 23007
23009 continue
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dsytr.f
      subroutine dsytr (x, ldx, n, tol, info, work)
      integer ldx, n, info
      double precision x(ldx,*), tol, work(*)
      double precision nrmtot, nrmxj, alph, toltot, tolcum, toluni, dn, 
     *ddot
      integer j
      info = 0
      if(.not.( ldx .lt. n .or. n .le. 2 ))goto 23000
      info = -1
      return
23000 continue
      nrmtot = ddot (n, x, ldx+1, x, ldx+1)
      j=1 
23002 if(.not.(j.lt.n))goto 23004
      nrmtot = nrmtot + 2.d0 * ddot (n-j, x(j+1,j), 1, x(j+1,j), 1)
       j=j+1 
      goto 23002
23004 continue
      toltot = 1.d0
23005 if(.not.( 1.d0 + toltot .gt. 1.d0 ))goto 23006
      toltot = toltot / 2.d0
      goto 23005
23006 continue
      toltot = 4.d0 * toltot ** 2
      if(.not.( toltot .lt. tol ))goto 23007
      toltot = tol
23007 continue
      toltot = toltot * nrmtot
      dn = dfloat (n)
      toluni = toltot * 6.d0 / dn / ( dn - 1.d0 ) / ( 2.d0 * dn - 1.d0 )
      tolcum = 0.d0
      j=1 
23009 if(.not.(j.lt.n-1))goto 23011
      nrmtot = nrmtot - x(j,j) * x(j,j)
      nrmxj = ddot (n-j, x(j+1,j), 1, x(j+1,j), 1)
      dn = dfloat (n-j)
      tolcum = tolcum + toluni * dn * dn
      if(.not.( 2.d0 * nrmxj .le. tolcum ))goto 23012
      x(j,j+1) = 0.d0
      call dscal (n-j, 0.d0, x(j+1,j), 1)
      tolcum = tolcum - 2.d0 * nrmxj
      toltot = toltot - 2.d0 * nrmxj
      goto 23010
23012 continue
      if(.not.( x(j+1,j) .lt. 0.d0 ))goto 23014
      x(j,j+1) = dsqrt (nrmxj)
      goto 23015
23014 continue
      x(j,j+1) = - dsqrt (nrmxj)
23015 continue
      nrmtot = nrmtot - 2.d0 * nrmxj
      call dscal (n-j, -1.d0/x(j,j+1), x(j+1,j), 1)
      x(j+1,j) = 1.d0 + x(j+1,j)
      alph = 1.d0 / x(j+1,j)
      call dsymv ('l', n-j, alph, x(j+1,j+1), ldx, x(j+1,j), 1, 0.d0, 
     *work(j+1), 1)
      alph = - ddot (n-j, work(j+1), 1, x(j+1,j), 1) / 2.d0 / x(j+1,j)
      call daxpy (n-j, alph, x(j+1,j), 1, work(j+1), 1)
      call dsyr2 ('l', n-j, -1.d0, x(j+1,j), 1, work(j+1), 1, x(j+1,j+1)
     *, ldx)
23010  j=j+1 
      goto 23009
23011 continue
      x(n-1,n) = x(n,n-1)
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dtrev.f
      subroutine dtrev (vmu, t, ldt, n, z, score, varht, info, work)
      character*1 vmu
      integer n, info
      double precision t(ldt,*), z(*), score, varht, work(*)
      double precision nume, deno, tmp, alph, la, dasum, ddot
      integer j
      info = 0
      if(.not.( vmu .ne. 'v' .and. vmu .ne. 'm' .and. vmu .ne. 'u' ))
     *goto 23000
      info = -3
      return
23000 continue
      la = t(1,1)
      alph = dfloat (n) / dasum (n, t(2,1), ldt)
      call dscal (n, alph, t(2,1), ldt)
      call dscal (n-1, alph, t(1,2), ldt)
      call dpbfa (t, ldt, n, 1, info)
      if(.not.( info .ne. 0 ))goto 23002
      return
23002 continue
      call dcopy (n, z, 1, work, 1)
      call dpbsl (t, ldt, n, 1, work)
      if(.not.( vmu .eq. 'v' ))goto 23004
      tmp = 1.d0 / t(2,n) / t(2,n)
      deno = tmp
      j=n-1
23006 if(.not.(j.gt.0))goto 23008
      tmp = ( 1.d0 + t(1,j+1) * t(1,j+1) * tmp ) / t(2,j) / t(2,j)
      deno = deno + tmp
      j=j-1
      goto 23006
23008 continue
      nume = ddot (n, work, 1, work, 1) / dfloat (n)
      deno = deno / dfloat (n)
      varht = alph * la * nume / deno
      score = nume / deno / deno
23004 continue
      if(.not.( vmu .eq. 'm' ))goto 23009
      deno = dlog (t(2,n))
      j=n-1
23011 if(.not.(j.gt.0))goto 23013
      deno = deno + dlog (t(2,j))
      j=j-1
      goto 23011
23013 continue
      nume = ddot (n, z, 1, work, 1) / dfloat (n)
      varht = alph * la * nume
      score = nume * dexp (2.d0 * deno / dfloat (n))
23009 continue
      if(.not.( vmu .eq. 'u' ))goto 23014
      nume = ddot (n, work, 1, work, 1) / dfloat (n)
      tmp = 1.d0 / t(2,n) / t(2,n)
      deno = tmp
      j=n-1
23016 if(.not.(j.gt.0))goto 23018
      tmp = ( 1.d0 + t(1,j+1) * t(1,j+1) * tmp ) / t(2,j) / t(2,j)
      deno = deno + tmp
      j=j-1
      goto 23016
23018 continue
      deno = deno / dfloat (n)
      score = alph * alph * la * la * nume - 2.d0 * varht * alph * la * 
     *deno
23014 continue
      return
      end
SHAR_EOF
cd ..
mkdir lib
cd lib
cat << \SHAR_EOF > dpbfa.f
      SUBROUTINE DPBFA(ABD,LDA,N,M,INFO)
      INTEGER LDA,N,M,INFO
      DOUBLE PRECISION ABD(LDA,1)
C
C     DPBFA FACTORS A DOUBLE PRECISION SYMMETRIC POSITIVE DEFINITE
C     MATRIX STORED IN BAND FORM.
C
C     DPBFA IS USUALLY CALLED BY DPBCO, BUT IT CAN BE CALLED
C     DIRECTLY WITH A SAVING IN TIME IF  RCOND  IS NOT NEEDED.
C
C     ON ENTRY
C
C        ABD     DOUBLE PRECISION(LDA, N)
C                THE MATRIX TO BE FACTORED.  THE COLUMNS OF THE UPPER
C                TRIANGLE ARE STORED IN THE COLUMNS OF ABD AND THE
C                DIAGONALS OF THE UPPER TRIANGLE ARE STORED IN THE
C                ROWS OF ABD .  SEE THE COMMENTS BELOW FOR DETAILS.
C
C        LDA     INTEGER
C                THE LEADING DIMENSION OF THE ARRAY  ABD .
C                LDA MUST BE .GE. M + 1 .
C
C        N       INTEGER
C                THE ORDER OF THE MATRIX  A .
C
C        M       INTEGER
C                THE NUMBER OF DIAGONALS ABOVE THE MAIN DIAGONAL.
C                0 .LE. M .LT. N .
C
C     ON RETURN
C
C        ABD     AN UPPER TRIANGULAR MATRIX  R , STORED IN BAND
C                FORM, SO THAT  A = TRANS(R)*R .
C
C        INFO    INTEGER
C                = 0  FOR NORMAL RETURN.
C                = K  IF THE LEADING MINOR OF ORDER  K  IS NOT
C                     POSITIVE DEFINITE.
C
C     BAND STORAGE
C
C           IF  A  IS A SYMMETRIC POSITIVE DEFINITE BAND MATRIX,
C           THE FOLLOWING PROGRAM SEGMENT WILL SET UP THE INPUT.
C
C                   M = (BAND WIDTH ABOVE DIAGONAL)
C                   DO 20 J = 1, N
C                      I1 = MAX0(1, J-M)
C                      DO 10 I = I1, J
C                         K = I-J+M+1
C                         ABD(K,J) = A(I,J)
C                10    CONTINUE
C                20 CONTINUE
C
C     LINPACK.  THIS VERSION DATED 08/14/78 .
C     CLEVE MOLER, UNIVERSITY OF NEW MEXICO, ARGONNE NATIONAL LAB.
C
C     SUBROUTINES AND FUNCTIONS
C
C     BLAS DDOT
C     FORTRAN MAX0,DSQRT
C
C     INTERNAL VARIABLES
C
      DOUBLE PRECISION DDOT,T
      DOUBLE PRECISION S
      INTEGER IK,J,JK,K,MU
C     BEGIN BLOCK WITH ...EXITS TO 40
C
C
         DO 30 J = 1, N
            INFO = J
            S = 0.0D0
            IK = M + 1
            JK = MAX0(J-M,1)
            MU = MAX0(M+2-J,1)
            IF (M .LT. MU) GO TO 20
            DO 10 K = MU, M
               T = ABD(K,J) - DDOT(K-MU,ABD(IK,JK),1,ABD(MU,J),1)
               T = T/ABD(M+1,JK)
               ABD(K,J) = T
               S = S + T*T
               IK = IK - 1
               JK = JK + 1
   10       CONTINUE
   20       CONTINUE
            S = ABD(M+1,J) - S
C     ......EXIT
            IF (S .LE. 0.0D0) GO TO 40
            ABD(M+1,J) = DSQRT(S)
   30    CONTINUE
         INFO = 0
   40 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > dpbsl.f
      SUBROUTINE DPBSL(ABD,LDA,N,M,B)
      INTEGER LDA,N,M
      DOUBLE PRECISION ABD(LDA,1),B(1)
C
C     DPBSL SOLVES THE DOUBLE PRECISION SYMMETRIC POSITIVE DEFINITE
C     BAND SYSTEM  A*X = B
C     USING THE FACTORS COMPUTED BY DPBCO OR DPBFA.
C
C     ON ENTRY
C
C        ABD     DOUBLE PRECISION(LDA, N)
C                THE OUTPUT FROM DPBCO OR DPBFA.
C
C        LDA     INTEGER
C                THE LEADING DIMENSION OF THE ARRAY  ABD .
C
C        N       INTEGER
C                THE ORDER OF THE MATRIX  A .
C
C        M       INTEGER
C                THE NUMBER OF DIAGONALS ABOVE THE MAIN DIAGONAL.
C
C        B       DOUBLE PRECISION(N)
C                THE RIGHT HAND SIDE VECTOR.
C
C     ON RETURN
C
C        B       THE SOLUTION VECTOR  X .
C
C     ERROR CONDITION
C
C        A DIVISION BY ZERO WILL OCCUR IF THE INPUT FACTOR CONTAINS
C        A ZERO ON THE DIAGONAL.  TECHNICALLY THIS INDICATES
C        SINGULARITY BUT IT IS USUALLY CAUSED BY IMPROPER SUBROUTINE
C        ARGUMENTS.  IT WILL NOT OCCUR IF THE SUBROUTINES ARE CALLED
C        CORRECTLY AND  INFO .EQ. 0 .
C
C     TO COMPUTE  INVERSE(A) * C  WHERE  C  IS A MATRIX
C     WITH  P  COLUMNS
C           CALL DPBCO(ABD,LDA,N,RCOND,Z,INFO)
C           IF (RCOND IS TOO SMALL .OR. INFO .NE. 0) GO TO ...
C           DO 10 J = 1, P
C              CALL DPBSL(ABD,LDA,N,C(1,J))
C        10 CONTINUE
C
C     LINPACK.  THIS VERSION DATED 08/14/78 .
C     CLEVE MOLER, UNIVERSITY OF NEW MEXICO, ARGONNE NATIONAL LAB.
C
C     SUBROUTINES AND FUNCTIONS
C
C     BLAS DAXPY,DDOT
C     FORTRAN MIN0
C
C     INTERNAL VARIABLES
C
      DOUBLE PRECISION DDOT,T
      INTEGER K,KB,LA,LB,LM
C
C     SOLVE TRANS(R)*Y = B
C
      DO 10 K = 1, N
         LM = MIN0(K-1,M)
         LA = M + 1 - LM
         LB = K - LM
         T = DDOT(LM,ABD(LA,K),1,B(LB),1)
         B(K) = (B(K) - T)/ABD(M+1,K)
   10 CONTINUE
C
C     SOLVE R*X = Y
C
      DO 20 KB = 1, N
         K = N + 1 - KB
         LM = MIN0(K-1,M)
         LA = M + 1 - LM
         LB = K - LM
         B(K) = B(K)/ABD(M+1,K)
         T = -B(K)
         CALL DAXPY(LM,T,ABD(LA,K),1,B(LB),1)
   20 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > dpofa.f
      SUBROUTINE DPOFA(A,LDA,N,INFO)
      INTEGER LDA,N,INFO
      DOUBLE PRECISION A(LDA,1)
C
C     DPOFA FACTORS A DOUBLE PRECISION SYMMETRIC POSITIVE DEFINITE
C     MATRIX.
C
C     DPOFA IS USUALLY CALLED BY DPOCO, BUT IT CAN BE CALLED
C     DIRECTLY WITH A SAVING IN TIME IF  RCOND  IS NOT NEEDED.
C     (TIME FOR DPOCO) = (1 + 18/N)*(TIME FOR DPOFA) .
C
C     ON ENTRY
C
C        A       DOUBLE PRECISION(LDA, N)
C                THE SYMMETRIC MATRIX TO BE FACTORED.  ONLY THE
C                DIAGONAL AND UPPER TRIANGLE ARE USED.
C
C        LDA     INTEGER
C                THE LEADING DIMENSION OF THE ARRAY  A .
C
C        N       INTEGER
C                THE ORDER OF THE MATRIX  A .
C
C     ON RETURN
C
C        A       AN UPPER TRIANGULAR MATRIX  R  SO THAT  A = TRANS(R)*R
C                WHERE  TRANS(R)  IS THE TRANSPOSE.
C                THE STRICT LOWER TRIANGLE IS UNALTERED.
C                IF  INFO .NE. 0 , THE FACTORIZATION IS NOT COMPLETE.
C
C        INFO    INTEGER
C                = 0  FOR NORMAL RETURN.
C                = K  SIGNALS AN ERROR CONDITION.  THE LEADING MINOR
C                     OF ORDER  K  IS NOT POSITIVE DEFINITE.
C
C     LINPACK.  THIS VERSION DATED 08/14/78 .
C     CLEVE MOLER, UNIVERSITY OF NEW MEXICO, ARGONNE NATIONAL LAB.
C
C     SUBROUTINES AND FUNCTIONS
C
C     BLAS DDOT
C     FORTRAN DSQRT
C
C     INTERNAL VARIABLES
C
      DOUBLE PRECISION DDOT,T
      DOUBLE PRECISION S
      INTEGER J,JM1,K
C     BEGIN BLOCK WITH ...EXITS TO 40
C
C
         DO 30 J = 1, N
            INFO = J
            S = 0.0D0
            JM1 = J - 1
            IF (JM1 .LT. 1) GO TO 20
            DO 10 K = 1, JM1
               T = A(K,J) - DDOT(K-1,A(1,K),1,A(1,J),1)
               T = T/A(K,K)
               A(K,J) = T
               S = S + T*T
   10       CONTINUE
   20       CONTINUE
            S = A(J,J) - S
C     ......EXIT
            IF (S .LE. 0.0D0) GO TO 40
            A(J,J) = DSQRT(S)
   30    CONTINUE
         INFO = 0
   40 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > dposl.f
      SUBROUTINE DPOSL(A,LDA,N,B)
      INTEGER LDA,N
      DOUBLE PRECISION A(LDA,1),B(1)
C
C     DPOSL SOLVES THE DOUBLE PRECISION SYMMETRIC POSITIVE DEFINITE
C     SYSTEM A * X = B
C     USING THE FACTORS COMPUTED BY DPOCO OR DPOFA.
C
C     ON ENTRY
C
C        A       DOUBLE PRECISION(LDA, N)
C                THE OUTPUT FROM DPOCO OR DPOFA.
C
C        LDA     INTEGER
C                THE LEADING DIMENSION OF THE ARRAY  A .
C
C        N       INTEGER
C                THE ORDER OF THE MATRIX  A .
C
C        B       DOUBLE PRECISION(N)
C                THE RIGHT HAND SIDE VECTOR.
C
C     ON RETURN
C
C        B       THE SOLUTION VECTOR  X .
C
C     ERROR CONDITION
C
C        A DIVISION BY ZERO WILL OCCUR IF THE INPUT FACTOR CONTAINS
C        A ZERO ON THE DIAGONAL.  TECHNICALLY THIS INDICATES
C        SINGULARITY BUT IT IS USUALLY CAUSED BY IMPROPER SUBROUTINE
C        ARGUMENTS.  IT WILL NOT OCCUR IF THE SUBROUTINES ARE CALLED
C        CORRECTLY AND  INFO .EQ. 0 .
C
C     TO COMPUTE  INVERSE(A) * C  WHERE  C  IS A MATRIX
C     WITH  P  COLUMNS
C           CALL DPOCO(A,LDA,N,RCOND,Z,INFO)
C           IF (RCOND IS TOO SMALL .OR. INFO .NE. 0) GO TO ...
C           DO 10 J = 1, P
C              CALL DPOSL(A,LDA,N,C(1,J))
C        10 CONTINUE
C
C     LINPACK.  THIS VERSION DATED 08/14/78 .
C     CLEVE MOLER, UNIVERSITY OF NEW MEXICO, ARGONNE NATIONAL LAB.
C
C     SUBROUTINES AND FUNCTIONS
C
C     BLAS DAXPY,DDOT
C
C     INTERNAL VARIABLES
C
      DOUBLE PRECISION DDOT,T
      INTEGER K,KB
C
C     SOLVE TRANS(R)*Y = B
C
      DO 10 K = 1, N
         T = DDOT(K-1,A(1,K),1,B(1),1)
         B(K) = (B(K) - T)/A(K,K)
   10 CONTINUE
C
C     SOLVE R*X = Y
C
      DO 20 KB = 1, N
         K = N + 1 - KB
         B(K) = B(K)/A(K,K)
         T = -B(K)
         CALL DAXPY(K-1,T,A(1,K),1,B(1),1)
   20 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > dqrdc.f
      SUBROUTINE DQRDC(X,LDX,N,P,QRAUX,JPVT,WORK,JOB)
      INTEGER LDX,N,P,JOB
      INTEGER JPVT(1)
      DOUBLE PRECISION X(LDX,1),QRAUX(1),WORK(1)
C
C     DQRDC USES HOUSEHOLDER TRANSFORMATIONS TO COMPUTE THE QR
C     FACTORIZATION OF AN N BY P MATRIX X.  COLUMN PIVOTING
C     BASED ON THE 2-NORMS OF THE REDUCED COLUMNS MAY BE
C     PERFORMED AT THE USERS OPTION.
C
C     ON ENTRY
C
C        X       DOUBLE PRECISION(LDX,P), WHERE LDX .GE. N.
C                X CONTAINS THE MATRIX WHOSE DECOMPOSITION IS TO BE
C                COMPUTED.
C
C        LDX     INTEGER.
C                LDX IS THE LEADING DIMENSION OF THE ARRAY X.
C
C        N       INTEGER.
C                N IS THE NUMBER OF ROWS OF THE MATRIX X.
C
C        P       INTEGER.
C                P IS THE NUMBER OF COLUMNS OF THE MATRIX X.
C
C        JPVT    INTEGER(P).
C                JPVT CONTAINS INTEGERS THAT CONTROL THE SELECTION
C                OF THE PIVOT COLUMNS.  THE K-TH COLUMN X(K) OF X
C                IS PLACED IN ONE OF THREE CLASSES ACCORDING TO THE
C                VALUE OF JPVT(K).
C
C                   IF JPVT(K) .GT. 0, THEN X(K) IS AN INITIAL
C                                      COLUMN.
C
C                   IF JPVT(K) .EQ. 0, THEN X(K) IS A FREE COLUMN.
C
C                   IF JPVT(K) .LT. 0, THEN X(K) IS A FINAL COLUMN.
C
C                BEFORE THE DECOMPOSITION IS COMPUTED, INITIAL COLUMNS
C                ARE MOVED TO THE BEGINNING OF THE ARRAY X AND FINAL
C                COLUMNS TO THE END.  BOTH INITIAL AND FINAL COLUMNS
C                ARE FROZEN IN PLACE DURING THE COMPUTATION AND ONLY
C                FREE COLUMNS ARE MOVED.  AT THE K-TH STAGE OF THE
C                REDUCTION, IF X(K) IS OCCUPIED BY A FREE COLUMN
C                IT IS INTERCHANGED WITH THE FREE COLUMN OF LARGEST
C                REDUCED NORM.  JPVT IS NOT REFERENCED IF
C                JOB .EQ. 0.
C
C        WORK    DOUBLE PRECISION(P).
C                WORK IS A WORK ARRAY.  WORK IS NOT REFERENCED IF
C                JOB .EQ. 0.
C
C        JOB     INTEGER.
C                JOB IS AN INTEGER THAT INITIATES COLUMN PIVOTING.
C                IF JOB .EQ. 0, NO PIVOTING IS DONE.
C                IF JOB .NE. 0, PIVOTING IS DONE.
C
C     ON RETURN
C
C        X       X CONTAINS IN ITS UPPER TRIANGLE THE UPPER
C                TRIANGULAR MATRIX R OF THE QR FACTORIZATION.
C                BELOW ITS DIAGONAL X CONTAINS INFORMATION FROM
C                WHICH THE ORTHOGONAL PART OF THE DECOMPOSITION
C                CAN BE RECOVERED.  NOTE THAT IF PIVOTING HAS
C                BEEN REQUESTED, THE DECOMPOSITION IS NOT THAT
C                OF THE ORIGINAL MATRIX X BUT THAT OF X
C                WITH ITS COLUMNS PERMUTED AS DESCRIBED BY JPVT.
C
C        QRAUX   DOUBLE PRECISION(P).
C                QRAUX CONTAINS FURTHER INFORMATION REQUIRED TO RECOVER
C                THE ORTHOGONAL PART OF THE DECOMPOSITION.
C
C        JPVT    JPVT(K) CONTAINS THE INDEX OF THE COLUMN OF THE
C                ORIGINAL MATRIX THAT HAS BEEN INTERCHANGED INTO
C                THE K-TH COLUMN, IF PIVOTING WAS REQUESTED.
C
C     LINPACK. THIS VERSION DATED 08/14/78 .
C     G.W. STEWART, UNIVERSITY OF MARYLAND, ARGONNE NATIONAL LAB.
C
C     DQRDC USES THE FOLLOWING FUNCTIONS AND SUBPROGRAMS.
C
C     BLAS DAXPY,DDOT,DSCAL,DSWAP,DNRM2
C     FORTRAN DABS,DMAX1,MIN0,DSQRT
C
C     INTERNAL VARIABLES
C
      INTEGER J,JP,L,LP1,LUP,MAXJ,PL,PU
      DOUBLE PRECISION MAXNRM,DNRM2,TT
      DOUBLE PRECISION DDOT,NRMXL,T
      LOGICAL NEGJ,SWAPJ
C
C
      PL = 1
      PU = 0
      IF (JOB .EQ. 0) GO TO 60
C
C        PIVOTING HAS BEEN REQUESTED.  REARRANGE THE COLUMNS
C        ACCORDING TO JPVT.
C
         DO 20 J = 1, P
            SWAPJ = JPVT(J) .GT. 0
            NEGJ = JPVT(J) .LT. 0
            JPVT(J) = J
            IF (NEGJ) JPVT(J) = -J
            IF (.NOT.SWAPJ) GO TO 10
               IF (J .NE. PL) CALL DSWAP(N,X(1,PL),1,X(1,J),1)
               JPVT(J) = JPVT(PL)
               JPVT(PL) = J
               PL = PL + 1
   10       CONTINUE
   20    CONTINUE
         PU = P
         DO 50 JJ = 1, P
            J = P - JJ + 1
            IF (JPVT(J) .GE. 0) GO TO 40
               JPVT(J) = -JPVT(J)
               IF (J .EQ. PU) GO TO 30
                  CALL DSWAP(N,X(1,PU),1,X(1,J),1)
                  JP = JPVT(PU)
                  JPVT(PU) = JPVT(J)
                  JPVT(J) = JP
   30          CONTINUE
               PU = PU - 1
   40       CONTINUE
   50    CONTINUE
   60 CONTINUE
C
C     COMPUTE THE NORMS OF THE FREE COLUMNS.
C
      IF (PU .LT. PL) GO TO 80
      DO 70 J = PL, PU
         QRAUX(J) = DNRM2(N,X(1,J),1)
         WORK(J) = QRAUX(J)
   70 CONTINUE
   80 CONTINUE
C
C     PERFORM THE HOUSEHOLDER REDUCTION OF X.
C
      LUP = MIN0(N,P)
      DO 200 L = 1, LUP
         IF (L .LT. PL .OR. L .GE. PU) GO TO 120
C
C           LOCATE THE COLUMN OF LARGEST NORM AND BRING IT
C           INTO THE PIVOT POSITION.
C
            MAXNRM = 0.0D0
            MAXJ = L
            DO 100 J = L, PU
               IF (QRAUX(J) .LE. MAXNRM) GO TO 90
                  MAXNRM = QRAUX(J)
                  MAXJ = J
   90          CONTINUE
  100       CONTINUE
            IF (MAXJ .EQ. L) GO TO 110
               CALL DSWAP(N,X(1,L),1,X(1,MAXJ),1)
               QRAUX(MAXJ) = QRAUX(L)
               WORK(MAXJ) = WORK(L)
               JP = JPVT(MAXJ)
               JPVT(MAXJ) = JPVT(L)
               JPVT(L) = JP
  110       CONTINUE
  120    CONTINUE
         QRAUX(L) = 0.0D0
         IF (L .EQ. N) GO TO 190
C
C           COMPUTE THE HOUSEHOLDER TRANSFORMATION FOR COLUMN L.
C
            NRMXL = DNRM2(N-L+1,X(L,L),1)
            IF (NRMXL .EQ. 0.0D0) GO TO 180
               IF (X(L,L) .NE. 0.0D0) NRMXL = DSIGN(NRMXL,X(L,L))
               CALL DSCAL(N-L+1,1.0D0/NRMXL,X(L,L),1)
               X(L,L) = 1.0D0 + X(L,L)
C
C              APPLY THE TRANSFORMATION TO THE REMAINING COLUMNS,
C              UPDATING THE NORMS.
C
               LP1 = L + 1
               IF (P .LT. LP1) GO TO 170
               DO 160 J = LP1, P
                  T = -DDOT(N-L+1,X(L,L),1,X(L,J),1)/X(L,L)
                  CALL DAXPY(N-L+1,T,X(L,L),1,X(L,J),1)
                  IF (J .LT. PL .OR. J .GT. PU) GO TO 150
                  IF (QRAUX(J) .EQ. 0.0D0) GO TO 150
                     TT = 1.0D0 - (DABS(X(L,J))/QRAUX(J))**2
                     TT = DMAX1(TT,0.0D0)
                     T = TT
                     TT = 1.0D0 + 0.05D0*TT*(QRAUX(J)/WORK(J))**2
                     IF (TT .EQ. 1.0D0) GO TO 130
                        QRAUX(J) = QRAUX(J)*DSQRT(T)
                     GO TO 140
  130                CONTINUE
                        QRAUX(J) = DNRM2(N-L,X(L+1,J),1)
                        WORK(J) = QRAUX(J)
  140                CONTINUE
  150             CONTINUE
  160          CONTINUE
  170          CONTINUE
C
C              SAVE THE TRANSFORMATION.
C
               QRAUX(L) = X(L,L)
               X(L,L) = -NRMXL
  180       CONTINUE
  190    CONTINUE
  200 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > dqrsl.f
      SUBROUTINE DQRSL(X,LDX,N,K,QRAUX,Y,QY,QTY,B,RSD,XB,JOB,INFO)
      INTEGER LDX,N,K,JOB,INFO
      DOUBLE PRECISION X(LDX,1),QRAUX(1),Y(1),QY(1),QTY(1),B(1),RSD(1),
     *                 XB(1)
C
C     DQRSL APPLIES THE OUTPUT OF DQRDC TO COMPUTE COORDINATE
C     TRANSFORMATIONS, PROJECTIONS, AND LEAST SQUARES SOLUTIONS.
C     FOR K .LE. MIN(N,P), LET XK BE THE MATRIX
C
C            XK = (X(JPVT(1)),X(JPVT(2)), ... ,X(JPVT(K)))
C
C     FORMED FROM COLUMNNS JPVT(1), ... ,JPVT(K) OF THE ORIGINAL
C     N X P MATRIX X THAT WAS INPUT TO DQRDC (IF NO PIVOTING WAS
C     DONE, XK CONSISTS OF THE FIRST K COLUMNS OF X IN THEIR
C     ORIGINAL ORDER).  DQRDC PRODUCES A FACTORED ORTHOGONAL MATRIX Q
C     AND AN UPPER TRIANGULAR MATRIX R SUCH THAT
C
C              XK = Q * (R)
C                       (0)
C
C     THIS INFORMATION IS CONTAINED IN CODED FORM IN THE ARRAYS
C     X AND QRAUX.
C
C     ON ENTRY
C
C        X      DOUBLE PRECISION(LDX,P).
C               X CONTAINS THE OUTPUT OF DQRDC.
C
C        LDX    INTEGER.
C               LDX IS THE LEADING DIMENSION OF THE ARRAY X.
C
C        N      INTEGER.
C               N IS THE NUMBER OF ROWS OF THE MATRIX XK.  IT MUST
C               HAVE THE SAME VALUE AS N IN DQRDC.
C
C        K      INTEGER.
C               K IS THE NUMBER OF COLUMNS OF THE MATRIX XK.  K
C               MUST NNOT BE GREATER THAN MIN(N,P), WHERE P IS THE
C               SAME AS IN THE CALLING SEQUENCE TO DQRDC.
C
C        QRAUX  DOUBLE PRECISION(P).
C               QRAUX CONTAINS THE AUXILIARY OUTPUT FROM DQRDC.
C
C        Y      DOUBLE PRECISION(N)
C               Y CONTAINS AN N-VECTOR THAT IS TO BE MANIPULATED
C               BY DQRSL.
C
C        JOB    INTEGER.
C               JOB SPECIFIES WHAT IS TO BE COMPUTED.  JOB HAS
C               THE DECIMAL EXPANSION ABCDE, WITH THE FOLLOWING
C               MEANING.
C
C                    IF A.NE.0, COMPUTE QY.
C                    IF B,C,D, OR E .NE. 0, COMPUTE QTY.
C                    IF C.NE.0, COMPUTE B.
C                    IF D.NE.0, COMPUTE RSD.
C                    IF E.NE.0, COMPUTE XB.
C
C               NOTE THAT A REQUEST TO COMPUTE B, RSD, OR XB
C               AUTOMATICALLY TRIGGERS THE COMPUTATION OF QTY, FOR
C               WHICH AN ARRAY MUST BE PROVIDED IN THE CALLING
C               SEQUENCE.
C
C     ON RETURN
C
C        QY     DOUBLE PRECISION(N).
C               QY CONNTAINS Q*Y, IF ITS COMPUTATION HAS BEEN
C               REQUESTED.
C
C        QTY    DOUBLE PRECISION(N).
C               QTY CONTAINS TRANS(Q)*Y, IF ITS COMPUTATION HAS
C               BEEN REQUESTED.  HERE TRANS(Q) IS THE
C               TRANSPOSE OF THE MATRIX Q.
C
C        B      DOUBLE PRECISION(K)
C               B CONTAINS THE SOLUTION OF THE LEAST SQUARES PROBLEM
C
C                    MINIMIZE NORM2(Y - XK*B),
C
C               IF ITS COMPUTATION HAS BEEN REQUESTED.  (NOTE THAT
C               IF PIVOTING WAS REQUESTED IN DQRDC, THE J-TH
C               COMPONENT OF B WILL BE ASSOCIATED WITH COLUMN JPVT(J)
C               OF THE ORIGINAL MATRIX X THAT WAS INPUT INTO DQRDC.)
C
C        RSD    DOUBLE PRECISION(N).
C               RSD CONTAINS THE LEAST SQUARES RESIDUAL Y - XK*B,
C               IF ITS COMPUTATION HAS BEEN REQUESTED.  RSD IS
C               ALSO THE ORTHOGONAL PROJECTION OF Y ONTO THE
C               ORTHOGONAL COMPLEMENT OF THE COLUMN SPACE OF XK.
C
C        XB     DOUBLE PRECISION(N).
C               XB CONTAINS THE LEAST SQUARES APPROXIMATION XK*B,
C               IF ITS COMPUTATION HAS BEEN REQUESTED.  XB IS ALSO
C               THE ORTHOGONAL PROJECTION OF Y ONTO THE COLUMN SPACE
C               OF X.
C
C        INFO   INTEGER.
C               INFO IS ZERO UNLESS THE COMPUTATION OF B HAS
C               BEEN REQUESTED AND R IS EXACTLY SINGULAR.  IN
C               THIS CASE, INFO IS THE INDEX OF THE FIRST ZERO
C               DIAGONAL ELEMENT OF R AND B IS LEFT UNALTERED.
C
C     THE PARAMETERS QY, QTY, B, RSD, AND XB ARE NOT REFERENCED
C     IF THEIR COMPUTATION IS NOT REQUESTED AND IN THIS CASE
C     CAN BE REPLACED BY DUMMY VARIABLES IN THE CALLING PROGRAM.
C     TO SAVE STORAGE, THE USER MAY IN SOME CASES USE THE SAME
C     ARRAY FOR DIFFERENT PARAMETERS IN THE CALLING SEQUENCE.  A
C     FREQUENTLY OCCURING EXAMPLE IS WHEN ONE WISHES TO COMPUTE
C     ANY OF B, RSD, OR XB AND DOES NOT NEED Y OR QTY.  IN THIS
C     CASE ONE MAY IDENTIFY Y, QTY, AND ONE OF B, RSD, OR XB, WHILE
C     PROVIDING SEPARATE ARRAYS FOR ANYTHING ELSE THAT IS TO BE
C     COMPUTED.  THUS THE CALLING SEQUENCE
C
C          CALL DQRSL(X,LDX,N,K,QRAUX,Y,DUM,Y,B,Y,DUM,110,INFO)
C
C     WILL RESULT IN THE COMPUTATION OF B AND RSD, WITH RSD
C     OVERWRITING Y.  MORE GENERALLY, EACH ITEM IN THE FOLLOWING
C     LIST CONTAINS GROUPS OF PERMISSIBLE IDENTIFICATIONS FOR
C     A SINGLE CALLINNG SEQUENCE.
C
C          1. (Y,QTY,B) (RSD) (XB) (QY)
C
C          2. (Y,QTY,RSD) (B) (XB) (QY)
C
C          3. (Y,QTY,XB) (B) (RSD) (QY)
C
C          4. (Y,QY) (QTY,B) (RSD) (XB)
C
C          5. (Y,QY) (QTY,RSD) (B) (XB)
C
C          6. (Y,QY) (QTY,XB) (B) (RSD)
C
C     IN ANY GROUP THE VALUE RETURNED IN THE ARRAY ALLOCATED TO
C     THE GROUP CORRESPONDS TO THE LAST MEMBER OF THE GROUP.
C
C     LINPACK. THIS VERSION DATED 08/14/78 .
C     G.W. STEWART, UNIVERSITY OF MARYLAND, ARGONNE NATIONAL LAB.
C
C     DQRSL USES THE FOLLOWING FUNCTIONS AND SUBPROGRAMS.
C
C     BLAS DAXPY,DCOPY,DDOT
C     FORTRAN DABS,MIN0,MOD
C
C     INTERNAL VARIABLES
C
      INTEGER I,J,JJ,JU,KP1
      DOUBLE PRECISION DDOT,T,TEMP
      LOGICAL CB,CQY,CQTY,CR,CXB
C
C
C     SET INFO FLAG.
C
      INFO = 0
C
C     DETERMINE WHAT IS TO BE COMPUTED.
C
      CQY = JOB/10000 .NE. 0
      CQTY = MOD(JOB,10000) .NE. 0
      CB = MOD(JOB,1000)/100 .NE. 0
      CR = MOD(JOB,100)/10 .NE. 0
      CXB = MOD(JOB,10) .NE. 0
      JU = MIN0(K,N-1)
C
C     SPECIAL ACTION WHEN N=1.
C
      IF (JU .NE. 0) GO TO 40
         IF (CQY) QY(1) = Y(1)
         IF (CQTY) QTY(1) = Y(1)
         IF (CXB) XB(1) = Y(1)
         IF (.NOT.CB) GO TO 30
            IF (X(1,1) .NE. 0.0D0) GO TO 10
               INFO = 1
            GO TO 20
   10       CONTINUE
               B(1) = Y(1)/X(1,1)
   20       CONTINUE
   30    CONTINUE
         IF (CR) RSD(1) = 0.0D0
      GO TO 250
   40 CONTINUE
C
C        SET UP TO COMPUTE QY OR QTY.
C
         IF (CQY) CALL DCOPY(N,Y,1,QY,1)
         IF (CQTY) CALL DCOPY(N,Y,1,QTY,1)
         IF (.NOT.CQY) GO TO 70
C
C           COMPUTE QY.
C
            DO 60 JJ = 1, JU
               J = JU - JJ + 1
               IF (QRAUX(J) .EQ. 0.0D0) GO TO 50
                  TEMP = X(J,J)
                  X(J,J) = QRAUX(J)
                  T = -DDOT(N-J+1,X(J,J),1,QY(J),1)/X(J,J)
                  CALL DAXPY(N-J+1,T,X(J,J),1,QY(J),1)
                  X(J,J) = TEMP
   50          CONTINUE
   60       CONTINUE
   70    CONTINUE
         IF (.NOT.CQTY) GO TO 100
C
C           COMPUTE TRANS(Q)*Y.
C
            DO 90 J = 1, JU
               IF (QRAUX(J) .EQ. 0.0D0) GO TO 80
                  TEMP = X(J,J)
                  X(J,J) = QRAUX(J)
                  T = -DDOT(N-J+1,X(J,J),1,QTY(J),1)/X(J,J)
                  CALL DAXPY(N-J+1,T,X(J,J),1,QTY(J),1)
                  X(J,J) = TEMP
   80          CONTINUE
   90       CONTINUE
  100    CONTINUE
C
C        SET UP TO COMPUTE B, RSD, OR XB.
C
         IF (CB) CALL DCOPY(K,QTY,1,B,1)
         KP1 = K + 1
         IF (CXB) CALL DCOPY(K,QTY,1,XB,1)
         IF (CR .AND. K .LT. N) CALL DCOPY(N-K,QTY(KP1),1,RSD(KP1),1)
         IF (.NOT.CXB .OR. KP1 .GT. N) GO TO 120
            DO 110 I = KP1, N
               XB(I) = 0.0D0
  110       CONTINUE
  120    CONTINUE
         IF (.NOT.CR) GO TO 140
            DO 130 I = 1, K
               RSD(I) = 0.0D0
  130       CONTINUE
  140    CONTINUE
         IF (.NOT.CB) GO TO 190
C
C           COMPUTE B.
C
            DO 170 JJ = 1, K
               J = K - JJ + 1
               IF (X(J,J) .NE. 0.0D0) GO TO 150
                  INFO = J
C           ......EXIT
                  GO TO 180
  150          CONTINUE
               B(J) = B(J)/X(J,J)
               IF (J .EQ. 1) GO TO 160
                  T = -B(J)
                  CALL DAXPY(J-1,T,X(1,J),1,B,1)
  160          CONTINUE
  170       CONTINUE
  180       CONTINUE
  190    CONTINUE
         IF (.NOT.CR .AND. .NOT.CXB) GO TO 240
C
C           COMPUTE RSD OR XB AS REQUIRED.
C
            DO 230 JJ = 1, JU
               J = JU - JJ + 1
               IF (QRAUX(J) .EQ. 0.0D0) GO TO 220
                  TEMP = X(J,J)
                  X(J,J) = QRAUX(J)
                  IF (.NOT.CR) GO TO 200
                     T = -DDOT(N-J+1,X(J,J),1,RSD(J),1)/X(J,J)
                     CALL DAXPY(N-J+1,T,X(J,J),1,RSD(J),1)
  200             CONTINUE
                  IF (.NOT.CXB) GO TO 210
                     T = -DDOT(N-J+1,X(J,J),1,XB(J),1)/X(J,J)
                     CALL DAXPY(N-J+1,T,X(J,J),1,XB(J),1)
  210             CONTINUE
                  X(J,J) = TEMP
  220          CONTINUE
  230       CONTINUE
  240    CONTINUE
  250 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > dtrsl.f
      SUBROUTINE DTRSL(T,LDT,N,B,JOB,INFO)
      INTEGER LDT,N,JOB,INFO
      DOUBLE PRECISION T(LDT,1),B(1)
C
C
C     DTRSL SOLVES SYSTEMS OF THE FORM
C
C                   T * X = B
C     OR
C                   TRANS(T) * X = B
C
C     WHERE T IS A TRIANGULAR MATRIX OF ORDER N. HERE TRANS(T)
C     DENOTES THE TRANSPOSE OF THE MATRIX T.
C
C     ON ENTRY
C
C         T         DOUBLE PRECISION(LDT,N)
C                   T CONTAINS THE MATRIX OF THE SYSTEM. THE ZERO
C                   ELEMENTS OF THE MATRIX ARE NOT REFERENCED, AND
C                   THE CORRESPONDING ELEMENTS OF THE ARRAY CAN BE
C                   USED TO STORE OTHER INFORMATION.
C
C         LDT       INTEGER
C                   LDT IS THE LEADING DIMENSION OF THE ARRAY T.
C
C         N         INTEGER
C                   N IS THE ORDER OF THE SYSTEM.
C
C         B         DOUBLE PRECISION(N).
C                   B CONTAINS THE RIGHT HAND SIDE OF THE SYSTEM.
C
C         JOB       INTEGER
C                   JOB SPECIFIES WHAT KIND OF SYSTEM IS TO BE SOLVED.
C                   IF JOB IS
C
C                        00   SOLVE T*X=B, T LOWER TRIANGULAR,
C                        01   SOLVE T*X=B, T UPPER TRIANGULAR,
C                        10   SOLVE TRANS(T)*X=B, T LOWER TRIANGULAR,
C                        11   SOLVE TRANS(T)*X=B, T UPPER TRIANGULAR.
C
C     ON RETURN
C
C         B         B CONTAINS THE SOLUTION, IF INFO .EQ. 0.
C                   OTHERWISE B IS UNALTERED.
C
C         INFO      INTEGER
C                   INFO CONTAINS ZERO IF THE SYSTEM IS NONSINGULAR.
C                   OTHERWISE INFO CONTAINS THE INDEX OF
C                   THE FIRST ZERO DIAGONAL ELEMENT OF T.
C
C     LINPACK. THIS VERSION DATED 08/14/78 .
C     G. W. STEWART, UNIVERSITY OF MARYLAND, ARGONNE NATIONAL LAB.
C
C     SUBROUTINES AND FUNCTIONS
C
C     BLAS DAXPY,DDOT
C     FORTRAN MOD
C
C     INTERNAL VARIABLES
C
      DOUBLE PRECISION DDOT,TEMP
      INTEGER CASE,J,JJ
C
C     BEGIN BLOCK PERMITTING ...EXITS TO 150
C
C        CHECK FOR ZERO DIAGONAL ELEMENTS.
C
         DO 10 INFO = 1, N
C     ......EXIT
            IF (T(INFO,INFO) .EQ. 0.0D0) GO TO 150
   10    CONTINUE
         INFO = 0
C
C        DETERMINE THE TASK AND GO TO IT.
C
         CASE = 1
         IF (MOD(JOB,10) .NE. 0) CASE = 2
         IF (MOD(JOB,100)/10 .NE. 0) CASE = CASE + 2
         GO TO (20,50,80,110), CASE
C
C        SOLVE T*X=B FOR T LOWER TRIANGULAR
C
   20    CONTINUE
            B(1) = B(1)/T(1,1)
            IF (N .LT. 2) GO TO 40
            DO 30 J = 2, N
               TEMP = -B(J-1)
               CALL DAXPY(N-J+1,TEMP,T(J,J-1),1,B(J),1)
               B(J) = B(J)/T(J,J)
   30       CONTINUE
   40       CONTINUE
         GO TO 140
C
C        SOLVE T*X=B FOR T UPPER TRIANGULAR.
C
   50    CONTINUE
            B(N) = B(N)/T(N,N)
            IF (N .LT. 2) GO TO 70
            DO 60 JJ = 2, N
               J = N - JJ + 1
               TEMP = -B(J+1)
               CALL DAXPY(J,TEMP,T(1,J+1),1,B(1),1)
               B(J) = B(J)/T(J,J)
   60       CONTINUE
   70       CONTINUE
         GO TO 140
C
C        SOLVE TRANS(T)*X=B FOR T LOWER TRIANGULAR.
C
   80    CONTINUE
            B(N) = B(N)/T(N,N)
            IF (N .LT. 2) GO TO 100
            DO 90 JJ = 2, N
               J = N - JJ + 1
               B(J) = B(J) - DDOT(JJ-1,T(J+1,J),1,B(J+1),1)
               B(J) = B(J)/T(J,J)
   90       CONTINUE
  100       CONTINUE
         GO TO 140
C
C        SOLVE TRANS(T)*X=B FOR T UPPER TRIANGULAR.
C
  110    CONTINUE
            B(1) = B(1)/T(1,1)
            IF (N .LT. 2) GO TO 130
            DO 120 J = 2, N
               B(J) = B(J) - DDOT(J-1,T(1,J),1,B(1),1)
               B(J) = B(J)/T(J,J)
  120       CONTINUE
  130       CONTINUE
  140    CONTINUE
  150 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > dasum.f
      DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
C
C     TAKES THE SUM OF THE ABSOLUTE VALUES.
C     JACK DONGARRA, LINPACK, 3/11/78.
C
      DOUBLE PRECISION DX(1),DTEMP
      INTEGER I,INCX,M,MP1,N,NINCX
C
      DASUM = 0.0D0
      DTEMP = 0.0D0
      IF(N.LE.0)RETURN
      IF(INCX.EQ.1)GO TO 20
C
C        CODE FOR INCREMENT NOT EQUAL TO 1
C
      NINCX = N*INCX
      DO 10 I = 1,NINCX,INCX
        DTEMP = DTEMP + DABS(DX(I))
   10 CONTINUE
      DASUM = DTEMP
      RETURN
C
C        CODE FOR INCREMENT EQUAL TO 1
C
C
C        CLEAN-UP LOOP
C
   20 M = MOD(N,6)
      IF( M .EQ. 0 ) GO TO 40
      DO 30 I = 1,M
        DTEMP = DTEMP + DABS(DX(I))
   30 CONTINUE
      IF( N .LT. 6 ) GO TO 60
   40 MP1 = M + 1
      DO 50 I = MP1,N,6
        DTEMP = DTEMP + DABS(DX(I)) + DABS(DX(I + 1)) + DABS(DX(I + 2))
     *  + DABS(DX(I + 3)) + DABS(DX(I + 4)) + DABS(DX(I + 5))
   50 CONTINUE
   60 DASUM = DTEMP
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > daxpy.f
      SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
C
C     CONSTANT TIMES A VECTOR PLUS A VECTOR.
C     USES UNROLLED LOOPS FOR INCREMENTS EQUAL TO ONE.
C     JACK DONGARRA, LINPACK, 3/11/78.
C
      DOUBLE PRECISION DX(1),DY(1),DA
      INTEGER I,INCX,INCY,M,MP1,N
C
      IF(N.LE.0)RETURN
      IF (DA .EQ. 0.0D0) RETURN
      IF(INCX.EQ.1.AND.INCY.EQ.1)GO TO 20
C
C        CODE FOR UNEQUAL INCREMENTS OR EQUAL INCREMENTS
C          NOT EQUAL TO 1
C
      IX = 1
      IY = 1
      IF(INCX.LT.0)IX = (-N+1)*INCX + 1
      IF(INCY.LT.0)IY = (-N+1)*INCY + 1
      DO 10 I = 1,N
        DY(IY) = DY(IY) + DA*DX(IX)
        IX = IX + INCX
        IY = IY + INCY
   10 CONTINUE
      RETURN
C
C        CODE FOR BOTH INCREMENTS EQUAL TO 1
C
C
C        CLEAN-UP LOOP
C
   20 M = MOD(N,4)
      IF( M .EQ. 0 ) GO TO 40
      DO 30 I = 1,M
        DY(I) = DY(I) + DA*DX(I)
   30 CONTINUE
      IF( N .LT. 4 ) RETURN
   40 MP1 = M + 1
      DO 50 I = MP1,N,4
        DY(I) = DY(I) + DA*DX(I)
        DY(I + 1) = DY(I + 1) + DA*DX(I + 1)
        DY(I + 2) = DY(I + 2) + DA*DX(I + 2)
        DY(I + 3) = DY(I + 3) + DA*DX(I + 3)
   50 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > dcopy.f
      SUBROUTINE  DCOPY(N,DX,INCX,DY,INCY)
C
C     COPIES A VECTOR, X, TO A VECTOR, Y.
C     USES UNROLLED LOOPS FOR INCREMENTS EQUAL TO ONE.
C     JACK DONGARRA, LINPACK, 3/11/78.
C
      DOUBLE PRECISION DX(1),DY(1)
      INTEGER I,INCX,INCY,IX,IY,M,MP1,N
C
      IF(N.LE.0)RETURN
      IF(INCX.EQ.1.AND.INCY.EQ.1)GO TO 20
C
C        CODE FOR UNEQUAL INCREMENTS OR EQUAL INCREMENTS
C          NOT EQUAL TO 1
C
      IX = 1
      IY = 1
      IF(INCX.LT.0)IX = (-N+1)*INCX + 1
      IF(INCY.LT.0)IY = (-N+1)*INCY + 1
      DO 10 I = 1,N
        DY(IY) = DX(IX)
        IX = IX + INCX
        IY = IY + INCY
   10 CONTINUE
      RETURN
C
C        CODE FOR BOTH INCREMENTS EQUAL TO 1
C
C
C        CLEAN-UP LOOP
C
   20 M = MOD(N,7)
      IF( M .EQ. 0 ) GO TO 40
      DO 30 I = 1,M
        DY(I) = DX(I)
   30 CONTINUE
      IF( N .LT. 7 ) RETURN
   40 MP1 = M + 1
      DO 50 I = MP1,N,7
        DY(I) = DX(I)
        DY(I + 1) = DX(I + 1)
        DY(I + 2) = DX(I + 2)
        DY(I + 3) = DX(I + 3)
        DY(I + 4) = DX(I + 4)
        DY(I + 5) = DX(I + 5)
        DY(I + 6) = DX(I + 6)
   50 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > ddot.f
      DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
C
C     FORMS THE DOT PRODUCT OF TWO VECTORS.
C     USES UNROLLED LOOPS FOR INCREMENTS EQUAL TO ONE.
C     JACK DONGARRA, LINPACK, 3/11/78.
C
      DOUBLE PRECISION DX(1),DY(1),DTEMP
      INTEGER I,INCX,INCY,IX,IY,M,MP1,N
C
      DDOT = 0.0D0
      DTEMP = 0.0D0
      IF(N.LE.0)RETURN
      IF(INCX.EQ.1.AND.INCY.EQ.1)GO TO 20
C
C        CODE FOR UNEQUAL INCREMENTS OR EQUAL INCREMENTS
C          NOT EQUAL TO 1
C
      IX = 1
      IY = 1
      IF(INCX.LT.0)IX = (-N+1)*INCX + 1
      IF(INCY.LT.0)IY = (-N+1)*INCY + 1
      DO 10 I = 1,N
        DTEMP = DTEMP + DX(IX)*DY(IY)
        IX = IX + INCX
        IY = IY + INCY
   10 CONTINUE
      DDOT = DTEMP
      RETURN
C
C        CODE FOR BOTH INCREMENTS EQUAL TO 1
C
C
C        CLEAN-UP LOOP
C
   20 M = MOD(N,5)
      IF( M .EQ. 0 ) GO TO 40
      DO 30 I = 1,M
        DTEMP = DTEMP + DX(I)*DY(I)
   30 CONTINUE
      IF( N .LT. 5 ) GO TO 60
   40 MP1 = M + 1
      DO 50 I = MP1,N,5
        DTEMP = DTEMP + DX(I)*DY(I) + DX(I + 1)*DY(I + 1) +
     *   DX(I + 2)*DY(I + 2) + DX(I + 3)*DY(I + 3) + DX(I + 4)*DY(I + 4)
   50 CONTINUE
   60 DDOT = DTEMP
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > dscal.f
      SUBROUTINE  DSCAL(N,DA,DX,INCX)
C
C     SCALES A VECTOR BY A CONSTANT.
C     USES UNROLLED LOOPS FOR INCREMENT EQUAL TO ONE.
C     JACK DONGARRA, LINPACK, 3/11/78.
C
      DOUBLE PRECISION DA,DX(1)
      INTEGER I,INCX,M,MP1,N,NINCX
C
      IF(N.LE.0)RETURN
      IF(INCX.EQ.1)GO TO 20
C
C        CODE FOR INCREMENT NOT EQUAL TO 1
C
      NINCX = N*INCX
      DO 10 I = 1,NINCX,INCX
        DX(I) = DA*DX(I)
   10 CONTINUE
      RETURN
C
C        CODE FOR INCREMENT EQUAL TO 1
C
C
C        CLEAN-UP LOOP
C
   20 M = MOD(N,5)
      IF( M .EQ. 0 ) GO TO 40
      DO 30 I = 1,M
        DX(I) = DA*DX(I)
   30 CONTINUE
      IF( N .LT. 5 ) RETURN
   40 MP1 = M + 1
      DO 50 I = MP1,N,5
        DX(I) = DA*DX(I)
        DX(I + 1) = DA*DX(I + 1)
        DX(I + 2) = DA*DX(I + 2)
        DX(I + 3) = DA*DX(I + 3)
        DX(I + 4) = DA*DX(I + 4)
   50 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > dswap.f
      SUBROUTINE  DSWAP (N,DX,INCX,DY,INCY)
C
C     INTERCHANGES TWO VECTORS.
C     USES UNROLLED LOOPS FOR INCREMENTS EQUAL ONE.
C     JACK DONGARRA, LINPACK, 3/11/78.
C
      DOUBLE PRECISION DX(1),DY(1),DTEMP
      INTEGER I,INCX,INCY,IX,IY,M,MP1,N
C
      IF(N.LE.0)RETURN
      IF(INCX.EQ.1.AND.INCY.EQ.1)GO TO 20
C
C       CODE FOR UNEQUAL INCREMENTS OR EQUAL INCREMENTS NOT EQUAL
C         TO 1
C
      IX = 1
      IY = 1
      IF(INCX.LT.0)IX = (-N+1)*INCX + 1
      IF(INCY.LT.0)IY = (-N+1)*INCY + 1
      DO 10 I = 1,N
        DTEMP = DX(IX)
        DX(IX) = DY(IY)
        DY(IY) = DTEMP
        IX = IX + INCX
        IY = IY + INCY
   10 CONTINUE
      RETURN
C
C       CODE FOR BOTH INCREMENTS EQUAL TO 1
C
C
C       CLEAN-UP LOOP
C
   20 M = MOD(N,3)
      IF( M .EQ. 0 ) GO TO 40
      DO 30 I = 1,M
        DTEMP = DX(I)
        DX(I) = DY(I)
        DY(I) = DTEMP
   30 CONTINUE
      IF( N .LT. 3 ) RETURN
   40 MP1 = M + 1
      DO 50 I = MP1,N,3
        DTEMP = DX(I)
        DX(I) = DY(I)
        DY(I) = DTEMP
        DTEMP = DX(I + 1)
        DX(I + 1) = DY(I + 1)
        DY(I + 1) = DTEMP
        DTEMP = DX(I + 2)
        DX(I + 2) = DY(I + 2)
        DY(I + 2) = DTEMP
   50 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > idamax.f
      INTEGER FUNCTION IDAMAX(N,DX,INCX)
C
C     FINDS THE INDEX OF ELEMENT HAVING MAX. ABSOLUTE VALUE.
C     JACK DONGARRA, LINPACK, 3/11/78.
C
      DOUBLE PRECISION DX(1),DMAX
      INTEGER I,INCX,IX,N
C
      IDAMAX = 0
      IF( N .LT. 1 ) RETURN
      IDAMAX = 1
      IF(N.EQ.1)RETURN
      IF(INCX.EQ.1)GO TO 20
C
C        CODE FOR INCREMENT NOT EQUAL TO 1
C
      IX = 1
      DMAX = DABS(DX(1))
      IX = IX + INCX
      DO 10 I = 2,N
         IF(DABS(DX(IX)).LE.DMAX) GO TO 5
         IDAMAX = I
         DMAX = DABS(DX(IX))
    5    IX = IX + INCX
   10 CONTINUE
      RETURN
C
C        CODE FOR INCREMENT EQUAL TO 1
C
   20 DMAX = DABS(DX(1))
      DO 30 I = 2,N
         IF(DABS(DX(I)).LE.DMAX) GO TO 30
         IDAMAX = I
         DMAX = DABS(DX(I))
   30 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > dgemv.f
************************************************************************
*
      SUBROUTINE DGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX,
     $                   BETA, Y, INCY )
*     .. Scalar Arguments ..
      DOUBLE PRECISION   ALPHA, BETA
      INTEGER            INCX, INCY, LDA, M, N
      CHARACTER*1        TRANS
*     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
*     ..
*
*  Purpose
*  =======
*
*  DGEMV  performs one of the matrix-vector operations
*
*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
*
*  where alpha and beta are scalars, x and y are vectors and A is an
*  m by n matrix.
*
*  Parameters
*  ==========
*
*  TRANS  - CHARACTER*1.
*           On entry, TRANS specifies the operation to be performed as
*           follows:
*
*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
*
*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
*
*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
*
*           Unchanged on exit.
*
*  M      - INTEGER.
*           On entry, M specifies the number of rows of the matrix A.
*           M must be at least zero.
*           Unchanged on exit.
*
*  N      - INTEGER.
*           On entry, N specifies the number of columns of the matrix A.
*           N must be at least zero.
*           Unchanged on exit.
*
*  ALPHA  - DOUBLE PRECISION.
*           On entry, ALPHA specifies the scalar alpha.
*           Unchanged on exit.
*
*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
*           Before entry, the leading m by n part of the array A must
*           contain the matrix of coefficients.
*           Unchanged on exit.
*
*  LDA    - INTEGER.
*           On entry, LDA specifies the first dimension of A as declared
*           in the calling (sub) program. LDA must be at least
*           max( 1, m ).
*           Unchanged on exit.
*
*  X      - DOUBLE PRECISION array of DIMENSION at least
*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
*           and at least
*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
*           Before entry, the incremented array X must contain the
*           vector x.
*           Unchanged on exit.
*
*  INCX   - INTEGER.
*           On entry, INCX specifies the increment for the elements of
*           X. INCX must not be zero.
*           Unchanged on exit.
*
*  BETA   - DOUBLE PRECISION.
*           On entry, BETA specifies the scalar beta. When BETA is
*           supplied as zero then Y need not be set on input.
*           Unchanged on exit.
*
*  Y      - DOUBLE PRECISION array of DIMENSION at least
*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
*           and at least
*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
*           Before entry with BETA non-zero, the incremented array Y
*           must contain the vector y. On exit, Y is overwritten by the
*           updated vector y.
*
*  INCY   - INTEGER.
*           On entry, INCY specifies the increment for the elements of
*           Y. INCY must not be zero.
*           Unchanged on exit.
*
*
*  Level 2 Blas routine.
*
*  -- Written on 22-October-1986.
*     Jack Dongarra, Argonne National Lab.
*     Jeremy Du Croz, Nag Central Office.
*     Sven Hammarling, Nag Central Office.
*     Richard Hanson, Sandia National Labs.
*
*
*     .. Parameters ..
      DOUBLE PRECISION   ONE         , ZERO
      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
*     .. Local Scalars ..
      DOUBLE PRECISION   TEMP
      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY
*     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
*     .. External Subroutines ..
      EXTERNAL           XERBLA
*     .. Intrinsic Functions ..
      INTRINSIC          MAX
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.
*
      INFO = 0
      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
     $         .NOT.LSAME( TRANS, 'T' ).AND.
     $         .NOT.LSAME( TRANS, 'C' )      )THEN
         INFO = 1
      ELSE IF( M.LT.0 )THEN
         INFO = 2
      ELSE IF( N.LT.0 )THEN
         INFO = 3
      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
         INFO = 6
      ELSE IF( INCX.EQ.0 )THEN
         INFO = 8
      ELSE IF( INCY.EQ.0 )THEN
         INFO = 11
      END IF
      IF( INFO.NE.0 )THEN
         CALL XERBLA( 'DGEMV ', INFO )
         RETURN
      END IF
*
*     Quick return if possible.
*
      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
     $   RETURN
*
*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
*     up the start points in  X  and  Y.
*
      IF( LSAME( TRANS, 'N' ) )THEN
         LENX = N
         LENY = M
      ELSE
         LENX = M
         LENY = N
      END IF
      IF( INCX.GT.0 )THEN
         KX = 1
      ELSE
         KX = 1 - ( LENX - 1 )*INCX
      END IF
      IF( INCY.GT.0 )THEN
         KY = 1
      ELSE
         KY = 1 - ( LENY - 1 )*INCY
      END IF
*
*     Start the operations. In this version the elements of A are
*     accessed sequentially with one pass through A.
*
*     First form  y := beta*y.
*
      IF( BETA.NE.ONE )THEN
         IF( INCY.EQ.1 )THEN
            IF( BETA.EQ.ZERO )THEN
               DO 10, I = 1, LENY
                  Y( I ) = ZERO
   10          CONTINUE
            ELSE
               DO 20, I = 1, LENY
                  Y( I ) = BETA*Y( I )
   20          CONTINUE
            END IF
         ELSE
            IY = KY
            IF( BETA.EQ.ZERO )THEN
               DO 30, I = 1, LENY
                  Y( IY ) = ZERO
                  IY      = IY   + INCY
   30          CONTINUE
            ELSE
               DO 40, I = 1, LENY
                  Y( IY ) = BETA*Y( IY )
                  IY      = IY           + INCY
   40          CONTINUE
            END IF
         END IF
      END IF
      IF( ALPHA.EQ.ZERO )
     $   RETURN
      IF( LSAME( TRANS, 'N' ) )THEN
*
*        Form  y := alpha*A*x + y.
*
         JX = KX
         IF( INCY.EQ.1 )THEN
            DO 60, J = 1, N
               IF( X( JX ).NE.ZERO )THEN
                  TEMP = ALPHA*X( JX )
                  DO 50, I = 1, M
                     Y( I ) = Y( I ) + TEMP*A( I, J )
   50             CONTINUE
               END IF
               JX = JX + INCX
   60       CONTINUE
         ELSE
            DO 80, J = 1, N
               IF( X( JX ).NE.ZERO )THEN
                  TEMP = ALPHA*X( JX )
                  IY   = KY
                  DO 70, I = 1, M
                     Y( IY ) = Y( IY ) + TEMP*A( I, J )
                     IY      = IY      + INCY
   70             CONTINUE
               END IF
               JX = JX + INCX
   80       CONTINUE
         END IF
      ELSE
*
*        Form  y := alpha*A'*x + y.
*
         JY = KY
         IF( INCX.EQ.1 )THEN
            DO 100, J = 1, N
               TEMP = ZERO
               DO 90, I = 1, M
                  TEMP = TEMP + A( I, J )*X( I )
   90          CONTINUE
               Y( JY ) = Y( JY ) + ALPHA*TEMP
               JY      = JY      + INCY
  100       CONTINUE
         ELSE
            DO 120, J = 1, N
               TEMP = ZERO
               IX   = KX
               DO 110, I = 1, M
                  TEMP = TEMP + A( I, J )*X( IX )
                  IX   = IX   + INCX
  110          CONTINUE
               Y( JY ) = Y( JY ) + ALPHA*TEMP
              JY      = JY      + INCY
  120       CONTINUE
         END IF
      END IF
*
      RETURN
*
*     End of DGEMV .
*
      END
*
SHAR_EOF
cat << \SHAR_EOF > dsymv.f
************************************************************************
*
      SUBROUTINE DSYMV ( UPLO, N, ALPHA, A, LDA, X, INCX,
     $                   BETA, Y, INCY )
*     .. Scalar Arguments ..
      DOUBLE PRECISION   ALPHA, BETA
      INTEGER            INCX, INCY, LDA, N
      CHARACTER*1        UPLO
*     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
*     ..
*
*  Purpose
*  =======
*
*  DSYMV  performs the matrix-vector  operation
*
*     y := alpha*A*x + beta*y,
*
*  where alpha and beta are scalars, x and y are n element vectors and
*  A is an n by n symmetric matrix.
*
*  Parameters
*  ==========
*
*  UPLO   - CHARACTER*1.
*           On entry, UPLO specifies whether the upper or lower
*           triangular part of the array A is to be referenced as
*           follows:
*
*              UPLO = 'U' or 'u'   Only the upper triangular part of A
*                                  is to be referenced.
*
*              UPLO = 'L' or 'l'   Only the lower triangular part of A
*                                  is to be referenced.
*
*           Unchanged on exit.
*
*  N      - INTEGER.
*           On entry, N specifies the order of the matrix A.
*           N must be at least zero.
*           Unchanged on exit.
*
*  ALPHA  - DOUBLE PRECISION.
*           On entry, ALPHA specifies the scalar alpha.
*           Unchanged on exit.
*
*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
*           Before entry with  UPLO = 'U' or 'u', the leading n by n
*           upper triangular part of the array A must contain the upper
*           triangular part of the symmetric matrix and the strictly
*           lower triangular part of A is not referenced.
*           Before entry with UPLO = 'L' or 'l', the leading n by n
*           lower triangular part of the array A must contain the lower
*           triangular part of the symmetric matrix and the strictly
*           upper triangular part of A is not referenced.
*           Unchanged on exit.
*
*  LDA    - INTEGER.
*           On entry, LDA specifies the first dimension of A as declared
*           in the calling (sub) program. LDA must be at least
*           max( 1, n ).
*           Unchanged on exit.
*
*  X      - DOUBLE PRECISION array of dimension at least
*           ( 1 + ( n - 1 )*abs( INCX ) ).
*           Before entry, the incremented array X must contain the n
*           element vector x.
*           Unchanged on exit.
*
*  INCX   - INTEGER.
*           On entry, INCX specifies the increment for the elements of
*           X. INCX must not be zero.
*           Unchanged on exit.
*
*  BETA   - DOUBLE PRECISION.
*           On entry, BETA specifies the scalar beta. When BETA is
*           supplied as zero then Y need not be set on input.
*           Unchanged on exit.
*
*  Y      - DOUBLE PRECISION array of dimension at least
*           ( 1 + ( n - 1 )*abs( INCY ) ).
*           Before entry, the incremented array Y must contain the n
*           element vector y. On exit, Y is overwritten by the updated
*           vector y.
*
*  INCY   - INTEGER.
*           On entry, INCY specifies the increment for the elements of
*           Y. INCY must not be zero.
*           Unchanged on exit.
*
*
*  Level 2 Blas routine.
*
*  -- Written on 22-October-1986.
*     Jack Dongarra, Argonne National Lab.
*     Jeremy Du Croz, Nag Central Office.
*     Sven Hammarling, Nag Central Office.
*     Richard Hanson, Sandia National Labs.
*
*
*     .. Parameters ..
      DOUBLE PRECISION   ONE         , ZERO
      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
*     .. Local Scalars ..
      DOUBLE PRECISION   TEMP1, TEMP2
      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
*     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
*     .. External Subroutines ..
      EXTERNAL           XERBLA
*     .. Intrinsic Functions ..
      INTRINSIC          MAX
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.
*
      INFO = 0
      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
     $         .NOT.LSAME( UPLO, 'L' )      )THEN
         INFO = 1
      ELSE IF( N.LT.0 )THEN
         INFO = 2
      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
         INFO = 5
      ELSE IF( INCX.EQ.0 )THEN
         INFO = 7
      ELSE IF( INCY.EQ.0 )THEN
         INFO = 10
      END IF
      IF( INFO.NE.0 )THEN
         CALL XERBLA( 'DSYMV ', INFO )
         RETURN
      END IF
*
*     Quick return if possible.
*
      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
     $   RETURN
*
*     Set up the start points in  X  and  Y.
*
      IF( INCX.GT.0 )THEN
         KX = 1
      ELSE
         KX = 1 - ( N - 1 )*INCX
      END IF
      IF( INCY.GT.0 )THEN
         KY = 1
      ELSE
         KY = 1 - ( N - 1 )*INCY
      END IF
*
*     Start the operations. In this version the elements of A are
*     accessed sequentially with one pass through the triangular part
*     of A.
*
*     First form  y := beta*y.
*
      IF( BETA.NE.ONE )THEN
         IF( INCY.EQ.1 )THEN
            IF( BETA.EQ.ZERO )THEN
               DO 10, I = 1, N
                  Y( I ) = ZERO
   10          CONTINUE
            ELSE
               DO 20, I = 1, N
                  Y( I ) = BETA*Y( I )
   20          CONTINUE
            END IF
         ELSE
            IY = KY
            IF( BETA.EQ.ZERO )THEN
               DO 30, I = 1, N
                  Y( IY ) = ZERO
                  IY      = IY   + INCY
   30          CONTINUE
            ELSE
               DO 40, I = 1, N
                  Y( IY ) = BETA*Y( IY )
                  IY      = IY           + INCY
   40          CONTINUE
            END IF
         END IF
      END IF
      IF( ALPHA.EQ.ZERO )
     $   RETURN
      IF( LSAME( UPLO, 'U' ) )THEN
*
*        Form  y  when A is stored in upper triangle.
*
         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
            DO 60, J = 1, N
               TEMP1 = ALPHA*X( J )
               TEMP2 = ZERO
               DO 50, I = 1, J - 1
                  Y( I ) = Y( I ) + TEMP1*A( I, J )
                  TEMP2  = TEMP2  + A( I, J )*X( I )
   50          CONTINUE
               Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2
   60       CONTINUE
         ELSE
            JX = KX
            JY = KY
            DO 80, J = 1, N
               TEMP1 = ALPHA*X( JX )
               TEMP2 = ZERO
               IX    = KX
               IY    = KY
               DO 70, I = 1, J - 1
                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
                  TEMP2   = TEMP2   + A( I, J )*X( IX )
                  IX      = IX      + INCX
                  IY      = IY      + INCY
   70          CONTINUE
               Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2
               JX      = JX      + INCX
               JY      = JY      + INCY
   80       CONTINUE
         END IF
      ELSE
*
*        Form  y  when A is stored in lower triangle.
*
         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
            DO 100, J = 1, N
               TEMP1  = ALPHA*X( J )
               TEMP2  = ZERO
               Y( J ) = Y( J )       + TEMP1*A( J, J )
               DO 90, I = J + 1, N
                  Y( I ) = Y( I ) + TEMP1*A( I, J )
                  TEMP2  = TEMP2  + A( I, J )*X( I )
   90          CONTINUE
               Y( J ) = Y( J ) + ALPHA*TEMP2
  100       CONTINUE
         ELSE
            JX = KX
            JY = KY
            DO 120, J = 1, N
               TEMP1   = ALPHA*X( JX )
               TEMP2   = ZERO
               Y( JY ) = Y( JY )       + TEMP1*A( J, J )
               IX      = JX
               IY      = JY
               DO 110, I = J + 1, N
                  IX      = IX      + INCX
                  IY      = IY      + INCY
                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
                  TEMP2   = TEMP2   + A( I, J )*X( IX )
  110          CONTINUE
               Y( JY ) = Y( JY ) + ALPHA*TEMP2
               JX      = JX      + INCX
               JY      = JY      + INCY
  120       CONTINUE
         END IF
      END IF
*
      RETURN
*
*     End of DSYMV .
*
      END
*
SHAR_EOF
cat << \SHAR_EOF > dsyr2.f
************************************************************************
*
      SUBROUTINE DSYR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA )
*     .. Scalar Arguments ..
      DOUBLE PRECISION   ALPHA
      INTEGER            INCX, INCY, LDA, N
      CHARACTER*1        UPLO
*     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
*     ..
*
*  Purpose
*  =======
*
*  DSYR2  performs the symmetric rank 2 operation
*
*     A := alpha*x*y' + alpha*y*x' + A,
*
*  where alpha is a scalar, x and y are n element vectors and A is an n
*  by n symmetric matrix.
*
*  Parameters
*  ==========
*
*  UPLO   - CHARACTER*1.
*           On entry, UPLO specifies whether the upper or lower
*           triangular part of the array A is to be referenced as
*           follows:
*
*              UPLO = 'U' or 'u'   Only the upper triangular part of A
*                                  is to be referenced.
*
*              UPLO = 'L' or 'l'   Only the lower triangular part of A
*                                  is to be referenced.
*
*           Unchanged on exit.
*
*  N      - INTEGER.
*           On entry, N specifies the order of the matrix A.
*           N must be at least zero.
*           Unchanged on exit.
*
*  ALPHA  - DOUBLE PRECISION.
*           On entry, ALPHA specifies the scalar alpha.
*           Unchanged on exit.
*
*  X      - DOUBLE PRECISION array of dimension at least
*           ( 1 + ( n - 1 )*abs( INCX ) ).
*           Before entry, the incremented array X must contain the n
*           element vector x.
*           Unchanged on exit.
*
*  INCX   - INTEGER.
*           On entry, INCX specifies the increment for the elements of
*           X. INCX must not be zero.
*           Unchanged on exit.
*
*  Y      - DOUBLE PRECISION array of dimension at least
*           ( 1 + ( n - 1 )*abs( INCY ) ).
*           Before entry, the incremented array Y must contain the n
*           element vector y.
*           Unchanged on exit.
*
*  INCY   - INTEGER.
*           On entry, INCY specifies the increment for the elements of
*           Y. INCY must not be zero.
*           Unchanged on exit.
*
*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
*           Before entry with  UPLO = 'U' or 'u', the leading n by n
*           upper triangular part of the array A must contain the upper
*           triangular part of the symmetric matrix and the strictly
*           lower triangular part of A is not referenced. On exit, the
*           upper triangular part of the array A is overwritten by the
*           upper triangular part of the updated matrix.
*           Before entry with UPLO = 'L' or 'l', the leading n by n
*           lower triangular part of the array A must contain the lower
*           triangular part of the symmetric matrix and the strictly
*           upper triangular part of A is not referenced. On exit, the
*           lower triangular part of the array A is overwritten by the
*           lower triangular part of the updated matrix.
*
*  LDA    - INTEGER.
*           On entry, LDA specifies the first dimension of A as declared
*           in the calling (sub) program. LDA must be at least
*           max( 1, n ).
*           Unchanged on exit.
*
*
*  Level 2 Blas routine.
*
*  -- Written on 22-October-1986.
*     Jack Dongarra, Argonne National Lab.
*     Jeremy Du Croz, Nag Central Office.
*     Sven Hammarling, Nag Central Office.
*     Richard Hanson, Sandia National Labs.
*
*
*     .. Parameters ..
      DOUBLE PRECISION   ZERO
      PARAMETER        ( ZERO = 0.0D+0 )
*     .. Local Scalars ..
      DOUBLE PRECISION   TEMP1, TEMP2
      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
*     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
*     .. External Subroutines ..
      EXTERNAL           XERBLA
*     .. Intrinsic Functions ..
      INTRINSIC          MAX
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.
*
      INFO = 0
      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
     $         .NOT.LSAME( UPLO, 'L' )      )THEN
         INFO = 1
      ELSE IF( N.LT.0 )THEN
         INFO = 2
      ELSE IF( INCX.EQ.0 )THEN
         INFO = 5
      ELSE IF( INCY.EQ.0 )THEN
         INFO = 7
      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
         INFO = 9
      END IF
      IF( INFO.NE.0 )THEN
         CALL XERBLA( 'DSYR2 ', INFO )
         RETURN
      END IF
*
*     Quick return if possible.
*
      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
     $   RETURN
*
*     Set up the start points in X and Y if the increments are not both
*     unity.
*
      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
         IF( INCX.GT.0 )THEN
            KX = 1
         ELSE
            KX = 1 - ( N - 1 )*INCX
         END IF
         IF( INCY.GT.0 )THEN
            KY = 1
         ELSE
            KY = 1 - ( N - 1 )*INCY
         END IF
         JX = KX
         JY = KY
      END IF
*
*     Start the operations. In this version the elements of A are
*     accessed sequentially with one pass through the triangular part
*     of A.
*
      IF( LSAME( UPLO, 'U' ) )THEN
*
*        Form  A  when A is stored in the upper triangle.
*
         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
            DO 20, J = 1, N
               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
                  TEMP1 = ALPHA*Y( J )
                  TEMP2 = ALPHA*X( J )
                  DO 10, I = 1, J
                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
   10             CONTINUE
               END IF
   20       CONTINUE
         ELSE
            DO 40, J = 1, N
               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
                  TEMP1 = ALPHA*Y( JY )
                  TEMP2 = ALPHA*X( JX )
                  IX    = KX
                  IY    = KY
                  DO 30, I = 1, J
                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
     $                                     + Y( IY )*TEMP2
                     IX        = IX        + INCX
                     IY        = IY        + INCY
   30             CONTINUE
               END IF
               JX = JX + INCX
               JY = JY + INCY
   40       CONTINUE
         END IF
      ELSE
*
*        Form  A  when A is stored in the lower triangle.
*
         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
            DO 60, J = 1, N
               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
                  TEMP1 = ALPHA*Y( J )
                  TEMP2 = ALPHA*X( J )
                  DO 50, I = J, N
                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
   50             CONTINUE
               END IF
   60       CONTINUE
         ELSE
            DO 80, J = 1, N
               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
                  TEMP1 = ALPHA*Y( JY )
                  TEMP2 = ALPHA*X( JX )
                  IX    = JX
                  IY    = JY
                  DO 70, I = J, N
                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
     $                                     + Y( IY )*TEMP2
                     IX        = IX        + INCX
                     IY        = IY        + INCY
   70             CONTINUE
               END IF
               JX = JX + INCX
               JY = JY + INCY
   80       CONTINUE
         END IF
      END IF
*
      RETURN
*
*     End of DSYR2 .
*
      END
*
SHAR_EOF
cat << \SHAR_EOF > uni.f
      real function uni(jd)
c***begin prologue  uni
c***date written   810915
c***revision date  830805
c***category no.  l6a21
c***keywords  random numbers, uniform random numbers
c***author    blue, james, scientific computing division, nbs
c             kahaner, david, scientific computing division, nbs
c             marsaglia, george, computer science dept., wash state univ
c
c***purpose  this routine generates quasi uniform random numbers on [0,1
c             and can be used on any computer with which allows integers
c             at least as large as 32767.
c***description
c
c       this routine generates quasi uniform random numbers on the inter
c       [0,1).  it can be used with any computer which allows
c       integers at least as large as 32767.
c
c
c   use
c       first time....
c                   z = uni(jd)
c                     here jd is any  n o n - z e r o  integer.
c                     this causes initialization of the program
c                     and the first random number to be returned as z.
c       subsequent times...
c                   z = uni(0)
c                     causes the next random number to be returned as z.
c
c
c..................................................................
c   note: users who wish to transport this program from one computer
c         to another should read the following information.....
c
c   machine dependencies...
c      mdig = a lower bound on the number of binary digits available
c              for representing integers, including the sign bit.
c              this value must be at least 16, but may be increased
c              in line with remark a below.
c
c   remarks...
c     a. this program can be used in two ways:
c        (1) to obtain repeatable results on different computers,
c            set 'mdig' to the smallest of its values on each, or,
c        (2) to allow the longest sequence of random numbers to be
c            generated without cycling (repeating) set 'mdig' to the
c            largest possible value.
c     b. the sequence of numbers generated depends on the initial
c          input 'jd' as well as the value of 'mdig'.
c          if mdig=16 one should find that
c            the first evaluation
c              z=uni(305) gives z=.027832881...
c            the second evaluation
c              z=uni(0) gives   z=.56102176...
c            the third evaluation
c              z=uni(0) gives   z=.41456343...
c            the thousandth evaluation
c              z=uni(0) gives   z=.19797357...
c
c***references  marsaglia g., "comments on the perfect uniform random
c                 number generator", unpublished notes, wash s. u.
c***routines called  i1mach,xerror
c***end prologue  uni
      integer m(17)
c
      save i,j,m,m1,m2
c
      data m(1),m(2),m(3),m(4),m(5),m(6),m(7),m(8),m(9),m(10),m(11),
     1     m(12),m(13),m(14),m(15),m(16),m(17)
     2                   / 30788,23052,2053,19346,10646,19427,23975,
     3                     19049,10949,19693,29746,26748,2796,23890,
     4                     29168,31924,16499 /
      data m1,m2,i,j / 32767,256,5,17 /
c***first executable statement  uni
      if(jd .eq. 0) go to 3
c  fill
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
      mdig=32
C     mdig=i1mach(8)+1
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
c          be sure that mdig at least 16...
C     if(mdig.lt.16)call xerror('uni--mdig less than 16',22,1,2)
      m1= 2**(mdig-2) + (2**(mdig-2)-1)
      m2 = 2**(mdig/2)
      jseed = min0(iabs(jd),m1)
      if( mod(jseed,2).eq.0 ) jseed=jseed-1
      k0 =mod(9069,m2)
      k1 = 9069/m2
      j0 = mod(jseed,m2)
      j1 = jseed/m2
      do 2 i=1,17
        jseed = j0*k0
        j1 = mod(jseed/m2+j0*k1+j1*k0,m2/2)
        j0 = mod(jseed,m2)
    2   m(i) = j0+m2*j1
      i=5
      j=17
c  begin main loop here
    3 k=m(i)-m(j)
      if(k .lt. 0) k=k+m1
      m(j)=k
      i=i-1
      if(i .eq. 0) i=17
      j=j-1
      if(j .eq. 0) j=17
      uni=float(k)/float(m1)
      return
      end
SHAR_EOF
cat << \SHAR_EOF > rnor.f
      real function rnor(jd)
c***begin prologue  rnor
c***date written   810915
c***revision date  830805
c***category no.  l6a14
c***keywords  random numbers, uniform random numbers
c***author    kahaner, david, scientific computing division, nbs
c             marsaglia, george, computer science dept., wash state univ
c
c***purpose  generates quasi normal random numbers, with mean zero and
c             unit standard deviation, and can be used with any computer
c             with integers at least as large as 32767.
c***description
c
c       rnor generates quasi normal random numbers with zero mean and
c       unit standard deviation.
c       it can be used with any computer with integers at least as
c       large as 32767.
c
c
c   use
c       first time....
c                   z = rnor(jd)
c                     here jd is any  n o n - z e r o  integer.
c                     this causes initialization of the program
c                     and the first random number to be returned as z.
c       subsequent times...
c                   z = rnor(0)
c                     causes the next random number to be returned as z.
c
c.....................................................................
c
c    note: users who wish to transport this program to other
c           computers should read the following ....
c
c   machine dependencies...
c      mdig = a lower bound on the number of binary digits available
c              for representing integers, including the sign bit.
c              this must be at least 16, but can be increased in
c              line with remark a below.
c
c   remarks...
c     a. this program can be used in two ways:
c        (1) to obtain repeatable results on different computers,
c            set 'mdig' to the smallest of its values on each, or,
c        (2) to allow the longest sequence of random numbers to be
c            generated without cycling (repeating) set 'mdig' to the
c            largest possible value.
c     b. the sequence of numbers generated depends on the initial
c          input 'jd' as well as the value of 'mdig'.
c          if mdig=16 one should find that
c            the first evaluation
c              z=rnor(87) gives  z=-.40079207...
c            the second evaluation
c              z=rnor(0) gives   z=-1.8728870...
c            the third evaluation
c              z=rnor(0) gives   z=1.8216004...
c            the fourth evaluation
c              z=rnor(0) gives   z=.69410355...
c            the thousandth evaluation
c              z=rnor(0) gives   z=.96782424...
c
c***references  marsaglia & tsang, "a fast, easily implemented
c                 method for sampling from decreasing or
c                 symmetric unimodal density functions", to be
c                 published in siam j sisc 1983.
c***routines called  i1mach,xerror
c***end prologue  rnor
      real v(65),w(65)
      integer m(17)
      save i1,j1,m,m1,m2,rmax
      data aa,b,c,rmax/12.37586,.4878992,12.67706,3.0518509e-5/
      data c1,c2,pc,xn/.9689279,1.301198,.1958303e-1,2.776994/
      data v/ .3409450, .4573146, .5397793, .6062427, .6631691
     +, .7136975, .7596125, .8020356, .8417227, .8792102, .9148948
     +, .9490791, .9820005, 1.0138492, 1.0447810, 1.0749254, 1.1043917
     +,1.1332738, 1.1616530, 1.1896010, 1.2171815, 1.2444516, 1.2714635
     +,1.2982650, 1.3249008, 1.3514125, 1.3778399, 1.4042211, 1.4305929
     +,1.4569915, 1.4834526, 1.5100121, 1.5367061, 1.5635712, 1.5906454
     +,1.6179680, 1.6455802, 1.6735255, 1.7018503, 1.7306045, 1.7598422
     +,1.7896223, 1.8200099, 1.8510770, 1.8829044, 1.9155830, 1.9492166
     +,1.9839239, 2.0198430, 2.0571356, 2.0959930, 2.1366450, 2.1793713
     +,2.2245175, 2.2725185, 2.3239338, 2.3795007, 2.4402218, 2.5075117
     +,2.5834658, 2.6713916, 2.7769943, 2.7769943, 2.7769943, 2.7769943/
      data w/   .10405134e-04, .13956560e-04, .16473259e-04,
     + .18501623e-04, .20238931e-04, .21780983e-04, .23182241e-04,
     + .24476931e-04, .25688121e-04, .26832186e-04, .27921226e-04,
     + .28964480e-04, .29969191e-04, .30941168e-04, .31885160e-04,
     + .32805121e-04, .33704388e-04, .34585827e-04, .35451919e-04,
     + .36304851e-04, .37146564e-04, .37978808e-04, .38803170e-04,
     + .39621114e-04, .40433997e-04, .41243096e-04, .42049621e-04,
     + .42854734e-04, .43659562e-04, .44465208e-04, .45272764e-04,
     + .46083321e-04, .46897980e-04, .47717864e-04, .48544128e-04,
     + .49377973e-04, .50220656e-04, .51073504e-04, .51937936e-04,
     + .52815471e-04, .53707761e-04, .54616606e-04, .55543990e-04,
     + .56492112e-04, .57463436e-04, .58460740e-04, .59487185e-04,
     + .60546402e-04, .61642600e-04, .62780711e-04, .63966581e-04,
     + .65207221e-04, .66511165e-04, .67888959e-04, .69353880e-04,
     + .70922996e-04, .72618816e-04, .74471933e-04, .76525519e-04,
     + .78843526e-04, .81526890e-04, .84749727e-04,
     + .84749727e-04, .84749727e-04, .84749727e-04/
      data m(1),m(2),m(3),m(4),m(5),m(6),m(7),m(8),m(9),m(10),m(11),
     1     m(12),m(13),m(14),m(15),m(16),m(17)
     2                   / 30788,23052,2053,19346,10646,19427,23975,
     3                     19049,10949,19693,29746,26748,2796,23890,
     4                     29168,31924,16499 /
      data m1,m2,i1,j1 / 32767,256,5,17 /
c fast part...
c
c
c***first executable statement  rnor
      if(jd.ne.0)go to 27
   10 continue
      i=m(i1)-m(j1)
      if(i .lt. 0) i=i+m1
      m(j1)=i
      i1=i1-1
      if(i1 .eq. 0) i1=17
      j1=j1-1
      if(j1 .eq. 0) j1=17
      j=mod(i,64)+1
      rnor=i*w(j+1)
      if( ( (i/m2)/2 )*2.eq.(i/m2))rnor=-rnor
      if(abs(rnor).le.v(j))return
c slow part; aa is a*f(0)
      x=(abs(rnor)-v(j))/(v(j+1)-v(j))
      y=uni(0)
      s=x+y
      if(s.gt.c2)go to 11
      if(s.le.c1)return
      if(y.gt.c-aa*exp(-.5*(b-b*x)**2))go to 11
      if(exp(-.5*v(j+1)**2)+y*pc/v(j+1).le.exp(-.5*rnor**2))return
c tail part; 3.855849 is .5*xn**2
   22 s=xn-alog(uni(0))/xn
      if(3.855849+alog(uni(0))-xn*s.gt.-.5*s**2)go to 22
      rnor=sign(s,rnor)
      return
   11 rnor=sign(b-b*x,rnor)
      return
c  fill
   27 continue
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
      mdig=32
C     mdig=i1mach(8)+1
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
c          be sure that mdig at least 16...
C     if(mdig.lt.16)call xerror('rnor--mdig less than 16',23,1,2)
      m1 = 2**(mdig-2) + (2**(mdig-2)-1)
      m2 = 2**(mdig/2)
      jseed = min0(iabs(jd),m1)
      if( mod(jseed,2).eq.0 ) jseed=jseed-1
      k0 =mod(9069,m2)
      k1 = 9069/m2
      j0 = mod(jseed,m2)
      j1 = jseed/m2
      do 2 i=1,17
        jseed = j0*k0
        j1 = mod(jseed/m2+j0*k1+j1*k0,m2/2)
        j0 = mod(jseed,m2)
    2   m(i) = j0+m2*j1
      j1=17
      i1=5
      rmax = 1./float(m1)
c        seed uniform (0,1) generator.  (just a dummy call)
      rnor=uni(jd)
      do 28 i=1,65
   28  w(i)=rmax*v(i)
      go to 10
      end
SHAR_EOF
cat << \SHAR_EOF > dset.f
      subroutine  dset(n,da,dx,incx)
      integer n,incx
      double precision da,dx(*)
c
c Purpose : set vector dx to constant da. Unrolled loops are used for 
c	increment equal to one.
c
c On Entry:
c   n			length of dx
c   da			any constant
c   incx		increment for dx
c
c On Exit:
c   dx(n)		vector with all n entries set to da
c
c $Header: dset.f,v 2.1 86/04/08 14:06:25 lindstrom Exp $
c
      integer i,m,mp1,nincx
c
      if(n.le.0)return
      if(incx.eq.1)go to 20
c
c        code for increment not equal to 1
c
      nincx = n*incx
      do 10 i = 1,nincx,incx
        dx(i) = da
   10 continue
      return
c
c        code for increment equal to 1
c
c
c        clean-up loop
c
   20 m = mod(n,5)
      if( m .eq. 0 ) go to 40
      do 30 i = 1,m
        dx(i) = da
   30 continue
      if( n .lt. 5 ) return
   40 mp1 = m + 1
      do 50 i = mp1,n,5
        dx(i) = da
        dx(i + 1) = da
        dx(i + 2) = da
        dx(i + 3) = da
        dx(i + 4) = da
   50 continue
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dprmut.f
      subroutine dprmut (x,npar,jpvt,job)
      integer npar,jpvt(npar),job
      double precision x(npar)
c
c Purpose: permute the elements of the array x according to the index 
c	vector jpvt (either forward or backward permutation).
c
c On Entry:
c   x(npar)		array to be permuted
c   npar		size of x (and jpvt)
c   jpvt		indices of the permutation
c   job			indicator of forward or backward permutation
c			if job = 0 forward permutation  
c				x(jpvt(i)) moved to x(i)
c			if job is nonzero backward permutation 
c				x(i) moved to x(jpvt(i))
c On Exit:
c   x(npar)		array with permuted entries
c
c   Written:	Yin Ling	U. of Maryland, August,1978
c
c $Header: dprmut.f,v 2.1 86/04/08 14:05:53 lindstrom Exp $
c
      integer i,j,k
      double precision t
c
      if (npar .le. 1) then
         return
      endif
      do 10 j = 1,npar
         jpvt(j) = -jpvt(j)
   10 continue
      if (job .eq. 0) then
c		forward permutation
         do 30 i = 1,npar 
            if (jpvt(i) .gt. 0) then
               goto 30
            endif
            j = i
            jpvt(j) = -jpvt(j)
            k = jpvt(j)
c           while
   20       if (jpvt(k) .lt. 0) then
               t = x(j)
               x(j) = x(k)
               x(k) = t
               jpvt(k) = -jpvt(k)
               j = k
               k = jpvt(k)
               goto 20
c           endwhile
            endif
   30    continue
      endif
      if (job .ne. 0 ) then
c			backward permutation
         do 50 i = 1,npar 
            if (jpvt(i) .gt. 0) then
               goto 50
            endif
            jpvt(i) = -jpvt(i)
            j = jpvt(i)
c           while
   40       if (j .ne. i) then
               t = x(i)
               x(i) = x(j)
               x(j) = t
               jpvt(j) = -jpvt(j)
               j = jpvt(j)
               goto 40
c           endwhile
            endif
   50    continue
      endif
      return
      end
SHAR_EOF
cat << \SHAR_EOF > dnrm2.f
      DOUBLE PRECISION FUNCTION DNRM2 ( N, DX, INCX)
      INTEGER          NEXT
      DOUBLE PRECISION   DX(1), CUTLO, CUTHI, HITEST, SUM, XMAX,ZERO,ONE
      DATA   ZERO, ONE /0.0D0, 1.0D0/
C
C     EUCLIDEAN NORM OF THE N-VECTOR STORED IN DX() WITH STORAGE
C     INCREMENT INCX .
C     IF    N .LE. 0 RETURN WITH RESULT = 0.
C     IF N .GE. 1 THEN INCX MUST BE .GE. 1
C
C           C.L.LAWSON, 1978 JAN 08
C
C     FOUR PHASE METHOD     USING TWO BUILT-IN CONSTANTS THAT ARE
C     HOPEFULLY APPLICABLE TO ALL MACHINES.
C         CUTLO = MAXIMUM OF  DSQRT(U/EPS)  OVER ALL KNOWN MACHINES.
C         CUTHI = MINIMUM OF  DSQRT(V)      OVER ALL KNOWN MACHINES.
C     WHERE
C         EPS = SMALLEST NO. SUCH THAT EPS + 1. .GT. 1.
C         U   = SMALLEST POSITIVE NO.   (UNDERFLOW LIMIT)
C         V   = LARGEST  NO.            (OVERFLOW  LIMIT)
C
C     BRIEF OUTLINE OF ALGORITHM..
C
C     PHASE 1    SCANS ZERO COMPONENTS.
C     MOVE TO PHASE 2 WHEN A COMPONENT IS NONZERO AND .LE. CUTLO
C     MOVE TO PHASE 3 WHEN A COMPONENT IS .GT. CUTLO
C     MOVE TO PHASE 4 WHEN A COMPONENT IS .GE. CUTHI/M
C     WHERE M = N FOR X() REAL AND M = 2*N FOR COMPLEX.
C
C     VALUES FOR CUTLO AND CUTHI..
C     FROM THE ENVIRONMENTAL PARAMETERS LISTED IN THE IMSL CONVERTER
C     DOCUMENT THE LIMITING VALUES ARE AS FOLLOWS..
C     CUTLO, S.P.   U/EPS = 2**(-102) FOR  HONEYWELL.  CLOSE SECONDS ARE
C                   UNIVAC AND DEC AT 2**(-103)
C                   THUS CUTLO = 2**(-51) = 4.44089E-16
C     CUTHI, S.P.   V = 2**127 FOR UNIVAC, HONEYWELL, AND DEC.
C                   THUS CUTHI = 2**(63.5) = 1.30438E19
C     CUTLO, D.P.   U/EPS = 2**(-67) FOR HONEYWELL AND DEC.
C                   THUS CUTLO = 2**(-33.5) = 8.23181D-11
C     CUTHI, D.P.   SAME AS S.P.  CUTHI = 1.30438D19
C     DATA CUTLO, CUTHI / 8.232D-11,  1.304D19 /
C     DATA CUTLO, CUTHI / 4.441E-16,  1.304E19 /
      DATA CUTLO, CUTHI / 8.232D-11,  1.304D19 /
C
      IF(N .GT. 0) GO TO 10
         DNRM2  = ZERO
         GO TO 300
C
   10 ASSIGN 30 TO NEXT
      SUM = ZERO
      NN = N * INCX
C                                                 BEGIN MAIN LOOP
      I = 1
   20    GO TO NEXT,(30, 50, 70, 110)
   30 IF( DABS(DX(I)) .GT. CUTLO) GO TO 85
      ASSIGN 50 TO NEXT
      XMAX = ZERO
C
C                        PHASE 1.  SUM IS ZERO
C
   50 IF( DX(I) .EQ. ZERO) GO TO 200
      IF( DABS(DX(I)) .GT. CUTLO) GO TO 85
C
C                                PREPARE FOR PHASE 2.
      ASSIGN 70 TO NEXT
      GO TO 105
C
C                                PREPARE FOR PHASE 4.
C
  100 I = J
      ASSIGN 110 TO NEXT
      SUM = (SUM / DX(I)) / DX(I)
  105 XMAX = DABS(DX(I))
      GO TO 115
C
C                   PHASE 2.  SUM IS SMALL.
C                             SCALE TO AVOID DESTRUCTIVE UNDERFLOW.
C
   70 IF( DABS(DX(I)) .GT. CUTLO ) GO TO 75
C
C                     COMMON CODE FOR PHASES 2 AND 4.
C                     IN PHASE 4 SUM IS LARGE.  SCALE TO AVOID OVERFLOW.
C
  110 IF( DABS(DX(I)) .LE. XMAX ) GO TO 115
         SUM = ONE + SUM * (XMAX / DX(I))**2
         XMAX = DABS(DX(I))
         GO TO 200
C
  115 SUM = SUM + (DX(I)/XMAX)**2
      GO TO 200
C
C
C                  PREPARE FOR PHASE 3.
C
   75 SUM = (SUM * XMAX) * XMAX
C
C
C     FOR REAL OR D.P. SET HITEST = CUTHI/N
C     FOR COMPLEX      SET HITEST = CUTHI/(2*N)
C
   85 HITEST = CUTHI/FLOAT( N )
C
C                   PHASE 3.  SUM IS MID-RANGE.  NO SCALING.
C
      DO 95 J =I,NN,INCX
      IF(DABS(DX(J)) .GE. HITEST) GO TO 100
   95    SUM = SUM + DX(J)**2
      DNRM2 = DSQRT( SUM )
      GO TO 300
C
  200 CONTINUE
      I = I + INCX
      IF ( I .LE. NN ) GO TO 20
C
C              END OF MAIN LOOP.
C
C              COMPUTE SQUARE ROOT AND ADJUST FOR SCALING.
C
      DNRM2 = XMAX * DSQRT(SUM)
  300 CONTINUE
      RETURN
      END
SHAR_EOF
cat << \SHAR_EOF > Makefile
OBJECTS = dasum.o dpbfa.o dqrdc.o dsymv.o uni.o daxpy.o dpbsl.o dqrsl.o dsyr2.o dcopy.o dpofa.o dscal.o dtrsl.o ddot.o dposl.o dset.o idamax.o dgemv.o dprmut.o dswap.o rnor.o dnrm2.o lsame.o xerbla.o
FLAGS = -O

.SUFFIXES: .f .o

.f.o:
	f77 -c $(FLAGS) $*.f

lib.a :: $(OBJECTS)
	ar rv  lib.a $(OBJECTS)
	rm *.o
	ranlib  lib.a
SHAR_EOF
cat << \SHAR_EOF > README

This directory collects public domain FORTRAN routines called upon by
RKPACK routines: 
    Blas    -- dasum, daxpy, dcopy, ddot, dnrm2, dscal, dswap, idamax
    Blas2   -- dgemv, dsymv, dsyr2
    Linpack -- dpbfa, dpbsl, dpofa, dposl, dqrdc, dqrsl, dtrsl
    Other   -- dprmut, dset
and public domain pseudo random number generators:
    Cmlib   -- rnor, uni
Run `make' under standard UNIX system to compile and archive the *.o
files in lib.a.

Chong Gu
March 18, 1992
SHAR_EOF
cat << \SHAR_EOF > lsame.f
      LOGICAL FUNCTION LSAME ( CA, CB )
*     .. Scalar Arguments ..
      CHARACTER*1            CA, CB
*     ..
*
*  Purpose
*  =======
*
*  LSAME  tests if CA is the same letter as CB regardless of case.
*  CB is assumed to be an upper case letter. LSAME returns .TRUE. if
*  CA is either the same as CB or the equivalent lower case letter.
*
*  N.B. This version of the routine is only correct for ASCII code.
*       Installers must modify the routine for other character-codes.
*
*       For EBCDIC systems the constant IOFF must be changed to -64.
*       For CDC systems using 6-12 bit representations, the system-
*       specific code in comments must be activated.
*
*  Parameters
*  ==========
*
*  CA     - CHARACTER*1
*  CB     - CHARACTER*1
*           On entry, CA and CB specify characters to be compared.
*           Unchanged on exit.
*
*
*  Auxiliary routine for Level 2 Blas.
*
*  -- Written on 20-July-1986
*     Richard Hanson, Sandia National Labs.
*     Jeremy Du Croz, Nag Central Office.
*
*     .. Parameters ..
      INTEGER                IOFF
      PARAMETER            ( IOFF=32 )
*     .. Intrinsic Functions ..
      INTRINSIC              ICHAR
*     .. Executable Statements ..
*
*     Test if the characters are equal
*
      LSAME = CA .EQ. CB
*
*     Now test for equivalence
*
      IF ( .NOT.LSAME ) THEN
         LSAME = ICHAR(CA) - IOFF .EQ. ICHAR(CB)
      END IF
*
      RETURN
*
*  The following comments contain code for CDC systems using 6-12 bit
*  representations.
*
*     .. Parameters ..
*     INTEGER                ICIRFX
*     PARAMETER            ( ICIRFX=62 )
*     .. Scalar Arguments ..
*     CHARACTER*1            CB
*     .. Array Arguments ..
*     CHARACTER*1            CA(*)
*     .. Local Scalars ..
*     INTEGER                IVAL
*     .. Intrinsic Functions ..
*     INTRINSIC              ICHAR, CHAR
*     .. Executable Statements ..
*
*     See if the first character in string CA equals string CB.
*
*     LSAME = CA(1) .EQ. CB .AND. CA(1) .NE. CHAR(ICIRFX)
*
*     IF (LSAME) RETURN
*
*     The characters are not identical. Now check them for equivalence.
*     Look for the 'escape' character, circumflex, followed by the
*     letter.
*
*     IVAL = ICHAR(CA(2))
*     IF (IVAL.GE.ICHAR('A') .AND. IVAL.LE.ICHAR('Z')) THEN
*        LSAME = CA(1) .EQ. CHAR(ICIRFX) .AND. CA(2) .EQ. CB
*     END IF
*
*     RETURN
*
*     End of LSAME.
*
      END
SHAR_EOF
cat << \SHAR_EOF > xerbla.f
      SUBROUTINE XERBLA ( SRNAME, INFO )
*     ..    Scalar Arguments ..
      INTEGER            INFO
      CHARACTER*6        SRNAME
*     ..
*
*  Purpose
*  =======
*
*  XERBLA  is an error handler for the Level 2 BLAS routines.
*
*  It is called by the Level 2 BLAS routines if an input parameter is
*  invalid.
*
*  Installers should consider modifying the STOP statement in order to
*  call system-specific exception-handling facilities.
*
*  Parameters
*  ==========
*
*  SRNAME - CHARACTER*6.
*           On entry, SRNAME specifies the name of the routine which
*           called XERBLA.
*
*  INFO   - INTEGER.
*           On entry, INFO specifies the position of the invalid
*           parameter in the parameter-list of the calling routine.
*
*
*  Auxiliary routine for Level 2 Blas.
*
*  Written on 20-July-1986.
*
*     .. Executable Statements ..
*
      WRITE (*,99999) SRNAME, INFO
*
      STOP
*
99999 FORMAT ( ' ** On entry to ', A6, ' parameter number ', I2,
     $         ' had an illegal value' )
*
*     End of XERBLA.
*
      END
*
SHAR_EOF
cd ..
cat << \SHAR_EOF > README

This directory contains three subdirectories rkpk/, demo/, lib/, and a
LaTeX file rkpk.tex.  rkpk/ collects RKPACK routines, demo/ collects a
few application routines illustrating the user interface of RKPACK,
lib/ collects public domain routines from BLAS, BLAS2, LINPACK, and
CMLIB which are called upon by routines in rkpk/ and demo/, and
rkpk.tex provides a brief description of the code.

The materials are bundled for UNIX users with access to LaTeX
facilities and Ratfor preprocessor.  Run `latex rkpk.tex' twice to
produce the rkpk.dvi file, and consult the local expert to print out
the rkpk.dvi file to get UW-Madison Statistics TR 857.  Enter rkpk/
and lib/ to make the *.o archives and enter demo/ to check out sample
programs.

A few bugs in the earlier releases have been corrected in this
release.  Please let me know of any further problems as you encounter
them.  Thanks much.

DISCLAIMER:  THE CODE IS PROVIDED WITH NO CHARGE AND NO WARRANTY AND
THE USERS USE THE CODE AT THEIR OWN RISK.  FREE DISTRIBUTION OF
MATERIALS IN THIS BUNDLE IS GRANTED FOR NONCOMMERCIAL PURPOSES
PROVIDED THAT NO CHANGE IS MADE.


Chong Gu
Department of Statistics
Purdue University
West Lafayette, IN 47907

chong@pop.stat.purdue.edu

April 18, 1992
SHAR_EOF
cat << \SHAR_EOF > rkpk.tex
% -*- rkpk.tex -*-
% This is the LaTeX source file of the Technical Report describing Rkpack.
% Please run latex twice to get the cross references right.  Permission 
% for distribution is granted providing that no change is made.
%
% Creator:        Chong Gu  [chong@stat.purdue.edu]
% Creation time:  Saturday April 18 1992

\documentstyle{article}
\marginparwidth 0pt
\oddsidemargin  0pt
\evensidemargin  0pt
\marginparsep 0pt
\topmargin   0pt
\textwidth   6.5in
\textheight  8.5 in

\newtheorem{lemma}{Lemma}
\newtheorem{thm}{Theorem}
\newtheorem{coro}{Corollary}
\newtheorem{ex}{Example}
\newtheorem{defi}{Definition}
\newtheorem{algo}{Algorithm}
\newcommand{\mbf}[1]{\mbox{\boldmath $#1$}}

\title{\bf Rkpack and its applications: \\ Fitting smoothing spline models}
\author{{Chong Gu}\thanks{Research supported by AFOSR under grant AFOSR-87-0171
               and by NASA under contract NAG5-316.} \\ {\it
Department of Statistics} \\ {\it University of Wisconsin-Madison}\\
{(Technical Report No.857)}}
\date{May 1989}

\begin{document}
\maketitle
\begin{abstract}\em

[Added April 1992: There has been some changes and additions to the
package since its first release.  Please refer to the notes appended
at the end for a brief update.]

A minipackage which forms a core for fitting various kinds of
smoothing spline models is presented.  The smoothing parameter(s) are
chosen by generalized cross validation (GCV) or generalized maximum
likelihood (GML).  The kernel of the algorithms is based on
Householder tridiagonalization with distributed truncation.  The
interface to numerically unsophisticated users is through two drivers
which handle single and multiple smoothing parameter least squares
problems respectively. The drivers can also be used in iterations for
fitting generalized spline models with non Gaussian data.  Examples
are provided to illustrate how to generate the inputs needed by the
drivers in various settings, including thin plate splines and
additive/interaction splines.  This code embodies some recent
algorithmic developments for computing smoothing spline models in
several variables.

Key Words: {\em generalized cross validation, smoothing parameter.}
\end{abstract}


\section{Introduction}
Smoothing spline techniques are widely used for modeling noisy data.
A simple example is the famous (natural) cubic spline (on $R^{1}$)
known as the solution $f_{\lambda}$ to the problem
\begin{equation}
\label{cubic}
{\bf min}\ \frac{1}{n} \sum_{j=1}^{n} (y_{j}-f(x_{j}))^{2} + \lambda
\int (f'')^2 ,
\end{equation}
where $y_{j}$ is response and $x_{j}$ is covariate.  As
$\lambda\longrightarrow 0$, $f_{\lambda}$ tends to interpolate $y_{j}$
at $x_{j}$, and as $\lambda\longrightarrow\infty$, $f_{\lambda}$
converges to the least square simple linear regression.  When
$\lambda\in(0,\infty)$, we say that $f_{\lambda}$ {\em smooths} the
data, and the {\em smoothing parameter} $\lambda$ controls the
tradeoff between the goodness-of-fit $\sum_{j=1}^{n}
(y_{j}-f(x_{j}))^{2}$ and the average curvature $\int(f'')^{2}$
(roughness) of the solution.

The problem (\ref{cubic}) can be recast in the more general form
\begin{equation}
\label{gene_spli}
{\bf min}\ \frac{1}{n}\sum_{j=1}^{n}(y_{j}-L_{j}f)^{2}+\lambda J(f),
\end{equation}
where $f\in{\cal H}$, a Hilbert space in which $L_{j}$'s are bounded
linear functionals, and $J(f)$ is the square of a semi-norm in ${\cal
H}$ which measures the ``roughness''.  A popular multivariate
specialization of (\ref{gene_spli}) is the thin plate smoothing
spline, see Wahba (1980) and references therein.  The explicit
solution to (\ref{gene_spli}) was worked out by Kimeldorf and Wahba
(1971).  A practical data-driven method for selecting $\lambda$, known
as the generalized cross validation (GCV) method, was proposed by
Craven and Wahba (1979).

Several generalizations of (\ref{gene_spli}) have been proposed in the
literature.  The partial spline technique splits the covariates into
two groups, and models the response additively with linear model on
one group and spline model on the other, see Wahba (1986) and
references therein.  When the roughness penalty $J(f)$ is decomposed
orthogonally to several components each bearing its own smoothing
parameter, we end up with a multiple smoothing parameter problem.  An
important instance of such a setting is the additive/interaction
spline technique, see Barry (1986) and Wahba (1986).  Actually, the
partial spline technique can also be viewed as splitting $J(f)$ to two
parts and assigning one of the two smoothing parameters to $\infty$.
For asymmetric and/or categorical responses, least square is no longer
the natural choice for measuring the goodness-of-fit.  By replacing
the least square by other goodness-of-fit scores, e.g., the log
likelihood, we obtain the generalized spline models, see O'Sullivan et
al. (1986) and Gu (1990).  These generalizations are not exclusive
with each other.

In this report, we review some recent algorithmic developments for
computing general smoothing spline models and the generalizations,
with the smoothing parameter(s) selected via the GCV method.  The
algorithms are of order $O(n^{3})$.  For the univariate spline models,
there exists a linear order algorithm, see Hutchinson and de Hoog
(1985) and O'Sullivan (1985), hence the proposed algorithms are not
competitive.  However, for multivariate smoothing spline models, the
proposed algorithms are believed to be the best available, see Gu et
al. (1989).

The algorithms proposed are coded by the author in a minipackage by
the name {\em Rkpack}.  An earlier version of Rkpack includes only the
single smoothing parameter driver based on the algorithm of Gu et al.
(1989).  After the multiple smoothing parameter algorithm of Gu and
Wahba (1991) has been developed, the corresponding driver is added to
Rkpack.  Besides, a GML (generalized maximum likelihood, see Wahba
(1985)) option is also provided in the new version and the user
interface and the internal communications of the existing routines are
reconstructed, and some of the routines are renamed.  Ratfor (rational
Fortran) is used as the programming language.  Linpack, Blas (Dongarra
et al., 1979) and Blas2 (Dongarra et al., 1986) routines are used as
building blocks whenever convenient.  The routines have been tested
for internal consistency and tested against Gcvpack (Bates et al.,
1987) on thin plate spline examples.

In Section~2, we sketch the derivation of the single smoothing
parameter algorithm and describe the corresponding Rkpack driver {\tt
dsidr}.  In Section~3 we discuss the algorithm tackling the multiple
smoothing parameter problems and explain the Rkpack driver {\tt
dmudr}.  Section~4 describes some applications of these algorithms.
Section~5 collects miscellaneous points.


\section{A general algorithm}
\subsection{Derivation}
\label{form_sin}
Suppose we observe
\begin{equation}
\label{formu}
y_{j}=L_{j}f+\epsilon_{j},\ \ \ \ \ j=1,\cdots,n
\end{equation}
where the $L_{j}$'s are bounded linear functionals in some Hilbert
space ${\cal H}$, and the $\epsilon_{j}$'s are independent noise with
mean 0 and variance $\sigma^{2}$ possibly unknown.  We solve the
variational problem
\begin{equation}
\label{spli_sin}
{\bf min}\ \frac{1}{n}
\sum_{j=1}^{n}(y_{j}-L_{j}f)^{2}+\lambda\|P_{1}f\|^{2}
\end{equation}
in the space ${\cal H}$, where $P_{1}$ is a projection operator to a
subspace ${\cal H}_{1}$ with codimension $M$, and $\|\cdot\|$ is the
norm in ${\cal H}$.  The solution $f_{\lambda}$ to (\ref{spli_sin}) is
called a spline in a general sense.  It has been derived by Kimeldorf
and Wahba (1971) that the solution is of form
\begin{equation}
\label{solu_sin}
f_{\lambda}=\sum_{j=1}^{n}c_{j}(P_{1}\xi_{j})+\sum_{\nu=1}^{M}d_{\nu}\phi_{\nu},
\end{equation}
where $\xi_{j}$ is the representer of $L_{j}$, and
$\{\phi_{\nu}\}_{\nu=1}^{M}$ span the null space of $P_{1}$, while
$\mbf{c}=(c_{1},\cdots,c_{n})^{T}$ and
$\mbf{d}=(d_{1},\cdots,d_{M})^{T}$ are the solutions to the
minimization problem
\begin{equation}
\label{mini_sin}
{\bf min}\ \frac{1}{n}\|\mbf{y}-S\mbf{d}-\tilde{Q}\mbf{c}\|^{2}
+\lambda\mbf{c}^{T}\tilde{Q}\mbf{c},
\end{equation}
where
\begin{eqnarray}
\tilde{Q} & = & (<P_{1}\xi_{j1},P_{1}\xi_{j2}>)  \nonumber \\
\label{gram_sin}
S & = & (L_{j}\phi_{\nu}) ,
\end{eqnarray}
with $<\!\cdot,\cdot\! >$ indicating the inner product in ${\cal H}$.
It can be shown (Wahba, 1984) that the solution to the linear system
\begin{eqnarray}
(\tilde{Q}+n\lambda I)\mbf{c}+S\mbf{d} & = & \mbf{y} \nonumber \\
\label{lsys}
S^{T}\mbf{c} & = & 0
\end{eqnarray}
is a minimizer of (\ref{mini_sin}), and when $\tilde{Q}$ is of full
rank, it is the unique minimizer.  As a matter of fact, the minimizer
of (\ref{mini_sin}) will always give a unique \mbf{d} and a unique
$\tilde{Q}\mbf{c}$, provided that $S$ is of full column rank.

For the partial spline model, we assume
\[
y_{j}=\mbf{u}_{j}^{T}\mbf{\beta}+L_{j}f+\epsilon ,
\]
where $\mbf{u}_{j}^{T}$ is known covariate at the $j$th observation
and \mbf{\beta} is the associated coefficient.  We solve
\[
{\bf min}\
\frac{1}{n}\sum_{j=1}^{n}(y_{j}-\mbf{u}_{j}^{T}\mbf{\beta}-L_{j}f)^{2}
+\lambda\|P_{1}f\|^{2},
\]
see, e.g., Wahba (1986).  Numerically, this formulation leads to
\begin{eqnarray*}
(\tilde{Q}+n\lambda I)\mbf{c}+S_{*}\mbf{d}_{*} & = & \mbf{y} \\
S_{*}^{T}\mbf{c} & = & 0 ,
\end{eqnarray*}
where $S_{*}=(U,S)$, $U=(\mbf{u}_{1},\cdots,\mbf{u}_{n})^{T}$, and
$\mbf{d}_{*}=(\mbf{\beta}^{T},\mbf{d}^{T})^{T}$.  As long as $S_{*}$
is of full column rank, the partial spline model creates no extra
numerical complexity.

The generalized cross validation (GCV) method works as follows.
Writing
\[
\hat{\mbf{y}}=(L_{1}f_{\lambda},\cdots,L_{n}f_{\lambda})^{T} = A(\lambda)\mbf{y},
\]
the GCV method seeks the $\lambda$ that minimizes
\[
V(\lambda)=\frac{(1/n)\|(I-A(\lambda))\mbf{y}\|^{2}} {[(1/n){\rm
tr}(I-A(\lambda))]^{2}} ,
\]
where $A(\lambda)$ is the so-called influence matrix.  Letting
%\begin{singlespace}
\[
S=FR=(F_{1},F_{2})\left(\begin{array}{c}R_{1}\\ 0\end{array}\right)
\]
%\end{singlespace}
be the QR-decomposition of $S$, it can be shown (Wahba, 1984) that
\[
I-A(\mbf{\lambda})=n\lambda F_{2}(F_{2}^{T}\tilde{Q}F_{2}+n\lambda
I)^{-1}F_{2}^{T},
\]
hence
\begin{equation}
\label{v_sin}
V(\lambda)=
\frac{(1/n)\mbf{z}^{T}(Q+n\lambda I)^{-2}\mbf{z}}
{[(1/n){\rm tr}(Q+n\lambda I)^{-1}]^{2}} ,
\end{equation}
where $\mbf{z}=F_{2}^{T}\mbf{y}$ and $Q=F_{2}^{T}\tilde{Q}F_{2}$.  The
GCV method was first proposed by Craven and Wahba (1979) and shown to
be asymptotically optimal for minimizing predictive mean square error
(Craven and Wahba, 1979; Li, 1986).

\subsection{Algorithm}
Under the above setting, we propose the following algorithm for
minimizing GCV score to select $\lambda$:
\begin{algo}
\label{algo_sin}
Given the inputs of matrices $S$, $\tilde{Q}$, and response vector
\mbf{y}, perform
\begin{enumerate}
\item Initialization:
\begin{enumerate}
\item Compute the QR-decomposition 
%\mbox{\begin{singlespace}
$S=FR=(F_{1},F_{2})\left(\begin{array}{c}R_{1}\\0\end{array}\right)$.
%\end{singlespace}}
\item Compute $\mbf{z}=F_{2}^{T}\mbf{y}$ and $Q=F_{2}^{T}\tilde{Q}F_{2}$.
\end{enumerate}
\item Tridiagonalization and minimization: 
\begin{enumerate}
\item Compute $Q=UTU^{T}$, where $U$ is orthogonal and $T$ is tridiagonal.
\item Compute $\mbf{x}=U^{T}\mbf{z}$.
\item Minimize
\begin{equation}
\label{v_sin_tri}
V(\lambda)=\frac{(1/n)\mbf{x}^{T}(T+n\lambda I)^{-2}\mbf{x}}
{[(1/n){\rm tr}(T+n\lambda I)^{-1}]^{2}}
\end{equation}
with respect to $\lambda$.
\end{enumerate}
\item Compute $(\mbf{c}, \mbf{d})\,|\,\lambda$.
\end{enumerate}
\end{algo}
The algorithm is designed after the problem (\ref{lsys}) instead of
(\ref{mini_sin}), and the numerical requirements on the inputs are
that $S$ be of full column rank and that $Q=F_{2}^{T}\tilde{Q}F_{2}$
be nonnegative definite.  The later condition is equivalent to
\[
S^{T}\mbf{c}=0 \ \ \Longrightarrow \ \ \mbf{c}^{T}\tilde{Q}\mbf{c}\geq
0.
\]
The formulation guarantees a unique numerical solution of coefficients
\mbf{c} and \mbf{d}, even in case the matrix $Q$ is numerically rank
deficient, which will occur when various kinds of replicates or near
replicates are present.  The kernel of the algorithm is step~2(a),
which is implemented via the Householder tridiagonalization with
distributed truncation.  More details about the algorithm can be found
in Gu et al. (1989).  See also Gu (1990).

\subsection{Rkpack driver {\tt dsidr}}
The driver {\tt dsidr} implements Algorithm~\ref{algo_sin}.  It
requires inputs of matrices $S$, $\tilde{Q}$, and a vector \mbf{y} and
solves
\begin{eqnarray*}
(\tilde{Q}+n\lambda I)\mbf{c}+S\mbf{d} & = & \mbf{y} \\ S^{T}\mbf{d} &
= & 0
\end{eqnarray*}
with $\lambda$ selected as the minimizer of $V(\lambda)$ given in
(\ref{v_sin}).  The inputs are destroyed on return.  The calling
sequence and the description of input/output arguments are to be found
in the self-documented Ratfor source code in the file {\tt dsidr.r}.

The driver {\tt dsidr} simply comprises three successive calls to the
routines {\tt dstup, dcore, {\rm and} dcoef}.  Table~\ref{atbl3}
describes the organizations of the three routines in parallel to
Algorithm~\ref{algo_sin}, where {\tt l-<string>} stands for Linpack
routines and {\tt r-<string>} stands for Rkpack routines.
\begin{table}
\caption{The structure of {\tt dsidr}}
\label{atbl3}
\begin{center}
\begin{tabular}{c|ll}\hline\hline
 & Building blocks & \hspace{10mm}Tasks\\ \hline {\tt dstup} & {\tt
l-dqrdc} & $S=FR=F_{1}R_{1}$ \\ & {\tt r-dqrslm} &
$Q=F_{2}^{T}\tilde{Q}F_{2}$, also $F_{1}^{T}\tilde{Q}F_{2}$\\ & {\tt
l-dqrsl} & $\mbf{z}=F_{2}^{T}\mbf{y}$, also $F_{1}^{T}\mbf{y}$\\
\hline {\tt dcore} & {\tt r-dsytr} & $Q=UTU^{T}$\\ & {\tt l-dqrsl} &
$\mbf{x}=U^{T}\mbf{z}$\\ & {\tt r-dgold/deval} & Search optimal
$n\lambda$\\ & {\tt r-dtrev} & Compute $V(\lambda)$ from $(T+n\lambda
I)$ and \mbf{x}\\ \hline {\tt dcoef} & {\tt l-dqrsl} &
$\mbf{c}=F_{2}U(T+n\lambda I)^{-1}\mbf{x}$\\ & {\tt l-dtrsl} &
$\mbf{d}=R_{1}^{-1}(F_{1}^{T}\mbf{y}-(F_{1}^{T}\tilde{Q}F_{2})U(T+n\lambda
I)^{-1}\mbf{x})$\\ \hline
\end{tabular}
\end{center}
\end{table}


\section{More smoothing parameters}
\subsection{Formulation}
\label{form_mul}
Under (\ref{formu}), we now specify a richer spline family.  Consider
an orthogonal decomposition of $\cal H$ into more than two components,
${\cal H}=\oplus_{i=0}^{k}{\cal H}_{i}$, $k>1$.  A direct
generalization of (\ref{spli_sin}) is
\begin{equation}
\label{spline_mul}
{\bf min}\ \frac{1}{n} \sum_{j=1}^{n} (y_{j}-L_{j}f)^{2} +
\sum_{i=1}^{k}\lambda_{i}\| P_{i} f \|^{2} ,
\end{equation}
where the $\lambda_{i}$'s are a set of smoothing parameters and
$P_{i}$ is the orthogonal projection operator onto ${\cal H}_{i}$.
Writing $\lambda_{i}=\lambda/\theta_{i}$, We can rewrite
(\ref{spline_mul}) as
\begin{equation}
\label{spline_sin_theta}
{\bf min}\ \frac{1}{n} \sum_{j=1}^{n} (y_{j}-L_{j}f)^{2} + \lambda\|
P_{*} f \|_{\mbf{\theta}}^{2} ,
\end{equation}
where $P_{*}=\sum_{i=1}^{k}P_{i}$ is the projection operator onto
${\cal H}_{*}=\oplus_{i=1}^{k}{\cal H}_{i}$, and
\[
\|f\|_{\mbf{\theta}}^{2}=\|P_{0}f\|^{2}+\sum_{i=1}^{k}\theta_{i}^{-1}\|P_{i}f\|^{2}
\]
is a modified norm indexed by \mbf{\theta}, where $\|\cdot\|$ is the
original norm.  It can be shown that the representer of $L_{j}$ under
the norm $\|\cdot\|_{\mbf{\theta}}$ is
\[
\xi_{j}^{\mbf{\theta}}=(P_{0}\xi_{j})+\sum_{i=1}^{k}\theta_{i}(P_{i}\xi_{j}) ,
\]
where $\xi_{j}$ is its representer under the norm $\|\cdot\|$.
Denoting $<\cdot,\cdot>$, $<\cdot,\cdot>_{\mbf{\theta}}$ as the inner
products corresponding to the norms $\|\cdot\|$,
$\|\cdot\|_{\mbf{\theta}}$ respectively, we have
\begin{equation}
\tilde{Q}_{*}^{\mbf{\theta}}=(<P_{*}\xi_{j_{1}}^{\mbf{\theta}},
P_{*}\xi_{j_{2}}^{\mbf{\theta}}>_{\mbf{\theta}})
=\sum_{i=1}^{k}\theta_{i}\tilde{Q}_{i} ,
\end{equation}
where $\tilde{Q}_{i}=(<P_{i}\xi_{j_{1}},P_{i}\xi_{j_{2}}>)$.  Thus the
solution to (\ref{spline_mul}) can be written as
\[
f_{\mbf{\lambda}}=\sum_{j=1}^{n}c_{j}\xi_{j}^{\mbf{\theta}}+
\sum_{\nu=1}^{M}d_{\nu}\phi_{\nu} ,
\]
with \mbf{c}, \mbf{d} determined by
\begin{equation}
\label{mini_mul}
{\bf min}\ \frac{1}{n} \|
\mbf{y}-S\mbf{d}-\tilde{Q}_{*}^{\mbf{\theta}}\mbf{c}\|^{2} + \lambda
\mbf{c}^{T}\tilde{Q}_{*}^{\mbf{\theta}}\mbf{c} .
\end{equation}
And the counterpart of (\ref{lsys}) is
\begin{eqnarray*}
(\tilde{Q}_{*}^{\mbf{\theta}}+n\lambda I)\mbf{c}+S\mbf{d} & = &
\mbf{y} \\ S^{T}\mbf{c} & = & 0 .
\end{eqnarray*}
With the QR-decomposition of
%\mbox{\begin{singlespace}
$S=FR=(F_{1},F_{2})\left(\begin{array}{c}R_{1}\\ 0\end{array}\right)$,
%\end{singlespace}}
the GCV score can be written as
\begin{equation}
\label{v_mul}
V(\mbf{\lambda})=V(\lambda,\mbf{\theta})=
\frac{(1/n)\mbf{z}^{T}(Q_{*}^{\mbf{\theta}}+n\lambda I)^{-2}\mbf{z}}
{[(1/n){\rm tr}(Q_{*}^{\mbf{\theta}}+n\lambda I)^{-1}]^{2}} ,
\end{equation}
where $\mbf{z}=F_{2}^{T}\mbf{y}$ and
\[
Q_{*}^{\mbf{\theta}}=F_{2}^{T}\tilde{Q}_{*}^{\mbf{\theta}}F_{2}
=\sum_{i=1}^{k}\theta_{i}(F_{2}^{T}\tilde{Q}_{i}F_{2})
=\sum_{i=1}^{k}\theta_{i}{Q}_{i} ,
\]
where $Q_{i}=F_{2}^{T}\tilde{Q}_{i}F_{2}$.

\subsection{Algorithms}
To minimize $V(\lambda,\mbf{\theta})$ with respect to \mbf{\theta} and
$\lambda$, we wish to iterate on the following cycle:
\begin{enumerate}
\item For fixed \mbf{\theta}, minimize 
$V(\lambda|\mbf{\theta})$ with respect to $n\lambda$.
\item Update \mbf{\theta} using information from the current estimates.
\end{enumerate}
Step~1 above can be achieved via Algorithm~\ref{algo_sin}.  To carry
out step 2, we will evaluate the gradient and the Hessian of
$V(\mbf{\theta}|\lambda)$ with respect to
$\mbf{\eta}=\log(\mbf{\theta})$, then apply the modified Newton method
(Gill et al., 1981) to update the \mbf{\eta}.  Choosing \mbf{\eta} as
the variables makes the optimization constraint-free and invariant
under arbitrary scaling of $\theta_{i}$'s.  The algorithm is specified
as follows:
\begin{algo}
\label{algo_mul}
Assuming the inputs of the matrices $S$, $\tilde{Q}_{i}$,
$i=1,\cdots,k$, the response vector \mbf{y}, and the starting values
$\mbf{\eta}_{0}$, perform
\begin{enumerate}
\item {\em Initialization:}
\begin{enumerate}
\item Compute the QR-decomposition of 
%\mbox{\begin{singlespace}
$S=FR=(F_{1}, F_{2})\left(\begin{array}{c} R_{1} \\ 0
\end{array}\right)$.
%\end{singlespace}}
\item Compute $\mbf{z}=F_{2}^{T}\mbf{y}$ and $Q_{i}=F_{2}^{T}\tilde{Q}_{i}F_{2}$.
\item Set $\Delta\mbf{\eta}=0$, $\mbf{\eta}_{-}=\mbf{\eta}_{0}$, $V_{-}=\infty$.
\end{enumerate}
\item {\em Iteration:}
\begin{enumerate}
\item For the current trial values $\mbf{\eta}=\mbf{\eta}_{-}+\Delta\mbf{\eta}$, 
collect $Q_{*}^{\mbf{\theta}}=\sum_{i=1}^{k} e^{\eta_{i}}Q_{i}$.
\item Compute $Q_{*}^{\mbf{\theta}}=U{T}U^{T}$, where $U$ is orthogonal and ${T}$ 
is tridiagonal.  Compute $\mbf{x}=U^{T}\mbf{z}$.
\item Minimize 
\begin{equation}
\label{v_tri}
V(\lambda|\mbf{\eta})=\frac{(1/n)\mbf{x}^{T}({T}+n\lambda
I)^{-2}\mbf{x}} {[(1/n){\rm tr}({T}+n\lambda I)^{-1}]^{2}}
\end{equation}
If $V>V_{-}$, set $\Delta\mbf{\eta}=\Delta\mbf{\eta}/2$, goto (a);
else proceed.
\item Evaluate the gradient $\mbf{g}=(\partial/\partial\mbf{\eta})V(\mbf{\eta}|\lambda)$ 
and the Hessian
$H=(\partial^{2}/\partial\mbf{\eta}\partial\mbf{\eta}^{T})V(\mbf{\eta}|\lambda)$.
Calculate the increment $\Delta\mbf{\eta}=-\tilde{H}^{-1}\mbf{g}$,
where $\tilde{H}=H+{\rm diag}(\mbf{e})$ is positive definite.  If $H$
itself is positive definite ``enough'', \mbf{e} is simply 0.
\item Check convergence conditions.  If the conditions fail, set 
$\mbf{\eta}_{-}=\mbf{\eta}$, $V_{-}=V$, goto (a); else proceed.
\end{enumerate}
\item {\em Calculate the optimal model:}
\begin{enumerate}
\item If $\Delta\eta_{i}<-\gamma$, set $\eta_{i}=-\infty$, where $\gamma$ is a 
``large'' number, say, $\gamma\in(.5,.9)$.
\item Collect $Q_{*}^{\mbf{\theta}}=\sum_{i=1}^{k} e^{\eta_{i}}Q_{i}$.  Calculate the model
minimizing $V(\lambda|\mbf{\eta})$.
\end{enumerate}
\end{enumerate}
\end{algo}
A heuristic starting value procedure is proposed as
\begin{algo}
\label{start}
If no starting values are specified for Algorithm~\ref{algo_mul}, we
perform by default:
\begin{enumerate}
\item Set $\tilde{\theta}_{i}=({\em tr}(Q_{i}))^{-1}$, 
fit the one smoothing parameter spline model by minimizing
$V(\lambda|\mbf{\theta})$, calculate the parameter \mbf{c}.
\item Calculate $\theta_{i0}=\| P_{i}f_{\mbf{\lambda}}\|^{2}
=\tilde{\theta}_{i}^{2}\mbf{c}^{T}\tilde{Q}_{i}\mbf{c}$, and set the
starting values for Algorithm~\ref{algo_mul} to be
$\eta_{i0}=\log(\theta_{i0})$.
\end{enumerate}
\end{algo}
Technical details of the algorithms can be found in Gu and Wahba
(1991).

\subsection{Rkpack driver {\tt dmudr}}
The driver {\tt dmudr} implements Algorithm~\ref{algo_mul}.  It
requires inputs of matrices $S$, $\tilde{Q}_{i}$, and a vector \mbf{y}
and solves
\begin{eqnarray*}
(\tilde{Q}_{*}^{\mbf{\theta}}+n\lambda I)\mbf{c}+S\mbf{d} & = &
\mbf{y} \\ S^{T}\mbf{d} & = & 0
\end{eqnarray*}
with \mbf{\theta} and $\lambda$ selected as the minimizer of
$V(\lambda)$ given in (\ref{v_mul}).  The inputs are destroyed on
return.  The calling sequence and the description of input/output
arguments are to be found in the self-documented Ratfor source code in
the file {\tt dmudr.r}.  The organization of the driver {\tt dmudr} is
sketched in Table~\ref{atbl5}.
\begin{table}[htb]
\caption{The structure of {\tt dmudr}}
\label{atbl5}
\begin{center}
\begin{tabular}{c|ll}\hline\hline
 & Building blocks & \hspace{10mm}Tasks\\ \hline Pre-iteration & {\tt
r-dstup} & $S=FR=F_{1}R_{1}$, $Q_{i}=F_{2}^{T}\tilde{Q}_{i}F_{2}$,
$\mbf{z}=F_{2}^{T}\mbf{y}$\\ & {\tt r-dcore,dcoef} &
Algorithm~\ref{start}\\ \hline Iteration & {\tt r-dcore} & Minimize
$V(\lambda|\mbf{\theta})$\\ & {\tt r-ddeev} & Gradient and Hessian of
$V(\mbf{\theta}|\lambda)$\\ & {\tt r-dmcdc,l-dposl} & Modified Newton
update\\ \hline Post-iteration & {\tt r-dcore,dcoef} & Return
$(\mbf{c}, \mbf{d})\,|\,(\mbf{\theta},\lambda)$\\ \hline
\end{tabular}
\end{center}
\end{table}


\section{Examples}
\subsection{Thin plate splines on $E^{d}$}
On the space of $d$-variable functions which have all $m$th square
integrable derivatives, we define $P_{1}$ in (\ref{spli_sin}) as the
projector to the subspace ${\cal H}_{1}$ of functions with at least
one non-vanishing $m$th derivative.  The null space is composed of
polynomials of up to $(m-1)$ total order, which is of dimension
%\begin{singlespace}
\[
M=\left(
\begin{array}{c} 
d+m-1 \\ d
\end{array} 
\right) .
\]
%\end{singlespace}
We endow the subspace ${\cal H}_{1}$ with the norm
\[
\| P_{1} f \|^{2} = J_{m}^{d}(f) = \sum_{\alpha_{1}+\cdots+\alpha_{d}=m}
\frac{m!}{\alpha_{1}!\cdots\alpha_{d}!} \int \cdots \int 
\left( \frac{\partial^{m}f}{\partial x_{1}^{\alpha_{1}}\cdots
\partial x_{d}^{\alpha_{d}}} \right)^{2} dx_{1}\cdots dx_{d} .
\]
Letting $L_{j}f=f({\bf x}_{j})$ be the evaluation functionals, it is
known that $L_{j}$'s are bounded when $2m-d>0$.  This specialization
of (\ref{spli_sin}) results in the thin plate splines on $E^{d}$.

Given the design points ${\bf x}_{j}$, $j=1,\cdots,n$, which result in
a unique least square solution when $\lambda=\infty$, it can be shown
(see Wahba and Wendelberger (1980)) that the thin plate spline has an
expression
\[
f_{\lambda}(\cdot)=\sum_{\nu=1}^{M}d_{\nu}\phi_{\nu}(\cdot)+
\sum_{j=1}^{n}c_{j}E_{m}(|{\bf x}_{j}-\cdot|),
\]
where $|\cdot|$ is the Euclidean distance in $E^{d}$, and \mbf{c} and
\mbf{d} are the solutions to the constrained minimization problem
\begin{equation}
\label{minc}
{\bf min}\ \frac{1}{n}\|\mbf{y}-S\mbf{d}-K\mbf{c}\|^{2}
+\lambda\mbf{c}^{T}K\mbf{c} \ \ \ \ \ \ \ s.t. \ \ S^{T}\mbf{c}=0 ,
\end{equation}
where $K=(E_{m}(|{\bf x}_{j1}-{\bf x}_{j2}|))$, and
%\begin{singlespace}
\[
E_{m}(\cdot)=
\left\{\begin{array}{ll}
\theta_{m}(\cdot)^{2m-d}\log(\cdot), & d\ {\rm even},\\
 &
\theta_{m}={(-1)^{d/2+m+1}}\,/\,({2^{2m-1}\pi^{d/2}(m-1)!(m-d/2)!})\\
\theta_{m}(\cdot)^{2m-d}, & d\ {\rm odd},\\
 & \theta_{m}={\Gamma(d/2-m)}\,/\,({2^{2m}\pi^{d/2}(m-1)!})
\end{array}\right.
\]
%\end{singlespace}
The null space basis $\{\phi_{\nu}\}_{1}^{M}$ can be chosen as
$\{x_{1}^{a_{1}}\cdots
x_{d}^{a_{d}}\}_{a_{1},\cdots,a_{d}=0}^{a_{1}+\cdots+a_{d}<m}$.  The
solution to the linear system
\begin{eqnarray}
(K+n\lambda I)\mbf{c}+S\mbf{d} & = & \mbf{y} \nonumber \\
\label{lsys_tp}
S^{T}\mbf{c} & = & 0
\end{eqnarray}
is a solution to the constrained minimization problem (\ref{minc}).
Since
\begin{equation}
\label{k}
S^{T}\mbf{c}=0 \ \ \Longrightarrow \ \ \mbf{c}^{T}K\mbf{c}\geq 0 ,
\end{equation}
Algorithm~\ref{algo_sin} applies with $K$ replacing $\tilde{Q}$.  Thin
plate splines can be equivalently specified via
(\ref{solu_sin})--(\ref{gram_sin}), see Wahba and Wendelberger (1980),
though the above formulation is easier to work with.

\subsection{Splines in reproducing kernel Hilbert spaces}
\label{rkhs_spli}
The thin plate splines on $E^{d}$ are instances of splines in the
reproducing kernel Hilbert spaces (r.k.h.s.).  A r.k.h.s. is a Hilbert
space of functions on some domain ${\cal T}$ where all the evaluation
functionals are bounded.  As a direct consequence of the Riesz
representation theorem, a r.k.h.s. ${\cal H}$ has a unique reproducing
kernel (r.k.) $R(\cdot,\cdot)$ such that $R(t,\cdot)$ is the
representer of $[t](\cdot)$, $\forall t\in{\cal T}$.  For ${\cal
H}={\cal H}_{0}\oplus{\cal H}_{1}$, each of ${\cal H}_{i}$, $i=0,1$,
is also a r.k.h.s., and
$R(\cdot,\cdot)=R_{0}(\cdot,\cdot)+R_{1}(\cdot,\cdot)$, where
$R_{i}(\cdot,\cdot)$, $i=0,1$, are r.k.'s of ${\cal H}_{i}$.  If the
$L_{j}$'s in (\ref{formu}) are taken as $[t_{j}](\cdot)$, it is easy
to see that $P_{1}\xi_{j}=R_{1}(t_{j},\cdot)$ and
$\tilde{Q}=(R_{1}(t_{j1},t_{j2}))$.  Hence the computation will be
very straightforward given $R_{1}(\cdot,\cdot)$.  More details about
r.k.h.s.  can be found in Aronszajn (1950).  Since most practical
applications of the smoothing spline technique is in the context of
r.k.h.s., we have named our minipackage as Rkpack.

\subsection{Additive/interaction splines}
We sketch a formulation by Gu et al. (1989) and Gu and Wahba (1991),
which is originated in Wahba (1986) based on the tensor product
r.k.h.s.


Taking the component space (on $[0,1]$) as the r.k.h.s.
\[
W_{2}^{m}=\{f:f^{(\nu)} abs. cont., \nu=0,\cdots,m-1,\int
(f^{(m)})^{2}<\infty\}
\]
with norm
\[
\|f\|^{2}=\sum_{\nu=0}^{m-1}(\int_{0}^{1}f^{(\nu)})^{2}+\int_{0}^{1}(f^{(m)})^{2},
\]
we let ${\cal H}$ be the tensor product Hilbert space ${\cal
H}=\otimes_{l=1}^{d}{\cal H}^{l}$, with ${\cal H}^{l}=W_{2}^{m}$.  See
Aronszajn (1950) for technical details on forming tensor product
spaces.  We decompose $W_{2}^{m}={\cal N}\oplus{\cal
P}_{m-1}\oplus{\cal S}_{m}$, where ${\cal N}$ is the space of constant
with square norm $(\int_{0}^{1}f)^{2}$, ${\cal P}_{m-1}$ is the space
of polynomials of degrees less than $m$ which integrate to zero, with
square norm $\sum_{\nu=1}^{m-1}(\int_{0}^{1}f^{(\nu)})^{2}$, and
${\cal S}_{m}$ is the space of functions with square integrable $m$th
derivative and satisfy $\int_{0}^{1}f^{(\nu)}=0$, $\nu=0,\cdots,m-1$,
with square norm $\int_{0}^{1}(f^{(m)})^{2}$.  When $m=1$, the space
${\cal P}_{0}$ vanishes.  The space
\[
{\cal H}=\otimes_{l=1}^{d}{\cal H}^{l}=
\otimes_{l=1}^{d}({\cal N}^{l}\oplus{\cal P}_{m-1}^{l}\oplus{\cal S}_{m}^{l})
\]
can be represented as the direct sum of $3^{d}$ orthogonal subspaces
when $m>1$, and $2^{d}$ subspaces when $m=1$.  These components can be
interpreted as main effects and interaction effects, and they can be
grouped to share common smoothing parameters at convenience.  When the
smoothing parameters for all interaction effects are set to $\infty$,
we obtain the additive models.

To calculate the $\tilde{Q}_{i}$'s, we recall the results (see Craven
and Wahba (1979)) that the reproducing kernels for subspaces $\cal N$,
${\cal P}_{m-1}$, and ${\cal S}_{m}$ are $R_{\cal N}(s,t)=1$,
$R_{{\cal P}_{m-1}}(s,t)=\sum_{\nu=1}^{m-1}k_{\nu}(s)k_{\nu}(t)$, and
$R_{{\cal S}_{m}}(s,t)=k_{m}(s)k_{m}(t)+(-1)^{m-1}k_{2m}(s-t)$
respectively, where $k_{\nu}(\cdot)=B_{\nu}(\cdot)/\nu!$ and
$B_{\nu}(\cdot)$ is the $\nu$th Bernoulli polynomial.  Since the r.k.
of the tensor product space is the product of the r.k.'s of the
component spaces (Aronszajn, 1950), using the results cited in
Subsection~\ref{rkhs_spli}, the computation formulas for the
$\tilde{Q}_{i}$'s are in order.  For example, the $\tilde{Q}$
corresponding to the space ${\cal S}_{2}^{1}\otimes{\cal P}_{1}^{2}
\otimes{\cal N}^{3}$ will be 
$(R_{{\cal S}_{2}}(x_{1,j1},x_{1,j2})R_{{\cal
P}_{1}}(x_{2,j1},x_{2,j2}))$, where $x_{i,j}$ denotes the $i$th
coordinate of the $j$th ``design point''.

Numerical examples applying Algorithm~\ref{algo_mul} and
Algorithm~\ref{start} to fit additive/interaction spline models can be
found in Gu and Wahba (1991).


\section{Miscellaneous}
\begin{enumerate}
\item {\em Generalized maximum likelihood:} Rkpack provides an option for using
the GML criterion instead of the GCV criterion to select the
$\lambda_{i}$'s.  The GML criterion is based on the Bayesian
interpretation of the smoothing spline models (Wahba, 1978; Wahba,
1985).  It minimizes
\[
M(\mbf{\lambda})=M(\lambda,\mbf{\theta})=
\frac{\mbf{y}^{T}(I-A(\mbf{\lambda}))\mbf{y}/n}
{[{\rm det}^{+}(I-A(\mbf{\lambda}))]^{1/(n-M)}}=
\frac{\mbf{z}^{T}(Q_{*}^{\mbf{\theta}}+n\lambda I)^{-1}\mbf{z}/n}
{[{\rm det}(Q_{*}^{\mbf{\theta}}+n\lambda I)^{-1}]^{1/(n-M)}},
\]
where ${\rm det}^{+}(\cdot)$ indicates the product of nonzero
eigenvalues.  For a derivation of GML, see Wahba (1985).  For
algorithmic details, see Gu et al. (1989) and Gu and Wahba (1991), or
Gu (1989).
\item {\em Variance estimate:} Rkpack drivers also return a variance estimate
$\hat{\sigma}^{2}$.  For the GCV criterion, it is
\[
\hat{\sigma}^{2}=\frac{(1/n)\|(I-A(\mbf{\lambda}))\mbf{y}\|^{2}}
{(1/n){\rm tr}(I-A(\mbf{\lambda}))}=
\frac{(n\lambda)\mbf{z}^{T}(Q_{*}^{\mbf{\theta}}+n\lambda I)^{-2}\mbf{z}}
{{\rm tr}(Q_{*}^{\mbf{\theta}}+n\lambda I)^{-1}} ,
\]
and for the GML criterion it is
\[
\hat{\sigma}^{2}=\mbf{y}^{T}(I-A(\mbf{\lambda}))\mbf{y}/(n-M)=
(n\lambda)\mbf{z}^{T}(Q_{*}^{\mbf{\theta}}+n\lambda
I)^{-1}\mbf{z}/(n-M).
\]
See Wahba (1983) and Wahba (1985) for the derivations of these
estimates.
\item {Generalized spline models:} The algorithms implemented in Rkpack can also
be applied to efficiently perform the iterations for minimizing the
penalized likelihood score encountered in the generalized spline
models, with the smoothing parameter(s) adjusted optimally along the
iterations via the GCV criterion.  See Gu (1990) for details.
\item {\em Generalized ridge regression:} The Rkpack drivers are {\em not} designed
for the more general settings of the generalized ridge regression
problems, which include the penalized regression spline models.  This
is because that in the algorithms we have used some unique structure
of the smoothing spline models which is not shared by the more general
problems.  To solve the generalized ridge regression problems, Gcvpack
(Bates et al., 1987) is recommended.  However, the numerical
efficiency of Gcvpack can be improved via the kernel algorithm used in
Rkpack, i.e., step~2 of Algorithm~\ref{algo_sin}, see Gu et al. (1989)
and Gu (1989).
\end{enumerate}

The self-documented source code is available from the author at {\tt
gu@stat.wisc.edu}.  The code is provided {\it as is} and bears {\it
absolutely no guarantee}.  The users are encouraged to forward
complaints and suggestions to the author at the above address.


\section*{Acknowledgements}
Some of the works summarized here are joint with Doug Bates, Zehua
Chen, and Grace Wahba, to whom I owe thanks.  I also thank Fred Reames
for his help in testing the routines against Gcvpack.


\begin{thebibliography}{}

\item
Aronszajn, N. (1950).
\newblock ``Theory of reproducing kernels''.
\newblock {\em Trans. Amer. Math. Soc.}, 68, 337 -- 404.

\item
Barry, D. (1986).
\newblock ``Nonparametric {B}ayesian regression''.
\newblock {\em Ann. Statist.}, 14, 934 -- 953.

\item
Bates, D. M., M.~Lindstrom, G.~Wahba, and B.~Yandell (1987).
\newblock ``Gcvpack -- routines for generalized cross validation''.
\newblock {\em Commun. Statist.-Simula.}, 16, 263 -- 297.

\item
Craven, P. and G.~Wahba (1979).
\newblock ``Smoothing noisy data with spline functions: estimating the correct
  degree of smoothing by the method of generalized cross-validation''.
\newblock {\em Numer. Math.}, 31, 377 -- 403.

\item
Dongarra, J. J., C. B. Moler, J. R. Bunch, and G. W. Stewart (1979).
\newblock {\em LINPACK User's Guide}.
\newblock SIAM, Philadelphia.

\item
Dongarra, J.~J., J.~{Du Croz}, S.~Hammarling, and R.~J. Hanson (1986).
\newblock ``An extended set of {F}ortran basic linear algebra subroutines''.
\newblock Technical Report~41, Mathematics and Computer Science Division,
  Argonne National Laboratory, Argonne.

\item
Gill, P. E., W.~Murray, and M. H. Wright (1981).
\newblock {\em Practical Optimization}.
\newblock Academic Press.

\item
Gu, C. (1989).
\newblock {\em Computing Smoothing Spline Models}.
\newblock PhD thesis, University of Wisconsin-Madison.

\item
\leavevmode\vrule height 2pt depth -1.6pt width 23pt\  (1990).
\newblock ``Adaptive spline smoothing in non Gaussian regression models''.
\newblock {\it J. Amer. Statist. Assoc.}, 85, 801 -- 807.

\item
Gu, C., D. M. Bates, Z.~Chen, and G.~Wahba (1989).
\newblock ``The computation of {GCV} functions through householder
  tridiagonalization with application to the fitting of interaction
spline models''.
\newblock {\em SIAM J. Matrix Anal. Applic.}, 10, 457 -- 480

\item
Gu, C. and G.~Wahba (1991).
\newblock ``Minimizing {GCV/GML} scores with multiple smoothing parameters via
  the {N}ewton method''.
\newblock {\em SIAM J. Sci. Statist. Comput.}, 12, 383 -- 398.

\item
Hutchinson, M. and F.~de~Hoog (1985).
\newblock ``Smoothing noisy data with spline functions''.
\newblock {\em Numer. Math.}, 47, 99 -- 106.

\item
Kimeldorf, G. and G.~Wahba (1971).
\newblock ``Some results on {T}chebycheffian spline functions''.
\newblock {\em J. Math. Anal. Applic.}, 33, 82--85.

\item
Li, {K.-C.} (1986).
\newblock ``Asymptotic optimality of $c_{L}$ and generalized cross-validation
  in the ridge regression with application to spline smoothing''.
\newblock {\em Ann. Statist.}, 14, 1101 -- 1112.

\item
O'Sullivan, F. (1985).
\newblock ``Comment on '{S}ome aspects of the spline smoothing approach to
  nonparametric regression curve fitting' by {B}.~{S}ilverman''.
\newblock {\em J. R. Statist. Soc. Ser. {B}}, 47, 39 -- 40.

\item
O'Sullivan, F., B.~Yandell, and W.~Raynor (1986).
\newblock ``Automatic smoothing of regression functions in generalized linear
  models''.
\newblock {\em J. Amer. Statist. Assoc.}, 81, 96 -- 103.

\item
Wahba, G. (1978).
\newblock ``Improper priors, spline smoothing and the problem of guarding
  against model errors in regression''.
\newblock {\em J. R. Statist. Soc., Ser. {B}}, 40, 364 -- 372.

\item
\leavevmode\vrule height 2pt depth -1.6pt width 23pt\  (1983).
\newblock ``Bayesian ``confidence intervals'' for the cross-validated smoothing
  spline''.
\newblock {\em J. R. Statist. Soc. Ser. B}, 45, 133--150.

\item
\leavevmode\vrule height 2pt depth -1.6pt width 23pt\  (1984).
\newblock ``Surface fitting with scattered noisy data on {E}uclidean $d$-space
  and on the sphere''.
\newblock {\em Rocky Mountain J. Math.}, 14, 281 -- 299.

\item
\leavevmode\vrule height 2pt depth -1.6pt width 23pt\  (1985).
\newblock ``A comparison of {GCV} and {GML} for choosing the smoothing
  parameter in the generalized spline smoothing problem''.
\newblock {\em Ann. Statist.}, 13, 1378 -- 1402.

\item
\leavevmode\vrule height 2pt depth -1.6pt width 23pt\  (1986).
\newblock ``Partial and interaction splines for the semiparametric estimation
  of functions of several variables''.
\newblock In Boardman, T.~J., ed., {\em Computer Science and Statistics:
  Proceedings of the 18th Symposium on the interface}, pp.~75 -- 80,
Washington, D.C. Amer. Statist. Assoc.

\item
Wahba, G. and J.~Wendelberger (1980).
\newblock ``Some new mathematical methods for variational objective analysis
  using splines and cross validation''.
\newblock {\em Monthly Weather Review}, 108, 1122 -- 1145.

\end{thebibliography}


\section*{Notes added April 1992}
It has been almost three years since the first public release of
Rkpack via netlib in the summer of 1989.  The version in my personal
archive has been changing ever since as I am learning better ways of
coding, fixing/creating bugs discovered by users (including myself),
and finding new applications for the code.  A revised version with
simpler calling sequences for {\tt dsidr} and {\tt dmudr} was released
to netlib in June 1991, in which two utility routines were also added
to facilitate the calculation of Bayesian ``confidence intervals'' as
a precision assessment for the models fitted by using {\tt dsidr} and
{\tt dmudr}.  Some further changes have been made since then,
including a new option of smoothing parameter selection method and the
fixing of newly found bugs.  In response to users'
suggestions/complaints about the user-friendly-ness of the package, I
also collected a few sample application programs to illustrate the use
of Rkpack drivers and utility routines.  This release incorporates
these changes/additions and the following notes are intended to
supplement/update the information presented in the main body of this
document.
\begin{enumerate}
\item {\it Changes in the main body of this document:}  No attempt was 
made to update the TR except that the calling sequences of {\tt dsidr}
and {\tt dmudr} appearing in the original version are removed and that
some references are updated.  The calling sequences in the original
version of this document caused some confusion to the users of the
revised code, and probably shouldn't have been in this document in the
first place since the information changes as the code evolves and
should be obtained directly from the self-documented code.
\item {\it New option for smoothing parameter selection:}  When the
variance $\sigma^{2}$ is known in the model, the smoothing parameters
minimizing the unbiased risk estimate of Craven and Wahba (1979)
\[
U(\lambda)=\|(I-A(\lambda))\mbf{y}\|^{2}/n+2\sigma^{2}{\rm{tr}}A(\lambda)/n
\]
perform as well as or better than the GCV selected smoothing
parameters.  A new option URE is added to the existing list of GCV and
GML.  This option could be useful when using the code to fit non
Gaussian models; see Gu (1991).
\item {\it Utility routines for calculating posterior covariances:}
Two utility routines {\tt dcrdr} and {\tt dsms}, which are to be used
in conjunction with {\tt dsidr}, are added to facilitate the
calculation of posterior covariances of spline components useful for
assessing the accuracy of the fit.  Technical details are to be found
in Gu and Wahba (1992).  See also Wahba (1983) and Nychka (1988).
\item {\it Application programs and public domain routines:}  This 
release has three subdirectories: {\tt rkpk}, {\tt demo}, and {\tt
lib}.  {\tt rkpk} collects Rkpack routines.  {\tt demo} collects a set
of application programs illustrating the use of Rkpack drivers and
utility routines, including a program for cubic splines on $[0,1]$ (in
{\tt cubic.r}), a program for thin-plate splines on $E^{2}$ with $m=2$
(in {\tt thin.r}), a program for a tensor-product spline model on
$[0,1]^3$ (in {\tt tensor.r}) and its supplement (in {\tt tensor1.r}),
and a program for tensor-product thin-plate spline model of Gu and
Wahba (1993) on $E\times{E}^{2}$ (in {\tt tptp.r}) and its supplement
(in {\tt tptp1.r}).  The programs are all briefly commented.  To
facilitate the installation of Rkpack, {\tt lib} collects the Blas,
Blas2, and Linpack routines called upon directly or indirectly by
Rkpack routines, together with two Cmlib random number generators used
in the application programs.  Makefiles are provided in all the
subdirectories.  This documents sits in the main directory.
\end{enumerate}

I am currently at Department of Statistics, Purdue University, {\tt
chong@stat.purdue.edu}.  I thank Grace Wahba and Feng Gao for the many
suggestions which helped to shape the current version.  Listed below
are the extra references quoted in the notes and an authoritative
monograph on spline smoothing by Wahba (1990).

\begin{thebibliography}{}

\item
Gu, C. (1991).
\newblock ``A note on cross-validating non Gaussian data''.
\newblock {\em J. Comp. Graph. Statist.}, 1, 000 -- 000.

\item
Gu, C. and Wahba, G. (1992).
\newblock ``Smoothing spline ANOVA with component-wise Bayesian
  `confidence intervals' ''.
\newblock Technical Report 881 (Rev.), Dept. of Statistics, University
  of Wisconsin, Madison.

\item
\leavevmode\vrule height 2pt depth -1.6pt width 23pt\  (1993).
\newblock ``Semiparametric ANOVA with Tensor Product Thin Plate Splines''.
\newblock {\em J. R. Statist. Soc. Ser. {B}}, 55, 000 -- 000.
\newblock (Purdue TR 90-61)

\item
Nychka, D. (1988).
\newblock ``Bayesian confidence intervals for smoothing splines''.
\newblock {\em J. Amer. Statist. Assoc.}, 83, 1134 -- 1143.

\item
Wahba, G. (1990).
\newblock {\em Spline Models for Observational Data}.
\newblock CBMS-NSF Regional Conference Series in Applied Mathematics, Vol. 59.
\newblock SIAM.

\end{thebibliography}

\end{document}
SHAR_EOF
cd ..
#	End of shell archive
exit 0



.