ccft i=vec,b=bvec,l=lvec
cldr bin=bvec,x=xvec
      program vector
CVD$G NOCONCUR
c  This program times a series of vector operations.
      include 'vector.h'
      character*80 arg
      external testem
      dimension iota(16)
      data iota /1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16/
c
c     Initilize.
c
c      call link('unit6=(vec.out,create,hc)//')
c      open( 6, file='vec.out')
      if( iargc().ne.2 ) then
         call getarg(0,arg)
         write(0,1000) arg
         call exit( 1 ) 
      endif
      call getarg(1,arg)
      read(arg,*) nproc
      call getarg(2,arg)
      read(arg,*) lenvec
      if( nproc.lt.1 .or. nproc.gt.8 .or.
     $     lenvec.lt.nproc .or. lenvec.gt.LEN ) then
         write(0,1010) nproc, lenvec
         call exit( 1 ) 
      endif
c
c         Set up the multiprocessing environment.
c
      CALL LGENABLE
      CALL LGOPEN( logbufr, LENBUF, 'trace.vector', -1 )
      CALL LGON
      call barasgn( ibar1, nproc )
      call barasgn( ibar2, nproc )
      call lockasgn( lokvec )
c
c         Initilize the data
c
      nd = 5
      do 10 i = 1, 1025
         x(i) = 1.0
         y(i) = 1.0
         a0(i) = 1.0
         a1(i) = 1.0
         a2(i) = 1.0
         a3(i) = 1.0
         a4(i) = 1.0
 10   continue
c
c         Do the fork.
c
CVD$ NOCONCUR
CVD$ NOVECTOR
      do iproc = 2, nproc
         my_tskarry(iproc,1) = 3
         call tskstart( my_tskarry(iproc,1), testem,
     $        lenvec, iota(iproc), nproc )
      enddo
      call testem( lenvec, iota(1), nproc )
c
c         Do the Join.
c
CVD$ NOCONCUR
CVD$ NOVECTOR
      do iproc = 2, nproc
         call tskwait( my_tskarry(iproc,1) )
      enddo
c
c         Release the multiprocessing resources and exit.
c
      call barrel( ibar1 )
      call barrel( ibar2 )
      call lockrel( lokvec )
      call lgoff
      call lgclose
      call exit( 0 )
 1000 format(1x,a,': usage: vector nproc (1,8)',
     $     ' vec_length (nproc,1025)')
 1010 format(' Input error nproc = ',i10,' vec_len =',i10)
      end
      subroutine testem(lenvec, iproc, nproc)
      common /VecTime/ ttot, lokvec, ibar1, ibar2
      integer info(10)
c
      ttot = 0.0
      call barsync( ibar2 )
      call op1m3(lenvec, iproc, nproc)
      info(1) = 10
      call lgentry( 10, info, 1 )
      call barsync( ibar1 )
      if( iproc.eq.1 ) then
         ttot = 0.01*ttot
         write(6,3000) lenvec, ttot, float(lenvec)*1.0e-6/ttot
         ttot = 0.0
      endif
      call op2m3(lenvec, iproc, nproc)
      info(1) = 11
      call lgentry( 11, info, 1 )
      call barsync( ibar1 )
      if( iproc.eq.1 ) then
         ttot = 0.01*ttot
         write(6,3010) lenvec, ttot, float(lenvec)*2.0e-6/ttot
         ttot = 0.0
      endif
      call op5m3(lenvec, iproc, nproc)
      info(1) = 12
      call lgentry( 12, info, 1 )
      call barsync( ibar2 )
      if( iproc.eq.1 ) then
         ttot = 0.01*ttot
         write(6,3020) lenvec, ttot, float(lenvec)*5.0e-6/ttot
         ttot = 0.0
      endif
      call mmult( lenvec, iproc, nproc)
      info(1) = 13
      call lgentry( 13, info, 1 )
      call barsync( ibar1 )
      if( iproc.eq.1 ) then
         ttot = 0.01*ttot
         iflop = 5*lenvec + 12*(lenvec-(n1+1))
         write(6,3030) lenvec, ttot, float(iflop)*1.0e-6/ttot
         ttot = 0.0
      endif
      return
 3000 format(' 3M1O    Length ',i4,' Time ',e16.7,' Mflops ',e16.7)
 3010 format(' 3M2O    Length ',i4,' Time ',e16.7,' Mflops ',e16.7)
 3020 format(' 3M5O    Length ',i4,' Time ',e16.7,' Mflops ',e16.7)
 3030 format(' MatMult Length ',i4,' Time ',e16.7,' Mflops ',e16.7)
      end
      subroutine op1m3(lenvec, iproc, nproc)
      include 'vector.h'
c
c     Simple multiply operation.  3 Mem Ref 1 Operation.
c
CVD$ NOCONCUR
      tin = second( dummy )
      do 30 irep = 1, 100
         do 20 i = iproc, lenvec, nproc
            a0(i) = x(i)*y(i)
 20      continue
 30   continue
      tout = second( dummy )
      call lockon( lokvec )
      ttot = ttot + max(tout-tin,0.0)
      call lockoff( lokvec )
      return
      end
      subroutine op2m3(lenvec,iproc,nproc)
      include 'vector.h'
c
c     Saxpy operation.  3 Mem Ref 2 Operations.
c
CVD$ NOCONCUR
      tin = second( dummy )
      do 130 irep = 1, 100
         do 120 i = iproc, lenvec, nproc
            x(i) = x(i) + 0.001*y(i)
 120     continue
 130  continue
      tout = second( dummy )
      call lockon( lokvec )
      ttot = ttot + max(tout-tin,0.0)
      call lockoff( lokvec )
      return
      end
      subroutine op5m3(lenvec,iproc,nproc)
      include 'vector.h'
c
c     3 Mem Ref 5 Operations.
c
      tin = second( dummy )
CVD$ NOCONCUR
      do 230 irep = 1, 100
         do 220 i = iproc, lenvec, nproc
            a0(i) = (x(i)+y(i))/(x(i)*x(i)+y(i)*y(i))
 220     continue
 230  continue
      tout = second( dummy )
      call lockon( lokvec )
      ttot = ttot + max(tout-tin,0.0)
      call lockoff( lokvec )
      return
      end
      subroutine mmult(lenvec,iproc,nproc)
      include 'vector.h'
c
c     Matrix mulitply
c     
      n1 = ifix( sqrt( float( lenvec ) ) )
      tin = second( dummy )
CVD$ NOCONCUR
CVD$ NOVECTOR
      do 330 irep = 1, 100
         call atimes( lenvec, iproc, nproc )
 330  continue
      tout = second( dummy )
      call lockon( lokvec )
      ttot = ttot + max(tout-tin,0.0)
      call lockoff( lokvec )
      return
      end
      subroutine atimes(n, iproc, nproc)
c********************** multi-processor version ***************************
c  Example routine to multiply the matrix of a symmetric 9- (or 5-)
c  point operator times a vector x and store the result in the vector y.
c----------------------------------------------------------------------
c  The calling arguments IPROC, NPROC are the index of the task calling
c  this routine and the number of total tasks, respectively.
c----------------------------------------------------------------------
      include 'vector.h'
c
c         Initilize.
c
      n1p1  = n1 + 1
      n1m1  = n1 - 1
      nrat  = n/nproc
      nprem = n - nproc*nrat
      if(iproc.le.nprem) then
         ibgn = (iproc - 1)*nrat + iproc
         iend = ibgn + nrat
      else
         ibgn = (iproc - 1)*nrat + nprem + 1
         iend = ibgn + nrat - 1
      endif
c
c        Main three bands.
c
      lstrt = max0( ibgn, 2 )
      lstop = min0( iend, n-1 )
      do 10 i= lstrt, lstop
        y(i) = a0(i)*x(i) + a1(i)*x(i+1) + a1(i-1)*x(i-1)
 10   continue
      if( ibgn.eq.1 ) y(1) = a0(1)*x(1) + a1(1)*x(2)
      if( iend.eq.n ) y(n) = a0(n)*x(n) + a1(n-1)*x(n-1)
      if( nd.eq.2 ) goto 99
c
c       Upper three bands.
c
      lstop = min0( iend, n-n1p1 )
      do 20 i=ibgn,lstop
        y(i) = y(i) + a3(i)*x(i+n1) + a4(i)*x(i+n1p1) + a2(i)*x(i+n1m1)
 20   continue
      if(iend.ge.n-n1) y(n-n1) = y(n-n1)+a3(n-n1)*x(n)+a2(n-n1)*x(n-1)
      if(iend.ge.n-n1m1) y(n-n1m1) = y(n-n1m1)+a2(n-n1m1)*x(n)
      if( nd.eq.3 ) goto 99
c
c        Lower three bands.
c
      lstrt = max0( ibgn, n1+2 )
      do 30 i=lstrt,iend
        y(i) = y(i) + a2(i-n1m1)*x(i-n1m1)
     1              + a3(i-n1)*x(i-n1) + a4(i-n1p1)*x(i-n1p1)
 30   continue
      if( ibgn.le.n1   ) y(n1)   = y(n1)   + a2(1)*x(1)
      if( ibgn.le.n1p1 ) y(n1p1) = y(n1p1) + a3(1)*x(1) + a2(2)*x(2)
c
c        Done.
c
   99 continue
      return
      end
      real function second(dummy)
c
c       Returns the CPU time (seconds) since begining of program.
c       Resolution of 0.01 sec.  Use hcrget for more accurate timmings.
c
      real dummy
      real*4 time(2), etime
      second = etime( time )
      return
      end
