C#######################################################################
C PSTSWM Version 4.0 (12/1/94)                                         #
C  (Stripped down PVM-only version (4/13/95), for use in ParkBench     #
C   benchmark suite)                                                   #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
C include precision declaration definitions                            #
#include "precision.i"
C#######################################################################
      SUBROUTINE FTDPIV (DTA, ALP, DALP, WTS, WTACSJ, A2NNP1,
     &                   F, WSA, WSB, DCOEF, VCOEF, PCOEF)
C                                                                             
C This routine performs a forward transform procedure used to evaluate 
C the explicit part of the right hand side for the divergence and 
C geopotential prognostic equations using semi-implicit timestepping 
C (the term M of eq. (8) and the term Q in eq. (9) in Ritchie paper),
C and to evaluate the right hand side for the vorticity prognostic 
C equation. The complex coefficient vectors returned by this routine
C are zeroed within the forward transform procedure (i.e., the user 
C can not specify initial state).  
C
C called by: SIMPLIC
C calls: FLTSUM, INFTDPIV, RSFTDPIV, TREESUM
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C parallel algorithm information
#     include "algorithm.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C timestep
      REAL DTA
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S,NLLATH_S)
C Gaussian weights
      REAL WTS(NLAT)
C 1.0/(A*COS(LAT)**2)
      REAL WTACSJ(NLAT)
C (N*(N+1))/A**2
      REAL A2NNP1(0:KK)
C
C     Input/Work Space
C
C arrays of Fourier coefficients 
C  F(MXLFC_S,NLVER_S,NLLAT_S,1): (UCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,2): (VCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,3): (UCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,4): (VCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,5): (UCOS**2+VCOS**2)/FAC:  
C  F(MXLFC_S,NLVER_S,NLLAT_S,6): (ZETA^M)^(TAU-1)
C  F(MXLFC_S,NLVER_S,NLLAT_S,7): (DIV^(TAU-1))^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,8): (PHI^(TAU-1))^M
      COMPLEX F(MXLFC_S,NLVER_S,NLLAT_S,8)
C
C     Work Space
C
C work arrays
      COMPLEX WSA(8,MXLFC_S,NLVER_S,NLLATH_S,2)
      COMPLEX WSB(MXLSPEC_S,NLVER_S,3,BUFSFLT+NLLATH_S)
C (note: WSB will have space allocated only for 
C  WSB(MXLSPEC_S,NLVER_S,3,BUFSFLT+1) when (SUMOPT .EQ. 0))
C
C     Output
C
C computed divergence for new timelevel
      COMPLEX DCOEF(MXLSPEC_S,NLVER_S)
C computed vorticity for new timelevel
      COMPLEX VCOEF(MXLSPEC_S,NLVER_S)                        
C computed geopotential for new timelevel
      COMPLEX PCOEF(MXLSPEC_S,NLVER_S)                        
C (DCOEF, VCOEF, AND PCOEF assumed to be contiguous, i.e.
C DCOEF(1,1,2) == VCOEF(1,1) and DCOEF(1,1,3) = PCOEF(1,1)
C
C---- Local Variables --------------------------------------------------
C
C parallel stage
      INTEGER P
C polynomial degree transition index
      INTEGER JNTRNS
C
C---- Executable Statements --------------------------------------------
C
      IF (NLMM_S .GT. 0) THEN
C
C       Calculate local contribution to vector sum.
        IF (SUMOPT .EQ. 0) THEN
C
C         Calculate local contribution to vector sum using in-place
C         summation ordering:
C         1) (P .EQ. NLTSTEPS) case: put contribution in output vectors.
C
C         Initialize polynomial degree transition index.
          JNTRNS = NTRNS_S - JME_S(NLTSTEPS)
C
C         Initialize and add contribution vector to running sum.
          CALL RSFTDI(NLTSTEPS, JNTRNS, DTA, ALP, DALP, WTS,
     &                WTACSJ, A2NNP1, F, WSA(1,1,1,1,1), DCOEF)
          CALL RSFTPIV(NLTSTEPS, JNTRNS, DTA, ALP, DALP, 
     &                 WTS, WTACSJ, A2NNP1, F, WSA(1,1,1,1,2), DCOEF)
C
C         Communicate partial results with rest of logical column.
          CALL FLTSUM(NLTSTEPS, NLTSTEPS, NLSPEC_S(NLTSTEPS), MXLSPEC_S,
     &                3*NLVER_S, WSB(1,1,1,BUFSFLT+1), WSB, DCOEF)
C
C         2) (P .LT. NLTSTEPS) case: use WSB for local contribution.
          DO P=NLTSTEPS-1,1,-1
C
C           Update polynomial degree transition index.
            JNTRNS = JNTRNS + (JMB_S(P+1)-1) - JME_S(P)     
C
C           Initialize and add contribution vector to running sum.
            CALL RSFTDI(P, JNTRNS, DTA, ALP, DALP, WTS, 
     &                  WTACSJ, A2NNP1, F, WSA(1,1,1,1,1), 
     &                  WSB(1,1,1,BUFSFLT+1))
            CALL RSFTPIV(P, JNTRNS, DTA, ALP, DALP, WTS, 
     &                   WTACSJ, A2NNP1, F, WSA(1,1,1,1,2), 
     &                   WSB(1,1,1,BUFSFLT+1))
C
C           Communicate partial results with rest of logical column.
            CALL FLTSUM(P, NLTSTEPS, NLSPEC_S(P), MXLSPEC_S, 3*NLVER_S,
     &                  WSB(1,1,1,BUFSFLT+1), WSB, DCOEF)
C
          ENDDO
C
        ELSE
C
C         Calculate local contribution to vector sum using binary tree
C         summation ordering:
C         1) (P .EQ. NLTSTEPS) case: put contribution in output vectors.
C
C         Initialize polynomial degree transition index.
          JNTRNS = NTRNS_S - JME_S(NLTSTEPS)
C
C         Compute individual components of sum.
          CALL INFTDI(NLTSTEPS, JNTRNS, DTA, ALP, DALP, WTS, 
     &                WTACSJ, A2NNP1, F, WSA(1,1,1,1,1), 
     &                WSB(1,1,1,BUFSFLT+1))
          CALL INFTPIV(NLTSTEPS, JNTRNS, DTA, ALP, DALP, WTS, 
     &                 WTACSJ, A2NNP1, F, WSA(1,1,1,1,2), 
     &                 WSB(1,1,1,BUFSFLT+1))
C
C         Compute local binary tree sum (doubling length because TREESUM
C         expects real vectors).
          CALL TREESUM(1, 2*NLSPEC_S(NLTSTEPS), 2*MXLSPEC_S, 3*NLVER_S, 
     &                 NLLATH_S, WSB(1,1,1,BUFSFLT+1), DCOEF)
C
C         Communicate partial results with rest of logical column.
          CALL FLTSUM(NLTSTEPS, NLTSTEPS, NLSPEC_S(NLTSTEPS), MXLSPEC_S,
     &                3*NLVER_S, WSB(1,1,1,BUFSFLT+1), WSB, DCOEF)
C
C         2) (P .LT. NLTSTEPS) case: use WSB for local contribution,
          DO P=NLTSTEPS-1,1,-1
C
C           Update polynomial degree transition index.
            JNTRNS = JNTRNS + (JMB_S(P+1)-1) - JME_S(P)     
C
C           Compute individual components of sum.
            CALL INFTDI(P, JNTRNS, DTA, ALP, DALP, WTS, 
     &                  WTACSJ, A2NNP1, F, WSA(1,1,1,1,1), 
     &                  WSB(1,1,1,BUFSFLT+1))
            CALL INFTPIV(P, JNTRNS, DTA, ALP, DALP, WTS, 
     &                   WTACSJ, A2NNP1, F, WSA(1,1,1,1,2), 
     &                   WSB(1,1,1,BUFSFLT+1))
C
C           Compute local binary tree sum (doubling length because TREESUM
C           expects real vectors).
            CALL TREESUM(0, 2*NLSPEC_S(P), 2*MXLSPEC_S, 3*NLVER_S, 
     &                   NLLATH_S, WSB(1,1,1,BUFSFLT+1), 
     &                   WSB(1,1,1,BUFSFLT+1))
C
C           Communicate partial results with rest of logical column.
            CALL FLTSUM(P, NLTSTEPS, NLSPEC_S(P), MXLSPEC_S, 3*NLVER_S,
     &                  WSB(1,1,1,BUFSFLT+1), WSB, DCOEF)
C
          ENDDO
C
        ENDIF
C
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE RSFTDI(P, JNTRNS, DTA, ALP, DALP, WTS, 
     &                  WTACSJ, A2NNP1, F, WS, SUM)
C                                                                             
C This routine performs one parallel stage of the forward transform
C procedure described in FTDPIV for the divergence, where the 
C contributions to the spectral coefficients are summed in-place.
C
C called by: FTDPIV
C calls: INDI, RSDI, TMPDI
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C parallel algorithm information
#     include "algorithm.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C current polynomial degree transition index
      INTEGER JNTRNS
C timestep
      REAL DTA
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S,NLLATH_S)
C Gaussian weights
      REAL WTS(NLAT)
C 1.0/(A*COS(LAT)**2)
      REAL WTACSJ(NLAT)
C (N*(N+1))/A**2
      REAL A2NNP1(0:KK)
C
C     Input/Work Space
C
C arrays of Fourier coefficients 
C  F(MXLFC_S,NLVER_S,NLLAT_S,1): (UCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,2): (VCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,3): (UCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,4): (VCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,5): (UCOS**2+VCOS**2)/FAC:  
C  F(MXLFC_S,NLVER_S,NLLAT_S,6): (ZETA^M)^(TAU-1)
C  F(MXLFC_S,NLVER_S,NLLAT_S,7): (DIV^(TAU-1))^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,8): (PHI^(TAU-1))^M
      COMPLEX F(MXLFC_S,NLVER_S,NLLAT_S,8)
C
C     Work Space/Output
C
C intermediate quantities
      COMPLEX WS(8,MXLFC_S,NLVER_S,NLLATH_S)
C
C     Output
C
C contribution to forward transform "sum"
      COMPLEX SUM(MXLSPEC_S,NLVER_S,3)
C
C---- Executable Statements --------------------------------------------
C
C     Calculate intermediate quantities from arrays of Fourier 
C     coefficients for computational efficiency.
      CALL TRACEEVENTF('entry', 302, 0, 0)
      IF (P .EQ. NLTSTEPS) 
     &  CALL TMPDI(DTA, WTS, WTACSJ, F, WS)
      CALL TRACEEVENTF('exit', 302, 0, 0)
C
      CALL TRACEEVENTF('entry', 301, 0, 0)
C     Initialize contribution vector.
      CALL INDI(P, JNTRNS, 1, 1, ALP, DALP, A2NNP1, WS, SUM)
C
C     Add contribution to running sum.
      CALL RSDI(P, JNTRNS, 2, NLLATH_S, ALP, DALP, A2NNP1, WS, SUM)
      CALL TRACEEVENTF('exit', 301, 0, 0)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE INFTDI (P, JNTRNS, DTA, ALP, DALP, WTS, 
     &                   WTACSJ, A2NNP1, F, WS, SUM)
C                                                                             
C This routine performs one parallel stage of the forward transform
C procedure described in FTDPIV for the divergence, where the
C contributions to the spectral coefficients are generated first, and 
C summed later. 
C
C called by: FTDPIV
C calls: INDI, TMPDI
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C parallel algorithm information
#     include "algorithm.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C current polynomial degree transition index
      INTEGER JNTRNS
C timestep
      REAL DTA
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S,NLLATH_S)
C Gaussian weights
      REAL WTS(NLAT)
C 1.0/(A*COS(LAT)**2)
      REAL WTACSJ(NLAT)
C (N*(N+1))/A**2
      REAL A2NNP1(0:KK)
C
C     Input/Work Space
C
C arrays of Fourier coefficients 
C  F(MXLFC_S,NLVER_S,NLLAT_S,1): (UCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,2): (VCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,3): (UCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,4): (VCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,5): (UCOS**2+VCOS**2)/FAC:  
C  F(MXLFC_S,NLVER_S,NLLAT_S,6): (ZETA^M)^(TAU-1)
C  F(MXLFC_S,NLVER_S,NLLAT_S,7): (DIV^(TAU-1))^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,8): (PHI^(TAU-1))^M
      COMPLEX F(MXLFC_S,NLVER_S,NLLAT_S,8)
C
C     Work Space
C
C work arrays
      COMPLEX WS(8,MXLFC_S,NLVER_S,NLLATH_S)
C
C     Output
C
C contribution to forward transform "sum"
      COMPLEX SUM(MXLSPEC_S,NLVER_S,3,NLLATH_S)
C
C---- Executable Statements --------------------------------------------
C
C     Calculate intermediate quantities from arrays of Fourier 
C     coefficients for computational efficiency.
      IF (P .EQ. NLTSTEPS) 
     &  CALL TMPDI(DTA, WTS, WTACSJ, F, WS)
C
C     Compute individual components of sum.
      CALL INDI(P, JNTRNS, 1, NLLATH_S, ALP, DALP, A2NNP1, WS, SUM)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE TMPDI(DTA, WTS, WTACSJ, F, WS)
C                                                                              
C This routine calculates intermediate quantities needed by INFTDI
C and RSFTDI.
C
C called by: INFTDI, RSFTDI
C calls: 
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C timestep
      REAL DTA
C Gaussian weights
      REAL WTS(NLAT)
C 1.0/(A*COS(LAT)**2)
      REAL WTACSJ(NLAT)
C arrays of Fourier coefficients:
C  F(MXLFC_S,NLVER_S,NLLAT_S,1): (UCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,2): (VCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,3): (UCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,4): (VCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,5): (UCOS**2+VCOS**2)/FAC:  
C  F(MXLFC_S,NLVER_S,NLLAT_S,6): (ZETA^M)^(TAU-1)
C  F(MXLFC_S,NLVER_S,NLLAT_S,7): (DIV^(TAU-1))^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,8): (PHI^(TAU-1))^M
      COMPLEX F(MXLFC_S,NLVER_S,NLLAT_S,8)
C
C     Output
C
C intermediate quantities
      COMPLEX WS(8,MXLFC_S,NLVER_S,NLLATH_S)
C
C---- Local Variables --------------------------------------------------
C
C real zero, to allow easy precision modifications
      REAL ZERO
C latitude (northern, southern, & global) indices
      INTEGER NL, SL, GNL
C vertical layer index
      INTEGER JV
C untruncated and truncated wavenumber indices
      INTEGER IM, JM
C precalculated coefficients
      REAL FAC0, FAC1, FAC3
C
C---- Executable Statements -------------------------------------------
C                                                                              
      ZERO = 0.0
C
C     Calculate DIV-related intermediate quantities
      DO NL=1,NLLATH_S
        SL   = NLLAT_S-NL+1
        GNL  = LATTRUE_S(NL)
        FAC0 = WTS(GNL)
        FAC3 = (DTA*WTACSJ(GNL))*WTS(GNL)
C
        DO JV=1,NLVER_S
          DO JM=1,NLMM_S
            IM = JMLTRUE_S(JM)
C
            WS(1,JM,JV,NL) = (F(IM,JV,NL,2)+F(IM,JV,SL,2))
     &                     * CMPLX(ZERO,(MTRUE_S(JM))*FAC3)
     &                     + (F(IM,JV,NL,7)+F(IM,JV,SL,7))*FAC0
            WS(2,JM,JV,NL) = (F(IM,JV,NL,2)-F(IM,JV,SL,2))
     &                     * CMPLX(ZERO,(MTRUE_S(JM))*FAC3)
     &                     + (F(IM,JV,NL,7)-F(IM,JV,SL,7))*FAC0
C
          ENDDO
        ENDDO
C
      ENDDO
C
      DO NL=1,NLLATH_S
        SL   = NLLAT_S-NL+1
        GNL  = LATTRUE_S(NL)
        FAC1 = DTA*WTS(GNL)
C
        DO JV=1,NLVER_S
          DO JM=1,NLMM_S
            IM = JMLTRUE_S(JM)
C
            WS(3,JM,JV,NL) = (F(IM,JV,NL,5)+F(IM,JV,SL,5))*FAC1
            WS(4,JM,JV,NL) = (F(IM,JV,NL,5)-F(IM,JV,SL,5))*FAC1
C
          ENDDO
        ENDDO
C
      ENDDO
C
      DO NL=1,NLLATH_S
        SL   = NLLAT_S-NL+1
        GNL  = LATTRUE_S(NL)
        FAC3 = (DTA*WTACSJ(GNL))*WTS(GNL)
C
        DO JV=1,NLVER_S
          DO JM=1,NLMM_S
            IM = JMLTRUE_S(JM)
C
            WS(5,JM,JV,NL) = (F(IM,JV,NL,1)-F(IM,JV,SL,1))*FAC3
            WS(6,JM,JV,NL) = (F(IM,JV,NL,1)+F(IM,JV,SL,1))*FAC3
C
          ENDDO
        ENDDO
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE INDI(P, JNTRNS, NLB, NLE, ALP, DALP, A2NNP1, DATA,
     &                SUM)
C                                                                             
C This routine calculates contributions to the forward transform 
C described in routine FTDPIV corresponding to a range of latitudes
C (NLB-NLE) and a single stage (P) for the divergence. INDI overwrites 
C the output vector with these values, and is used to initialize the 
C output vector as the first step in a running sum calculation 
C (SUMOPT .EQ. 0), or when delaying the summation over latitude (NL) to
C allow a (reproducible) binary tree sum ordering (SUMOPT .EQ. 1).
C 
C called by: INFTDI, RSFTDI
C calls: 
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C current polynomial degree transition index
      INTEGER JNTRNS
C first and last latitude index to use in calculation
      INTEGER NLB, NLE
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S,NLLATH_S)
C (N*(N+1))/A**2
      REAL A2NNP1(0:KK)
C data (computed in TMPDI from arrays of Fourier coefficients)
      COMPLEX DATA(8,MXLFC_S,NLVER_S,NLLATH_S)
C
C     Output
C
C contribution to forward transform "sum"
      COMPLEX SUM(MXLSPEC_S,NLVER_S,3,NLE)
C
C---- Local Variables --------------------------------------------------
C
C latitude, vertical layer, and wavenumber indices
      INTEGER NL, JV, JM
C polynomial degree index and index bounds
      INTEGER JN, JNFIRST, JNLAST
C spectral coefficient index
      INTEGER L
C spectral coefficient offset for wavenumber index JM
      INTEGER IS
C true wavenumber
      INTEGER M
C
C---- Executable Statements --------------------------------------------
C
C     Vary M and N so that procedure moves along columns denoted
C     by index JM.  M is given by (JM-1); N is given by (JN+JM-2).
C     Take advantage of symmetric character of Legendre polynomials.
C     (Procedure assumes even number of Gaussian latitudes ...)
C
      DO NL=NLB,NLE
C
        DO JV=1,NLVER_S
C
          L = 1
          DO JM=JMB_S(P),JME_S(P)
C
            IS = LLCOL_S(JM,2)
            M  = MTRUE_S(JM)
C
            JNFIRST = JNB_S(JNTRNS+JM)
            JNLAST = JNE_S(JNTRNS+JM)
            L = L - JNFIRST
C
            IF (MOD(JNFIRST,2) .EQ. 0) THEN
C             Compute contributions for first JN.
C             N = M + JNFIRST - 1
C
              SUM(L+JNFIRST,JV,1,NL)
     &             = ALP(IS+JNFIRST,NL)*(DATA(2,JM,JV,NL) 
     &             + DATA(4,JM,JV,NL)*A2NNP1(M+JNFIRST-1))
     &             + DALP(IS+JNFIRST,NL)*DATA(6,JM,JV,NL)
C
              JNFIRST = JNFIRST + 1
            ENDIF
C
C           Calculate paired JNs.
            DO JN=JNFIRST,JNLAST-1,2
C             N = M + JN - 1
C
              SUM(L+JN,JV,1,NL)
     &             = ALP(IS+JN,NL)*(DATA(1,JM,JV,NL) 
     &             + DATA(3,JM,JV,NL)*A2NNP1(M+JN-1))
     &             + DALP(IS+JN,NL)*DATA(5,JM,JV,NL)
              SUM(L+JN+1,JV,1,NL) 
     &             = ALP(IS+JN+1,NL)*(DATA(2,JM,JV,NL) 
     &             + DATA(4,JM,JV,NL)*A2NNP1(M+JN))
     &             + DALP(IS+JN+1,NL)*DATA(6,JM,JV,NL)
C
            ENDDO
C                                                                              
            IF (MOD((JNLAST-JNFIRST),2) .EQ. 0) THEN
C             Compute contributions for last JN.
C             N = M + JNLAST - 1
C
              SUM(L+JNLAST,JV,1,NL)
     &             = ALP(IS+JNLAST,NL)*(DATA(1,JM,JV,NL) 
     &             + DATA(3,JM,JV,NL)*A2NNP1(M+JNLAST-1))
     &             + DALP(IS+JNLAST,NL)*DATA(5,JM,JV,NL)
C
            ENDIF
C
C           Update local spectral coefficient offset index.
            L = L + JNLAST + 1
C
          ENDDO
C
        ENDDO
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE RSDI(P, JNTRNS, NLB, NLE, ALP, DALP, A2NNP1, DATA, SUM)
C                                                                             
C This routine calculates a contribution to the forward transform 
C described in routine FTDPIV corresponding to a range of latitudes 
C (NLB-NLE) and a single stage (P) of the divergence. RSDI adds these 
C values to the current contents of the output vector, as part of a 
C running sum calculation (SUMOPT .EQ. 0).
C 
C called by: INFTDI, RSFTDI
C calls: 
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C current polynomial degree transition index
      INTEGER JNTRNS
C first and last latitude index to use in calculation
      INTEGER NLB, NLE
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S,NLLATH_S)
C (N*(N+1))/A**2
      REAL A2NNP1(0:KK)
C data (computed in TMPDI from arrays of Fourier coefficients)
      COMPLEX DATA(8,MXLFC_S,NLVER_S,NLLATH_S)
C
C     Output
C
C contribution to forward transform SUM
      COMPLEX SUM(MXLSPEC_S,NLVER_S)
C
C---- Local Variables --------------------------------------------------
C
C latitude, vertical layer, and wavenumber indices
      INTEGER NL, JV, JM
C polynomial degree index and index bounds
      INTEGER JN, JNFIRST, JNLAST
C spectral coefficient index
      INTEGER L
C spectral coefficient offset for wavenumber index JM
      INTEGER IS
C true wavenumber
      INTEGER M
C
C---- Executable Statements --------------------------------------------
C                                                                              
C     Vary M and N so that procedure moves along columns denoted
C     by index JM.  M is given by (JM-1); N is given by (JN+JM-2).
C     Take advantage of symmetric character of Legendre polynomials.
C     (Procedure assumes even number of Gaussian latitudes ...)
C
      DO NL=NLB,NLE
C
        DO JV=1,NLVER_S
C
          L = 1
          DO JM=JMB_S(P),JME_S(P)
C
            IS = LLCOL_S(JM,2)
            M  = MTRUE_S(JM)
C
            JNFIRST = JNB_S(JNTRNS+JM)
            JNLAST = JNE_S(JNTRNS+JM)
            L = L - JNFIRST
C
            IF (MOD(JNFIRST,2) .EQ. 0) THEN
C             Compute contributions for first JN.
C             N = M + JNFIRST - 1
C
              SUM(L+JNFIRST,JV)
     &             = SUM(L+JNFIRST,JV)
     &             + ALP(IS+JNFIRST,NL)*(DATA(2,JM,JV,NL) 
     &             + DATA(4,JM,JV,NL)*A2NNP1(M+JNFIRST-1))
     &             + DALP(IS+JNFIRST,NL)*DATA(6,JM,JV,NL)
C
              JNFIRST = JNFIRST + 1
            ENDIF
C
C           Calculate paired JNs.
            DO JN=JNFIRST,JNLAST-1,2
C             N = M + JN - 1
C
              SUM(L+JN,JV)
     &             = SUM(L+JN,JV)
     &             + ALP(IS+JN,NL)*(DATA(1,JM,JV,NL) 
     &             + DATA(3,JM,JV,NL)*A2NNP1(M+JN-1))
     &             + DALP(IS+JN,NL)*DATA(5,JM,JV,NL)
C
              SUM(L+JN+1,JV) 
     &             = SUM(L+JN+1,JV) 
     &             + ALP(IS+JN+1,NL)*(DATA(2,JM,JV,NL) 
     &             + DATA(4,JM,JV,NL)*A2NNP1(M+JN))
     &             + DALP(IS+JN+1,NL)*DATA(6,JM,JV,NL)
C
            ENDDO
C                                                                              
            IF (MOD((JNLAST-JNFIRST),2) .EQ. 0) THEN
C             Compute contributions for last JN.
C             N = M + JNLAST - 1
C
              SUM(L+JNLAST,JV)
     &             = SUM(L+JNLAST,JV)
     &             + ALP(IS+JNLAST,NL)*(DATA(1,JM,JV,NL) 
     &             + DATA(3,JM,JV,NL)*A2NNP1(M+JNLAST-1))
     &             + DALP(IS+JNLAST,NL)*DATA(5,JM,JV,NL)
C
            ENDIF
C
C           Update local spectral coefficient offset index.
            L = L + JNLAST + 1
C
          ENDDO
C
        ENDDO
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE RSFTPIV(P, JNTRNS, DTA, ALP, DALP, WTS, 
     &                   WTACSJ, A2NNP1, F, WS, SUM)
C                                                                             
C This routine performs one parallel stage of the forward transform
C procedure described in FTDPIV for the vorticity and the
C geopotential, where the contributions to the spectral coefficients
C are summed in-place.
C
C called by: FTDPIV
C calls: INPIV, RSPIV, TMPPIV
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C parallel algorithm information
#     include "algorithm.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C current polynomial degree transition index
      INTEGER JNTRNS
C timestep
      REAL DTA
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S,NLLATH_S)
C Gaussian weights
      REAL WTS(NLAT)
C 1.0/(A*COS(LAT)**2)
      REAL WTACSJ(NLAT)
C (N*(N+1))/A**2
      REAL A2NNP1(0:KK)
C
C     Input/Work Space
C
C arrays of Fourier coefficients 
C  F(MXLFC_S,NLVER_S,NLLAT_S,1): (UCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,2): (VCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,3): (UCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,4): (VCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,5): (UCOS**2+VCOS**2)/FAC:  
C  F(MXLFC_S,NLVER_S,NLLAT_S,6): (ZETA^M)^(TAU-1)
C  F(MXLFC_S,NLVER_S,NLLAT_S,7): (DIV^(TAU-1))^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,8): (PHI^(TAU-1))^M
      COMPLEX F(MXLFC_S,NLVER_S,NLLAT_S,8)
C
C     Work Space/Output
C
C intermediate quantities
      COMPLEX WS(8,MXLFC_S,NLVER_S,NLLATH_S)
C
C     Output
C
C contribution to forward transform "sum"
      COMPLEX SUM(MXLSPEC_S,NLVER_S,3)
C
C---- Executable Statements --------------------------------------------
C
C     Calculate intermediate quantities from arrays of Fourier 
C     coefficients for computational efficiency.
      CALL TRACEEVENTF('entry', 304, 0, 0)
      IF (P .EQ. NLTSTEPS) 
     &  CALL TMPPIV(DTA, WTS, WTACSJ, F, WS)
      CALL TRACEEVENTF('exit', 304, 0, 0)
C
      CALL TRACEEVENTF('entry', 303, 0, 0)
C     Initialize contribution vector.
      CALL INPIV(P, JNTRNS, 1, 1, ALP, DALP, A2NNP1, WS, SUM)
C
C     Add contributions to running sum.
      CALL RSPIV(P, JNTRNS, 2, NLLATH_S, ALP, DALP, A2NNP1, WS, SUM)
      CALL TRACEEVENTF('exit', 303, 0, 0)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE INFTPIV(P, JNTRNS, DTA, ALP, DALP, WTS, 
     &                   WTACSJ, A2NNP1, F, WS, SUM)
C                                                                             
C This routine performs one parallel stage of the forward transform
C procedure described in FTDPIV for the vorticity and the
C geopotential, where the contributions to the spectral coefficients
C are generated first, and summed later.
C
C called by: FTDPIV
C calls: INDPIV, TMPDPIV
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C parallel algorithm information
#     include "algorithm.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C current polynomial degree transition index
      INTEGER JNTRNS
C timestep
      REAL DTA
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S,NLLATH_S)
C Gaussian weights
      REAL WTS(NLAT)
C 1.0/(A*COS(LAT)**2)
      REAL WTACSJ(NLAT)
C (N*(N+1))/A**2
      REAL A2NNP1(0:KK)
C
C     Input/Work Space
C
C arrays of Fourier coefficients 
C  F(MXLFC_S,NLVER_S,NLLAT_S,1): (UCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,2): (VCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,3): (UCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,4): (VCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,5): (UCOS**2+VCOS**2)/FAC:  
C  F(MXLFC_S,NLVER_S,NLLAT_S,6): (ZETA^M)^(TAU-1)
C  F(MXLFC_S,NLVER_S,NLLAT_S,7): (DIV^(TAU-1))^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,8): (PHI^(TAU-1))^M
      COMPLEX F(MXLFC_S,NLVER_S,NLLAT_S,8)
C
C     Work Space
C
C work arrays
      COMPLEX WS(8,MXLFC_S,NLVER_S,NLLATH_S)
C
C     Output
C
C contribution to forward transform "sum"
      COMPLEX SUM(MXLSPEC_S,NLVER_S,3,NLLATH_S)
C
C---- Executable Statements --------------------------------------------
C
C     Calculate intermediate quantities from arrays of Fourier 
C     coefficients for computational efficiency.
      IF (P .EQ. NLTSTEPS) 
     &  CALL TMPPIV(DTA, WTS, WTACSJ, F, WS)
C
C     Compute individual components of sum.
      CALL INPIV(P, JNTRNS, 1, NLLATH_S, ALP, DALP, A2NNP1, WS, SUM)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE TMPPIV(DTA, WTS, WTACSJ, F, WS)
C                                                                              
C This routine calculates intermediate quantities needed by INFTPIV
C and RSFTPIV.
C
C called by: INFTPIV, RSFTPIV
C calls: 
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C timestep
      REAL DTA
C Gaussian weights
      REAL WTS(NLAT)
C 1.0/(A*COS(LAT)**2)
      REAL WTACSJ(NLAT)
C arrays of Fourier coefficients:
C  F(MXLFC_S,NLVER_S,NLLAT_S,1): (UCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,2): (VCOS*ZETA)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,3): (UCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,4): (VCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,5): (UCOS**2+VCOS**2)/FAC:  
C  F(MXLFC_S,NLVER_S,NLLAT_S,6): (ZETA^M)^(TAU-1)
C  F(MXLFC_S,NLVER_S,NLLAT_S,7): (DIV^(TAU-1))^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,8): (PHI^(TAU-1))^M
      COMPLEX F(MXLFC_S,NLVER_S,NLLAT_S,8)
C
C     Output
C
C intermediate quantities
      COMPLEX WS(8,MXLFC_S,NLVER_S,NLLATH_S)
C
C---- Local Variables --------------------------------------------------
C
C real zero, to allow easy precision modifications
      REAL ZERO
C latitude (northern, southern, & global) indices
      INTEGER NL, SL, GNL
C vertical layer index
      INTEGER JV
C untruncated and truncated wavenumber indices
      INTEGER IM, JM
C precalculated coefficients
      REAL FAC0, FAC2, FAC3
C
C---- Executable Statements -------------------------------------------
C                                                                              
      ZERO = 0.0
C
C     Calculate intermediate quantities.
      DO NL=1,NLLATH_S
        SL   = NLLAT_S-NL+1
        GNL  = LATTRUE_S(NL)
        FAC0 = WTS(GNL)
        FAC2 = DTA*WTACSJ(GNL)
C
        DO JV=1,NLVER_S
          DO JM=1,NLMM_S
            IM = JMLTRUE_S(JM)
C
            WS(1,JM,JV,NL) = ((F(IM,JV,NL,1)+F(IM,JV,SL,1))
     &                     *  CMPLX(ZERO,-(MTRUE_S(JM))*FAC2)
     &                     +  (F(IM,JV,NL,6)+F(IM,JV,SL,6)))*FAC0
            WS(3,JM,JV,NL) = ((F(IM,JV,NL,1)-F(IM,JV,SL,1))
     &                     *  CMPLX(ZERO,-(MTRUE_S(JM))*FAC2)
     &                     +  (F(IM,JV,NL,6)-F(IM,JV,SL,6)))*FAC0
C
          ENDDO
        ENDDO
C
      ENDDO
C
      DO NL=1,NLLATH_S
        SL   = NLLAT_S-NL+1
        GNL  = LATTRUE_S(NL)
        FAC3 = (DTA*WTACSJ(GNL))*WTS(GNL)
C
        DO JV=1,NLVER_S
          DO JM=1,NLMM_S
            IM = JMLTRUE_S(JM)
C
            WS(2,JM,JV,NL) = (F(IM,JV,NL,2)-F(IM,JV,SL,2))*FAC3
            WS(4,JM,JV,NL) = (F(IM,JV,NL,2)+F(IM,JV,SL,2))*FAC3
C
          ENDDO
        ENDDO
C
      ENDDO
C
      DO NL=1,NLLATH_S
        SL   = NLLAT_S-NL+1
        GNL  = LATTRUE_S(NL)
        FAC0 = WTS(GNL)
        FAC3 = (DTA*WTACSJ(GNL))*WTS(GNL)
C
        DO JV=1,NLVER_S
          DO JM=1,NLMM_S
            IM = JMLTRUE_S(JM)
C
            WS(5,JM,JV,NL) = (F(IM,JV,NL,3)+F(IM,JV,SL,3))
     &                     * CMPLX(ZERO,-(MTRUE_S(JM))*FAC3)
     &                     + (F(IM,JV,NL,8)+F(IM,JV,SL,8))*FAC0
            WS(7,JM,JV,NL) = (F(IM,JV,NL,3)-F(IM,JV,SL,3))
     &                     * CMPLX(ZERO,-(MTRUE_S(JM))*FAC3)
     &                     + (F(IM,JV,NL,8)-F(IM,JV,SL,8))*FAC0
C
          ENDDO
        ENDDO
C
      ENDDO
C
      DO NL=1,NLLATH_S
        SL   = NLLAT_S-NL+1
        GNL  = LATTRUE_S(NL)
        FAC3 = (DTA*WTACSJ(GNL))*WTS(GNL)
C
        DO JV=1,NLVER_S
          DO JM=1,NLMM_S
            IM = JMLTRUE_S(JM)
C
            WS(6,JM,JV,NL) = (F(IM,JV,NL,4)-F(IM,JV,SL,4))*FAC3
            WS(8,JM,JV,NL) = (F(IM,JV,NL,4)+F(IM,JV,SL,4))*FAC3
C
          ENDDO
        ENDDO
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE INPIV(P, JNTRNS, NLB, NLE, ALP, DALP, A2NNP1, DATA,
     &                 SUM)
C                                                                             
C This routine calculates a contribution to the forward transform 
C described in routine FTDPIV corresponding to a range of latitudes
C (NLB-NLE) and a single stage (P) for the vorticity and geopotential.
C INPIV overwrites the output vector with these values, and is used to
C initialize the output vector as the first step in a running sum
C calculation (SUMOPT .EQ. 0), or when delaying the summation over
C latitude (NL) to allow a (reproducible) binary tree sum ordering
C (SUMOPT .EQ. 1). 
C 
C called by: INFTPIV, RSFTPIV
C calls: 
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C current polynomial degree transition index
      INTEGER JNTRNS
C first and last latitude index to use in calculation
      INTEGER NLB, NLE
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S,NLLATH_S)
C (N*(N+1))/A**2
      REAL A2NNP1(0:KK)
C data (computed in TMPPIV from arrays of Fourier coefficients)
      COMPLEX DATA(8,MXLFC_S,NLVER_S,NLLATH_S)
C
C     Output
C
C contribution to forward transform "sum"
      COMPLEX SUM(MXLSPEC_S,NLVER_S,3,NLE)
C
C---- Local Variables --------------------------------------------------
C
C latitude, vertical layer, and wavenumber indices
      INTEGER NL, JV, JM
C polynomial degree index and index bounds
      INTEGER JN, JNFIRST, JNLAST
C spectral coefficient index
      INTEGER L
C spectral coefficient offset for wavenumber index JM
      INTEGER IS
C true wavenumber
      INTEGER M
C
C---- Executable Statements --------------------------------------------
C
C     Vary M and N so that procedure moves along columns denoted
C     by index JM.  M is given by (JM-1); N is given by (JN+JM-2).
C     Take advantage of symmetric character of Legendre polynomials.
C     (Procedure assumes even number of Gaussian latitudes ...)
C
      DO NL=NLB,NLE
C
        DO JV=1,NLVER_S
C
          L = 1
          DO JM=JMB_S(P),JME_S(P)
C
            IS = LLCOL_S(JM,2)
            M  = MTRUE_S(JM)
C
            JNFIRST = JNB_S(JNTRNS+JM)
            JNLAST = JNE_S(JNTRNS+JM)
            L = L - JNFIRST
C
            IF (MOD(JNFIRST,2) .EQ. 0) THEN
C             Compute contributions for first JN.
C             N = M + JNFIRST - 1
C
              SUM(L+JNFIRST,JV,2,NL)
     &             = ALP(IS+JNFIRST,NL)*DATA(3,JM,JV,NL) 
     &             + DALP(IS+JNFIRST,NL)*DATA(4,JM,JV,NL)
              SUM(L+JNFIRST,JV,3,NL)
     &             = ALP(IS+JNFIRST,NL)*DATA(7,JM,JV,NL)
     &             + DALP(IS+JNFIRST,NL)*DATA(8,JM,JV,NL)
C
              JNFIRST = JNFIRST + 1
            ENDIF
C
C           Calculate paired JNs.
            DO JN=JNFIRST,JNLAST-1,2
C             N = M + JN - 1
C
              SUM(L+JN,JV,2,NL)
     &             = ALP(IS+JN,NL)*DATA(1,JM,JV,NL) 
     &             + DALP(IS+JN,NL)*DATA(2,JM,JV,NL)
              SUM(L+JN+1,JV,2,NL) 
     &             = ALP(IS+JN+1,NL)*DATA(3,JM,JV,NL) 
     &             + DALP(IS+JN+1,NL)*DATA(4,JM,JV,NL)
C
              SUM(L+JN,JV,3,NL)
     &             = ALP(IS+JN,NL)*DATA(5,JM,JV,NL) 
     &             + DALP(IS+JN,NL)*DATA(6,JM,JV,NL)
              SUM(L+JN+1,JV,3,NL) 
     &             = ALP(IS+JN+1,NL)*DATA(7,JM,JV,NL) 
     &             + DALP(IS+JN+1,NL)*DATA(8,JM,JV,NL)
C
            ENDDO
C                                                                              
            IF (MOD((JNLAST-JNFIRST),2) .EQ. 0) THEN
C             Compute contributions for last JN.
C             N = M + JNLAST - 1
C
              SUM(L+JNLAST,JV,2,NL)
     &             = ALP(IS+JNLAST,NL)*DATA(1,JM,JV,NL)  
     &             + DALP(IS+JNLAST,NL)*DATA(2,JM,JV,NL)
              SUM(L+JNLAST,JV,3,NL)
     &             = ALP(IS+JNLAST,NL)*DATA(5,JM,JV,NL)
     &             + DALP(IS+JNLAST,NL)*DATA(6,JM,JV,NL)
C
            ENDIF
C
C           Update local spectral coefficient offset index.
            L = L + JNLAST + 1
C
          ENDDO
C
        ENDDO
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE RSPIV(P, JNTRNS, NLB, NLE, ALP, DALP, A2NNP1, DATA, 
     &                 SUM)
C                                                                             
C This routine calculates contributions to the forward transform 
C described in routine FTDPIV corresponding to a range of latitudes 
C (NLB-NLE) and stage (P) for the vorticity and geopotential. RSPIV 
C adds these values to the current contents of the output vector, as 
C part of a running sum calculation (SUMOPT .EQ. 0). 
C 
C called by: RSFTPIV
C calls: 
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C current polynomial degree transition index
      INTEGER JNTRNS
C first and last latitude index to use in calculation
      INTEGER NLB, NLE
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S,NLLATH_S)
C (N*(N+1))/A**2
      REAL A2NNP1(0:KK)
C data (computed in TMPPIV from arrays of Fourier coefficients)
      COMPLEX DATA(8,MXLFC_S,NLVER_S,NLLATH_S)
C
C     Output
C
C contribution to forward transform SUM
      COMPLEX SUM(MXLSPEC_S,NLVER_S,3)
C
C---- Local Variables --------------------------------------------------
C
C latitude, vertical layer, and wavenumber indices
      INTEGER NL, JV, JM
C polynomial degree index and index bounds
      INTEGER JN, JNFIRST, JNLAST
C spectral coefficient index
      INTEGER L
C spectral coefficient offset for wavenumber index JM
      INTEGER IS
C true wavenumber
      INTEGER M
C
C---- Executable Statements --------------------------------------------
C                                                                              
C     Vary M and N so that procedure moves along columns denoted
C     by index JM.  M is given by (JM-1); N is given by (JN+JM-2).
C     Take advantage of symmetric character of Legendre polynomials.
C     (Procedure assumes even number of Gaussian latitudes ...)
C
      DO NL=NLB,NLE
C
        DO JV=1,NLVER_S
C
          L = 1
          DO JM=JMB_S(P),JME_S(P)
C
            IS = LLCOL_S(JM,2)
            M  = MTRUE_S(JM)
C
            JNFIRST = JNB_S(JNTRNS+JM)
            JNLAST = JNE_S(JNTRNS+JM)
            L = L - JNFIRST
C
            IF (MOD(JNFIRST,2) .EQ. 0) THEN
C             Compute contributions for first JN.
C             N = M + JNFIRST - 1
C
              SUM(L+JNFIRST,JV,2)
     &             = SUM(L+JNFIRST,JV,2)
     &             + ALP(IS+JNFIRST,NL)*DATA(3,JM,JV,NL) 
     &             + DALP(IS+JNFIRST,NL)*DATA(4,JM,JV,NL)
              SUM(L+JNFIRST,JV,3)
     &             = SUM(L+JNFIRST,JV,3)
     &             + ALP(IS+JNFIRST,NL)*DATA(7,JM,JV,NL)
     &             + DALP(IS+JNFIRST,NL)*DATA(8,JM,JV,NL)
C
              JNFIRST = JNFIRST + 1
            ENDIF
C
C           Calculate paired JNs.
            DO JN=JNFIRST,JNLAST-1,2
C             N = M + JN - 1
C
              SUM(L+JN,JV,2)
     &             = SUM(L+JN,JV,2)
     &             + ALP(IS+JN,NL)*DATA(1,JM,JV,NL) 
     &             + DALP(IS+JN,NL)*DATA(2,JM,JV,NL)
              SUM(L+JN+1,JV,2) 
     &             = SUM(L+JN+1,JV,2) 
     &             + ALP(IS+JN+1,NL)*DATA(3,JM,JV,NL) 
     &             + DALP(IS+JN+1,NL)*DATA(4,JM,JV,NL)
C
              SUM(L+JN,JV,3)
     &             = SUM(L+JN,JV,3)
     &             + ALP(IS+JN,NL)*DATA(5,JM,JV,NL) 
     &             + DALP(IS+JN,NL)*DATA(6,JM,JV,NL)
              SUM(L+JN+1,JV,3) 
     &             = SUM(L+JN+1,JV,3) 
     &             + ALP(IS+JN+1,NL)*DATA(7,JM,JV,NL) 
     &             + DALP(IS+JN+1,NL)*DATA(8,JM,JV,NL)
C
            ENDDO
C                                                                              
            IF (MOD((JNLAST-JNFIRST),2) .EQ. 0) THEN
C             Compute contributions for last JN.
C             N = M + JNLAST - 1
C
              SUM(L+JNLAST,JV,2)
     &             = SUM(L+JNLAST,JV,2)
     &             + ALP(IS+JNLAST,NL)*DATA(1,JM,JV,NL)  
     &             + DALP(IS+JNLAST,NL)*DATA(2,JM,JV,NL)
              SUM(L+JNLAST,JV,3)
     &             = SUM(L+JNLAST,JV,3)
     &             + ALP(IS+JNLAST,NL)*DATA(5,JM,JV,NL)
     &             + DALP(IS+JNLAST,NL)*DATA(6,JM,JV,NL)
C
            ENDIF
C
C           Update local spectral coefficient offset index.
            L = L + JNLAST + 1
C
          ENDDO
C
        ENDDO
C
      ENDDO
C
      RETURN
      END
