C#######################################################################
C PSTSWM Version 4.0 (12/1/94)                                         #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
C include precision declaration definitions                            #
#include "precision.i"
C#######################################################################
      SUBROUTINE FTRNPE (DTA, ALP, DALP, WTS, WTACSJ, F, WSA, WSB, 
     &                   PCOEF)
C                                                                             
C This subroutine performs a forward transform procedure used to 
C evaluate the right hand side of the shallow water equations for the 
C geopotential prognostic equation using explicit timestepping. The 
C complex coefficient vector returned by this routine is zeroed within 
C the forward transform procedure (i.e., the user can not specify 
C initial state). 
C                                                                              
C called by: ADVECT
C calls: FLTSUM, INPE, RSPE, TMPPE, TREESUM
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C parallel algorithm information
#     include "algorithm.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C timestep
      REAL DTA
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S,NLLATH_S)
C Gaussian weights
      REAL WTS(NLAT)
C 1.0/(A*COS(LAT)**2)
      REAL WTACSJ(NLAT)
C
C     Input/Work Arrays
C
C arrays of Fourier coefficients 
C  F(MXLFC_S,NLVER_S,NLLAT_S,1): (UCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,2): (VCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,3): (PHI^TAU-1)^M 
      COMPLEX F(MXLFC_S,NLVER_S,NLLAT_S,3)
C
C     Work Space
C
C work arrays
      COMPLEX WSA(4,MXLFC_S,NLVER_S,NLLATH_S)
      COMPLEX WSB(MXLSPEC_S,NLVER_S,BUFSFLT+NLLATH_S)
C (note: WSB will only have space allocated for 
C  WSB(MXLSPEC_S,NLVER_S,BUFSFLT+1) when (SUMOPT .EQ. 0)
C
C     Output
C
C computed geopotential for new timelevel
      COMPLEX PCOEF(MXLSPEC_S,NLVER_S)                        
C
C---- Local Variables --------------------------------------------------
C
C latitude index and parallel stage
      INTEGER NL, P
C polynomial degree transition index
      INTEGER JNTRNS
C
C---- Executable Statements --------------------------------------------
C
      IF (NLMM_S .GT. 0) THEN
C
C       Calculate intermediate quantities from arrays of Fourier 
C       coefficients for computational efficiency.
        CALL TMPPE(DTA, WTS, WTACSJ, F, WSA)
C
C       Calculate local contribution to vector sum.
        IF (SUMOPT .EQ. 0) THEN
C
C         Calculate local contribution to vector sum using in-place
C         summation ordering:
C         1) (P .EQ. NLTSTEPS) case: put contribution in output vectors.
C
C         Initialize contribution vector.
          CALL INPE(NLTSTEPS, JNTRNS, 1, ALP(1,1), DALP(1,1), WSA, 
     &              PCOEF)
C
          DO NL=2,NLLATH_S
C           Add contribution to running sum.
            CALL RSPE(NLTSTEPS, JNTRNS, ALP(1,NL), DALP(1,NL),
     &                WSA(1,1,1,NL), PCOEF)
          ENDDO
C
C         Communicate partial results with rest of logical column.
          CALL FLTSUM(NLTSTEPS, NLTSTEPS, NLSPEC_S(NLTSTEPS), MXLSPEC_S,
     &                NLVER_S, WSB(1,1,BUFSFLT+1), WSB, PCOEF)
C
C         2) (P .LT. NLTSTEPS) case: use WSB for local contribution.
          DO P=NLTSTEPS-1,1,-1
C
C           Initialize contribution vector.
            CALL INPE(P, JNTRNS, 1, ALP(1,1), DALP(1,1),
     &                WSA, WSB(1,1,BUFSFLT+1))
C
            DO NL=2,NLLATH_S
C             Add contribution to running sum.
              CALL RSPE(P, JNTRNS, ALP(1,NL), DALP(1,NL),
     &                  WSA(1,1,1,NL), WSB(1,1,BUFSFLT+1))
            ENDDO
C
C           Communicate partial results with rest of logical column.
            CALL FLTSUM(P, NLTSTEPS, NLSPEC_S(P), MXLSPEC_S, NLVER_S,
     &                  WSB(1,1,BUFSFLT+1), WSB, PCOEF)
C
          ENDDO
C
        ELSE
C
C         Calculate local contribution to vector sum using binary tree
C         summation ordering:
C         1) (P .EQ. NLTSTEPS) case: put contribution in output vectors.
C
C         Compute individual components of sum.
          DO NL=1,NLLATH_S
            CALL INPE(NLTSTEPS, JNTRNS, NL, ALP(1,NL), DALP(1,NL), 
     &                WSA(1,1,1,NL), WSB(1,1,BUFSFLT+NL))
          ENDDO
C
C         Compute local binary tree sum (doubling length because TREESUM
C         expects real vectors).
          CALL TREESUM(1, 2*NLSPEC_S(NLTSTEPS), 2*MXLSPEC_S, NLVER_S, 
     &                 NLLATH_S, WSB(1,1,BUFSFLT+1), PCOEF)
C
C         Communicate partial results with rest of logical column.
          CALL FLTSUM(NLTSTEPS, NLTSTEPS, NLSPEC_S(NLTSTEPS), MXLSPEC_S,
     &                NLVER_S, WSB(1,1,BUFSFLT+1), WSB, PCOEF)
C
C         2) (P .LT. NLTSTEPS) case: use WSB for local contribution.
          DO P=NLTSTEPS-1,1,-1
C
C           Compute individual components of sum.
            DO NL=1,NLLATH_S
              CALL INPE(P, JNTRNS, NL, ALP(1,NL), DALP(1,NL),
     &                  WSA(1,1,1,NL), WSB(1,1,BUFSFLT+NL))
            ENDDO
C
C           Compute local binary tree sum (doubling length because 
C           TREESUM expects real vectors).
            CALL TREESUM(0, 2*NLSPEC_S(P), 2*MXLSPEC_S, NLVER_S, 
     &                   NLLATH_S, WSB(1,1,BUFSFLT+1),
     &                   WSB(1,1,BUFSFLT+1))
C
C           Communicate partial results with rest of logical column.
            CALL FLTSUM(P, NLTSTEPS, NLSPEC_S(P), MXLSPEC_S, NLVER_S,
     &                  WSB(1,1,BUFSFLT+1), WSB, PCOEF)
C
          ENDDO
C
        ENDIF
C
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE TMPPE (DTA, WTS, WTACSJ, F, WS)
C                                                                              
C This routine calculates intermediate quantities needed by FTRNPE.
C
C called by: FTRNPE
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C timestep
      REAL DTA
C Gaussian weights
      REAL WTS(NLAT)
C 1.0/(A*COS(LAT)**2)
      REAL WTACSJ(NLAT)
C arrays of Fourier coefficients 
C  F(MXLFC_S,NLVER_S,NLLAT_S,1): (UCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,2): (VCOS*PHI)^M
C  F(MXLFC_S,NLVER_S,NLLAT_S,3): (PHI^TAU-1)^M 
      COMPLEX F(MXLFC_S,NLVER_S,NLLAT_S,3)
C
C     Output
C
C intermediate quantities
      COMPLEX WS(4,MXLFC_S,NLVER_S,NLLATH_S)
C
C---- Local Variables --------------------------------------------------
C
C real zero, to allow easy precision modifications
      REAL ZERO
C latitude (northern, southern, and global) indices
      INTEGER NL, SL, GNL
C vertical layer index
      INTEGER JV
C untruncated and truncated wavenumber indices
      INTEGER IM, JM
C precalculated coefficients
      REAL FAC0, FAC2, FAC3
C
C---- Executable Statements -------------------------------------------
C                                                                              
      ZERO = 0.0
C
C     Calculate intermediate quantities.
      DO NL=1,NLLATH_S
        SL   = NLLAT_S-NL+1
        GNL  = LATTRUE_S(NL)
        FAC0 = WTS(GNL)
        FAC2 = DTA*WTACSJ(GNL)
C
        DO JV=1,NLVER_S
          DO JM=1,NLMM_S
            IM = JMLTRUE_S(JM)
C
            WS(1,JM,JV,NL) = ((F(IM,JV,NL,1)+F(IM,JV,SL,1)) 
     &                     * CMPLX(ZERO,-MTRUE_S(JM)*FAC2)
     &                     + (F(IM,JV,NL,3)+F(IM,JV,SL,3)))*FAC0
            WS(3,JM,JV,NL) = ((F(IM,JV,NL,1)-F(IM,JV,SL,1)) 
     &                     * CMPLX(ZERO,-MTRUE_S(JM)*FAC2)
     &                     + (F(IM,JV,NL,3)-F(IM,JV,SL,3)))*FAC0
C
          ENDDO
        ENDDO
C
      ENDDO
C
      DO NL=1,NLLATH_S
        SL   = NLLAT_S-NL+1
        GNL  = LATTRUE_S(NL)
        FAC3 = DTA*WTACSJ(GNL)*WTS(GNL)
C
        DO JV=1,NLVER_S
          DO JM=1,NLMM_S
            IM = JMLTRUE_S(JM)
C
            WS(2,JM,JV,NL) = (F(IM,JV,NL,2)-F(IM,JV,SL,2))*FAC3 
            WS(4,JM,JV,NL) = (F(IM,JV,NL,2)+F(IM,JV,SL,2))*FAC3  
C
          ENDDO
        ENDDO
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE INPE (P, JNTRNS, NL, ALP, DALP, DATA, SUM)
C                                                                             
C This routine calculates a contribution to the forward transform 
C described in routine FTRNPE corresponding to a single latitude (NL) 
C and stage (P). INPE overwrites the output vector with these values,
C and is used to initialize the output vector as the first step in a
C running sum calculation (SUMOPT .EQ. 0), or when delaying the
C summation over latitude (NL) to allow a (reproducible) binary tree 
C sum ordering (SUMOPT .EQ. 1).
C 
C called by: FTRNPE
C calls: 
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Model Parameters -------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C latitude index
      INTEGER NL
C associated Legendre polynomials
      REAL ALP(NFSPEC_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S)
C data (computed in TMPPE from arrays of Fourier coefficients)
      COMPLEX DATA(4,MXLFC_S,NLVER_S)
C
C     Input/Output
C
C current polynomial degree transition index
      INTEGER JNTRNS
C
C     Output
C
C contribution to forward transform "sum"
      COMPLEX SUM(MXLSPEC_S,NLVER_S)
C
C---- Local Variables --------------------------------------------------
C
C wavenumber and vertical layer indices
      INTEGER JM, JV
C polynomial degree index and index bounds
      INTEGER JN, JNFIRST, JNLAST
C spectral coefficient index
      INTEGER L
C spectral coefficient offset for wavenumber index jm
      INTEGER IS
C true wavenumber
      INTEGER M
C
C---- Executable Statements --------------------------------------------
C
C     Initialize/update polynomial degree transition index.
      IF ((P .EQ. NLTSTEPS) .AND. (NL .EQ. 1)) THEN
C       First call in this transform: initialize offset.
        JNTRNS = NTRNS_S - JME_S(P)
      ELSEIF (NL .EQ. 1) THEN
C       First call for this value of P: update offset.
        JNTRNS = JNTRNS + (JMB_S(P+1)-1) - JME_S(P)     
      ENDIF
C
C     Vary M and N so that procedure moves along columns denoted
C     by index JM.  M is given by (JM-1); N is given by (JN+JM-2).
C     Take advantage of symmetric character of Legendre polynomials.
C     (Procedure assumes even number of Gaussian latitudes ...)
      DO JV=1,NLVER_S
C
        L = 1
        DO JM=JMB_S(P),JME_S(P)
C
          IS = LLCOL_S(JM,2)
          M  = MTRUE_S(JM)
C
          JNFIRST = JNB_S(JNTRNS+JM)
          JNLAST = JNE_S(JNTRNS+JM)
          L = L - JNFIRST
C
          IF (MOD(JNFIRST,2) .EQ. 0) THEN
C           Compute contributions for first JN.
C
            SUM(L+JNFIRST,JV)
     &           = ALP(IS+JNFIRST)*DATA(3,JM,JV) 
     &           + DALP(IS+JNFIRST)*DATA(4,JM,JV)
C
            JNFIRST = JNFIRST + 1
          ENDIF
C
C         Calculate paired JNs.
          DO JN=JNFIRST,JNLAST-1,2
C
            SUM(L+JN,JV)
     &           = ALP(IS+JN)*DATA(1,JM,JV) 
     &           + DALP(IS+JN)*DATA(2,JM,JV)
C
            SUM(L+JN+1,JV) 
     &           = ALP(IS+JN+1)*DATA(3,JM,JV) 
     &           + DALP(IS+JN+1)*DATA(4,JM,JV)
C
          ENDDO
C                                                                              
          IF (MOD((JNLAST-JNFIRST),2) .EQ. 0) THEN
C           Compute contributions for last JN.
C
            SUM(L+JNLAST,JV)
     &           = ALP(IS+JNLAST)*DATA(1,JM,JV) 
     &           + DALP(IS+JNLAST)*DATA(2,JM,JV)
C
          ENDIF
C
C         Update local spectral coefficient offset index.
          L = L + JNLAST + 1
C
        ENDDO
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE RSPE (P, JNTRNS, ALP, DALP, DATA, SUM)
C                                                                             
C This routine calculates a contribution to the forward transform 
C described in routine FTRNPE corresponding to a single latitude and
C stage (P). RSPE adds these values to the current contents of the output 
C vector, as part of a running sum calculation (SUMOPT .EQ. 0).
C 
C called by: FTRNPE
C calls: 
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C current polynomial degree transition index
      INTEGER JNTRNS
C associated Legendre polynomials
      REAL ALP(NFSPEC_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S)
C data (computed in TMPPE from arrays of Fourier coefficients)
      COMPLEX DATA(4,MXLFC_S,NLVER_S)
C
C     Output
C
C contribution to forward transform "sum"
      COMPLEX SUM(MXLSPEC_S,NLVER_S)
C
C---- Local Variables --------------------------------------------------
C
C wavenumber and vertical layer indices
      INTEGER JM, JV
C polynomial degree index and index bounds
      INTEGER JN, JNFIRST, JNLAST
C spectral coefficient index
      INTEGER L
C spectral coefficient offset for wavenumber index jm
      INTEGER IS
C true wavenumber
      INTEGER M
C
C---- Executable Statements --------------------------------------------
C                                                                              
C     Vary M and N so that procedure moves along columns denoted
C     by index JM.  M is given by (JM-1); N is given by (JN+JM-2).
C     Take advantage of symmetric character of Legendre polynomials.
C     (Procedure assumes even number of Gaussian latitudes ...)
C
      DO JV=1,NLVER_S
C
        L = 1
        DO JM=JMB_S(P),JME_S(P)
C
          IS = LLCOL_S(JM,2)
          M  = MTRUE_S(JM)
C
          JNFIRST = JNB_S(JNTRNS+JM)
          JNLAST = JNE_S(JNTRNS+JM)
          L = L - JNFIRST
C
          IF (MOD(JNFIRST,2) .EQ. 0) THEN
C           Compute contributions for first JN.
C
            SUM(L+JNFIRST,JV)
     &           = SUM(L+JNFIRST,JV)
     &           + ALP(IS+JNFIRST)*DATA(3,JM,JV) 
     &           + DALP(IS+JNFIRST)*DATA(4,JM,JV)
C
            JNFIRST = JNFIRST + 1
          ENDIF
C
C         Calculate paired JNs.
          DO JN=JNFIRST,JNLAST-1,2
C
            SUM(L+JN,JV)
     &           = SUM(L+JN,JV)
     &           + ALP(IS+JN)*DATA(1,JM,JV) 
     &           + DALP(IS+JN)*DATA(2,JM,JV)
C
            SUM(L+JN+1,JV) 
     &           = SUM(L+JN+1,JV) 
     &           + ALP(IS+JN+1)*DATA(3,JM,JV) 
     &           + DALP(IS+JN+1)*DATA(4,JM,JV)
C
          ENDDO
C                                                                              
          IF (MOD((JNLAST-JNFIRST),2) .EQ. 0) THEN
C           Compute contributions for last JN.
C
            SUM(L+JNLAST,JV)
     &           = SUM(L+JNLAST,JV)
     &           + ALP(IS+JNLAST)*DATA(1,JM,JV) 
     &           + DALP(IS+JNLAST)*DATA(2,JM,JV)
C
          ENDIF
C
C         Update local spectral coefficient offset index.
          L = L + JNLAST + 1
C
        ENDDO
C
      ENDDO
C
      RETURN
      END
