C#######################################################################
C PSTSWM Version 4.0 (12/1/94)                                         #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
C include precision declaration definitions                            #
#include "precision.i"
C#######################################################################
      SUBROUTINE SHTRNS(ITYPE, ALP, WTS, TRIGS, WS1, WS2, WS3, PDATA,
     &                  SCOEF)
C
C This subroutine performs spherical harmonic transforms and inverse
C tranforms of arbitrary scalar data on a Gaussian grid. (See the
C Gaussian latitudes and weights.)
C
C SHTRNS is used only during initialization. To save space, only a
C single vertical layer is assumed in the physical and spectral data,
C and only a single vertical level of spectral coefficients is
C generated. In order to produce a partitioning of the spectral  
C coefficients that corresponds to that produced when there are 
C vertical levels, vertical levels are added before calling RFTLON 
C (where all the spatial transforms are hidden). Only one vertical 
C level in the output from RFTLON is used in the rest of the 
C calculation.
C
C called by: INIT, PSC
C calls: ADDLVL, FLTRNS, ILTRNS, RFTLON, RMLVL
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C parallel algorithm information
#     include "algorithm.i"
C domain decomposition information
#     include "physical.i"
#     include "spectral.i"
C transform arrays
#     include "trnsfm.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C type of transform desired
C  -1 => forward transform PDATA -> SCOEF
C  +1 => inverse transform SCOEF -> PDATA
      INTEGER ITYPE
C associated legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C Gaussian weights
      REAL WTS(NLAT)
C trigonometric function values used by RFTLON 
      COMPLEX TRIGS(NTRIGS)
C
C     Work Space
C
C work array 1
      REAL WS1(MXLLON_P,NLVER_P,NLLAT_P)
C work array 2
C (big enough for REAL (MXLLON_P,MXLVER_P,MXLLAT_P,BUFSWS2)
C             and REAL (MXLLON_F,MXLVER_F,MXLLAT_F,BUFSWS2)
C             and COMPLEX (MXLFC_S,MXLVER_S,MXLLAT_S,BUFSWS2))
      REAL WS2(1)
C work array 3
      COMPLEX WS3(MXLSPEC_S,BUFSWS3)
C
C     Input/Output
C
C scalar data (on Gaussian grid)
      COMPLEX PDATA(NLLON_P,NLLAT_P)
C spherical harmonic coefficient array
      COMPLEX SCOEF(MXLSPEC_S)
C
C---- Executable Statements --------------------------------------------
C
C     First, check for valid arguments; invalid arguments=>fatal error.
      IF ((ITYPE .NE. +1) .AND. (ITYPE .NE. -1)) THEN
        WRITE (0,900) ITYPE
  900   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SHTRNS:',/,
     &           ' UNKNOWN TYPE OF TRANSFORM SPECIFIED',/, 
     &           ' ITYPE = ',I3)
        STOP
      ENDIF
C
      IF (ITYPE .EQ. -1) THEN
C       forward transform from physical to wavenumber space
C
C       Add vertical levels before calling RFTLON.
        CALL ADDLVL(NLLON_P, NLLAT_P, MXLLON_P, NLVER_P, PDATA, WS1)
C
C       Fourier transform data on Gaussian grid.
C       (data and results in WS1)
        CALL RFTLON(-1, 1, TRIGS, WS2, WS1)
C
C       Remove levels from work array.
        CALL RMLVL(2*NLFC_S, NLLAT_S, 2*MXLFC_S, NLVER_S, WS1, WS1)
C
C       procedure for forward Gauss-Legendre transform
C       (data in WS1)
        CALL FLTRNS(ALP, WTS, WS1, WS2, WS3, SCOEF)
C
C       transformation to wavenumber space (forward transform) complete
C
      ELSE
C
C       inverse transform from wavenumber to physical space (itype=+1)
C       (results in WS1)
        CALL ILTRNS(ALP, SCOEF, WS3, WS1)
C
C       Add vertical levels before calling RFTLON.
        CALL ADDLVL(2*NLFC_S, NLLAT_S, 2*MXLFC_S, NLVER_S, WS1, WS1)
C
C       inverse fast Fourier transform to Gaussian grid
C       (data and results in WS1)
        CALL RFTLON(+1, 1, TRIGS, WS2, WS1)
C
C       Extract data from work array.
        CALL RMLVL(NLLON_P, NLLAT_P, MXLLON_P, NLVER_P, WS1, PDATA)
C
C       transformation to physical space (inverse transform) complete
C
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE ADDLVL(ISIZE, JSIZE, MXISIZE, NLEVELS, INPUT, OUTPUT)
C
C This subroutine fills the output array with NLEVELS copies of the
C input array, where NLEVELS is the middle index of output.
C Routine still works correctly if INPUT and OUTPUT are the same arrays.
C
C called by: DZSC, SHTRNS
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C size of input array
      INTEGER ISIZE, JSIZE
C size of first dimension of output array
      INTEGER MXISIZE
C number of levels in output array
      INTEGER NLEVELS
C input array
      REAL INPUT(ISIZE,JSIZE)
C
C     Output
C
C output array
      REAL OUTPUT(MXISIZE,NLEVELS,JSIZE)
C
C---- Local Variables --------------------------------------------------
C
C     first / - / second indices of input array
C     first / second / third indices of output array
      INTEGER I, L, J
C
C---- Executable Statements --------------------------------------------
C
      IF (NLEVELS .GT. 0) THEN
C       Fill last level of output array, in reverse column and order, to
C       guarantee that no input values are overwritten before they are
C       used.
        DO J=JSIZE,1,-1
          DO I=ISIZE,1,-1
            OUTPUT(I,NLEVELS,J) = INPUT(I,J)
          ENDDO
        ENDDO
C
C       Fill rest of levels of output array.
        DO J=1,JSIZE
          DO L=1,NLEVELS-1
            DO I=1,ISIZE
              OUTPUT(I,L,J) = OUTPUT(I,NLEVELS,J)
            ENDDO
          ENDDO
        ENDDO
C
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE RMLVL(ISIZE, JSIZE, MXISIZE, NLEVELS, INPUT, OUTPUT)
C
C This subroutine extracts level 1 from the INPUT array to fill the
C OUTPUT array. If INPUT and OUTPUT are the same array, the routine 
C "correctly" overwrites the first ISIZE*JSIZE elements of the array.
C
C called by: DZSC, SHTRNS
C calls:
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C size of output array
      INTEGER ISIZE, JSIZE
C size of first dimension of input array
      INTEGER MXISIZE
C number of levels in input array
      INTEGER NLEVELS
C input array
      REAL INPUT(MXISIZE,NLEVELS,JSIZE)
C
C     Output
C
C output array
      REAL OUTPUT(ISIZE,JSIZE)
C
C---- Local Variables --------------------------------------------------
C
C     first / third indices of input array
C     first / second indices of output array
      INTEGER I, J
C
C---- Executable Statements --------------------------------------------
C
C     Fill output array.
      IF (NLEVELS .GT. 0) THEN
        DO J=1,JSIZE
          DO I=1,ISIZE
            OUTPUT(I,J) = INPUT(I,1,J)
          ENDDO
        ENDDO
      ELSE
        DO J=1,JSIZE
          DO I=1,ISIZE
            OUTPUT(I,J) = 0.0
          ENDDO
        ENDDO
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE FLTRNS(ALP, WTS, F, WSA, WSB, SCOEF)
C
C This routine computes spectral coefficients from Fourier coefficients
C by computing the forward Legendre transform (i.e., the Gauss 
C quadrature approximation to the forward Legendre transform).
C
C called by: SHTRNS
C calls: FLTSUM, TREESUM
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C parallel algorithm information
#     include "algorithm.i"
C domain decomposition definition variables
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C Gaussian weights
      REAL WTS(NLAT)
C Fourier coefficients 
      COMPLEX F(NLFC_S,NLLAT_S)
C
C     Work Space
C
C work arrays
      COMPLEX WSA(2,NLFC_S,NLLATH_S)
      COMPLEX WSB(MXLSPEC_S,BUFSFLT+NLLATH_S)
C (note: WSB will only have space allocated for 
C  WSB(MXLSPEC_S,BUFSFLT+1) when (SUMOPT .EQ. 0)
C
C     Output
C
C computed spectral coefficients
      COMPLEX SCOEF(MXLSPEC_S)
C
C---- Local Variables --------------------------------------------------
C
C parallel stage and latitude index
      INTEGER P, NL
C polynomial degree transition index
      INTEGER JNTRNS
C
C---- Executable Statements --------------------------------------------
C
      IF (NLMM_S .GT. 0) THEN
C
C       Calculate intermediate quantities from array of Fourier 
C       coefficients for computational efficiency.
        CALL TMPFLT(WTS, F, WSA)
C
C       Calculate local contribution to vector sum.
        IF (SUMOPT .EQ. 0) THEN
C
C         Calculate local contribution to vector sum using in-place
C         summation ordering:
C         1) (P. EQ. NLTSTEPS) case: put contribution in output vectors.
C
C         Initialize contribution vector.
          CALL INFLT(NLTSTEPS, JNTRNS, 1, ALP(1,1), WSA, SCOEF)
C
          DO NL=2,NLLATH_S
C           Add contribution to running sum.
            CALL RSFLT(NLTSTEPS, JNTRNS, ALP(1,NL), WSA(1,1,NL), SCOEF)
          ENDDO
C
C         Communicate partial results with rest of logical column.
          CALL FLTSUM(NLTSTEPS, NLTSTEPS, NLSPEC_S(NLTSTEPS), MXLSPEC_S,
     &                1, WSB(1,BUFSFLT+1), WSB, SCOEF)
C
C         2) (P .LT. NLTSTEPS) case: use WSB for local contribution.
          DO P=NLTSTEPS-1,1,-1
C
C           Initialize contribution vector.
            CALL INFLT(P, JNTRNS, 1, ALP(1,1), WSA, WSB(1,BUFSFLT+1))
C
            DO NL=2,NLLATH_S
C             Add contribution to running sum.
              CALL RSFLT(P, JNTRNS, ALP(1,NL), WSA(1,1,NL), 
     &                   WSB(1,BUFSFLT+1))
            ENDDO
C
C           Communicate partial results with rest of logical column.
            CALL FLTSUM(P, NLTSTEPS, NLSPEC_S(P), MXLSPEC_S, 1,
     &                  WSB(1,BUFSFLT+1), WSB, SCOEF)
C
          ENDDO
C
        ELSE
C
C         Calculate local contribution to vector sum using binary tree
C         summation ordering:
C         1) (P. EQ. NLTSTEPS) case: put contribution in output vectors
C
C         Compute individual components of sum.
          DO NL=1,NLLATH_S
            CALL INFLT(NLTSTEPS, JNTRNS, NL, ALP(1,NL), 
     &                 WSA(1,1,NL), WSB(1,BUFSFLT+NL))
          ENDDO
C
C         Compute local binary tree sum (doubling length because TREESUM
C         expects real vectors).
          CALL TREESUM(1, 2*NLSPEC_S(NLTSTEPS), 2*MXLSPEC_S, 1, 
     &                 NLLATH_S, WSB(1,BUFSFLT+1), SCOEF)
C
C         Communicate partial results with rest of logical column.
          CALL FLTSUM(NLTSTEPS, NLTSTEPS, NLSPEC_S(NLTSTEPS), MXLSPEC_S,
     &                1, WSB(1,BUFSFLT+1), WSB, SCOEF)
C
C         2) (P .LT. NLTSTEPS) case: use WSB for local contribution.
          DO P=NLTSTEPS-1,1,-1
C
C           Compute individual components of sum.
            DO NL=1,NLLATH_S
              CALL INFLT(P, JNTRNS, NL, ALP(1,NL),
     &                   WSA(1,1,NL), WSB(1,BUFSFLT+NL))
            ENDDO
C
C           Compute local binary tree sum (doubling length because 
C           TREESUM expects real vectors).
            CALL TREESUM(0, 2*NLSPEC_S(P), 2*MXLSPEC_S, 1, NLLATH_S, 
     &                   WSB(1,BUFSFLT+1), WSB(1,BUFSFLT+1))
C
C           Communicate partial results with rest of logical column.
            CALL FLTSUM(P, NLTSTEPS, NLSPEC_S(P), MXLSPEC_S, 1,
     &                  WSB(1,BUFSFLT+1), WSB, SCOEF)
C
          ENDDO
C
        ENDIF
C
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE TMPFLT (WTS, F, WS)
C                                                                              
C This routine calculates intermediate quantities needed by FLTRNS.
C
C called by: FLTRNS
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C Gaussian weights
      REAL WTS(NLAT)
C Fourier coefficients 
      COMPLEX F(NLFC_S,NLLAT_S)
C
C     Output
C
C intermediate quantities
      COMPLEX WS(2,NLFC_S,NLLATH_S)
C
C---- Local Variables --------------------------------------------------
C
C latitude (northern, southern, & global) indices
      INTEGER NL, SL, GNL
C untruncated and truncated wavenumber indices
      INTEGER IM, JM
C
C---- Executable Statements -------------------------------------------
C                                                                              
C     Calculate intermediate quantities.
      DO NL=1,NLLATH_S
        SL  = NLLAT_S-NL+1
        GNL = LATTRUE_S(NL)
C
        DO JM=1,NLMM_S
          IM   = JMLTRUE_S(JM)
C
          WS(1,JM,NL) = (F(IM,NL) + F(IM,SL))*WTS(GNL)
          WS(2,JM,NL) = (F(IM,NL) - F(IM,SL))*WTS(GNL)
C
        ENDDO
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE INFLT (P, JNTRNS, NL, ALP, DATA, SUM)
C                                                                             
C This routine calculates a contribution to the forward Legendre 
C transform described in routine FLTRNS corresponding to a single 
C latitude (NL) and stage (P). INFLT overwrites the output vector with 
C these values, and is used to initialize the output vector as the first
C step in a running sum calculation (SUMOPT .EQ. 0), or when delaying 
C the summation over latitude (NL) to allow a (reproducible) binary tree 
C sum ordering (SUMOPT .EQ. 1).
C 
C called by: FLTRNS
C calls: 
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C latitude index
      INTEGER NL
C associated Legendre polynomials
      REAL ALP(NFSPEC_S)
C data (computed in TMPFLT from array of Fourier coefficients)
      COMPLEX DATA(2,NLFC_S)
C
C     Input/Output
C
C current polynomial degree transition index
      INTEGER JNTRNS
C
C     Output
C
C contribution to forward transform "sum"
      COMPLEX SUM(MXLSPEC_S)
C
C---- Local Variables --------------------------------------------------
C
C wavenumber and wavenumber index
      INTEGER M, JM
C polynomial degree index and index bounds
      INTEGER JN, JNFIRST, JNLAST
C spectral coefficient index
      INTEGER L
C spectral coefficient offset for wavenumber index JM
      INTEGER IS
C
C---- Executable Statements --------------------------------------------
C
C     Initialize/update polynomial degree transition index.
      IF ((P .EQ. NLTSTEPS) .AND. (NL .EQ. 1)) THEN
C       First call in this transform: initialize offset.
        JNTRNS = NTRNS_S - JME_S(P)
      ELSEIF (NL .EQ. 1) THEN
C       First call for this value of p: update offset.
        JNTRNS = JNTRNS + (JMB_S(P+1)-1) - JME_S(P)     
      ENDIF
C
C     Vary M and N so that procedure moves along columns denoted
C     by index JM.  M is given by (JM-1); N is given by (JN+JM-2).
C     Take advantage of symmetric character of Legendre polynomials.
C     (Procedure assumes even number of Gaussian latitudes ...)
      L = 1
      DO JM=JMB_S(P),JME_S(P)
C
        IS = LLCOL_S(JM,2)
        M  = MTRUE_S(JM)
C
        JNFIRST = JNB_S(JNTRNS+JM)
        JNLAST = JNE_S(JNTRNS+JM)
        L = L - JNFIRST
C
        IF (MOD(JNFIRST,2) .EQ. 0) THEN
C         compute contributions for first JN
          SUM(L+JNFIRST) = ALP(IS+JNFIRST)*DATA(2,JM) 
          JNFIRST = JNFIRST + 1
        ENDIF
C
C       Calculate paired JNs.
        DO JN=JNFIRST,JNLAST-1,2
          SUM(L+JN)   = ALP(IS+JN)*DATA(1,JM) 
          SUM(L+JN+1) = ALP(IS+JN+1)*DATA(2,JM) 
        ENDDO
C                                                                              
        IF (MOD((JNLAST-JNFIRST),2) .EQ. 0) THEN
C         Compute contributions for last JN.
          SUM(L+JNLAST) = ALP(IS+JNLAST)*DATA(1,JM)  
        ENDIF
C
C       Update local spectral coefficient offset index.
        L = L + JNLAST + 1
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE RSFLT (P, JNTRNS, ALP, DATA, SUM)
C                                                                             
C This routine calculates a contribution to the forward Legendre 
C transform described in routine FLTRNS corresponding to a single 
C latitude and stage (P). RSFLT adds these values to the current 
C contents of the output vector, as part of a running sum calculation 
C (SUMOPT .EQ. 0).
C 
C called by: FLTRNS
C calls: 
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C current polynomial degree transition index
      INTEGER JNTRNS
C associated Legendre polynomials
      REAL ALP(NFSPEC_S)
C data (computed in TMPFLT from array of Fourier coefficients)
      COMPLEX DATA(2,NLFC_S)
C
C     Output
C
C contribution to forward transform "sum"
      COMPLEX SUM(MXLSPEC_S)
C
C---- Local Variables --------------------------------------------------
C
C wavenumber and wavenumber index
      INTEGER M, JM
C polynomial degree index and index bounds
      INTEGER JN, JNFIRST, JNLAST
C spectral coefficient index
      INTEGER L
C spectral coefficient offset for wavenumber index JM
      INTEGER IS
C
C---- Executable Statements --------------------------------------------
C                                                                              
C     Vary M and N so that procedure moves along columns denoted
C     by index JM.  M is given by (JM-1); N is given by (JN+JM-2).
C     Take advantage of symmetric character of Legendre polynomials.
C     (Procedure assumes even number of Gaussian latitudes ...)
      L = 1
      DO JM=JMB_S(P),JME_S(P)
C
        IS = LLCOL_S(JM,2)
        M  = MTRUE_S(JM)
C
        JNFIRST = JNB_S(JNTRNS+JM)
        JNLAST = JNE_S(JNTRNS+JM)
        L = L - JNFIRST
C
        IF (MOD(JNFIRST,2) .EQ. 0) THEN
C         Compute contributions for first JN.
C         N = M + JNFIRST - 1
          SUM(L+JNFIRST) = SUM(L+JNFIRST) + ALP(IS+JNFIRST)*DATA(2,JM)
          JNFIRST = JNFIRST + 1
        ENDIF
C
C       Calculate paired JNs.
        DO JN=JNFIRST,JNLAST-1,2
C         n = m + JN - 1
          SUM(L+JN)   = SUM(L+JN)   + ALP(IS+JN)*DATA(1,JM)
          SUM(L+JN+1) = SUM(L+JN+1) + ALP(IS+JN+1)*DATA(2,JM) 
        ENDDO
C                                                                              
        IF (MOD((JNLAST-JNFIRST),2) .EQ. 0) THEN
C         Compute contributions for last JN.
C         N = M + JNLAST - 1
          SUM(L+JNLAST) = SUM(L+JNLAST) + ALP(IS+JNLAST)*DATA(1,JM)
        ENDIF
C
C       Update local spectral coefficient offset index.
        L = L + JNLAST + 1
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE ILTRNS(ALP, SCOEF, WS, F)
C
C This routine computes Fourier coefficients from spectral coefficients
C by computing the inverse Legendre transform (i.e., evaluating
C the Legendre expansion)
C
C called by: SHTRNS
C calls: ILTCAST, ILTRNS2
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C parallel algorithm information
#     include "algorithm.i"
C domain decomposition definition variables
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C spectral coefficients
      COMPLEX SCOEF(MXLSPEC_S)
C
C     Work Space
C
C work array
      COMPLEX WS(MXLSPEC_S,BUFSILT)
C
C     Output
C
C computed Fourier coefficients 
      COMPLEX F(NLFC_S,NLLAT_S)
C
C---- Local Variables --------------------------------------------------
C
C polynomial degree transition index
      INTEGER JNTRNS
C flag array to indicate whether a given Fourier coefficient
C has been "visited" in previous stages of the computation
      INTEGER JMFLAG(MMX+1)
C parallel stage / index of active buffer in multiple buffer algorithm
      INTEGER P, PM
C
C---- Executable Statements --------------------------------------------
C
C     Determine Fourier coefficients by inverse Legendre transform.
C     Vary M and N so procedure moves along columns denoted by
C     index JM.  M is given by (JM-1) while N is given by (JN+M-1).
C
      IF (NLMM_S .GT. 0) THEN
C
C       Send local segment of data array on.
        CALL ILTCAST(1, NLTSTEPS, MXLSPEC_S, SCOEF, WS, PM)
C
C       Calculate using local data.
        CALL ILTRNS2(1, JNTRNS, JMFLAG, ALP, SCOEF, F)
C
C       Finish calculation using external data.
        DO P=2,NLTSTEPS
C
C         Send current segment of data array on and get the next 
C         segment.
          CALL ILTCAST(P, NLTSTEPS, MXLSPEC_S, SCOEF, WS, PM)
C
C         Calculate using new data.
          CALL ILTRNS2(P, JNTRNS, JMFLAG, ALP, WS(1,PM), F)
C
        ENDDO
C
      ENDIF
C
      IF (NLMM_S .LT. NLFC_S) THEN
C       Zero the tail of the complex coefficient spectrum.
        CALL ZEROFC(NLMM_S, NLFC_S, MXLFC_S, NLLAT_S,
     &              JMLTRUE_S, F)
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE ILTRNS2(P, JNTRNS, JMFLAG, ALP, SCOEF, F)
C                                                                              
C This routine computes contributions to the Fourier coefficients 
C using a segment of the spectral coefficients.
C                                                                              
C called by: ILTRNS
C calls:
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C parallel stage
      INTEGER P
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C spectral coefficients
      COMPLEX SCOEF(MXLSPEC_S)                        
C
C     Input/Output
C
C current polynomial degree transition index
      INTEGER JNTRNS
C flag array to indicate whether a given Fourier coefficient
C has been "visited" in previous stages of the computation
      INTEGER JMFLAG(MMX+1)
C
C     Work Space/Output
C
C Fourier coefficients
      COMPLEX F(NLFC_S,NLLAT_S)
C
C---- Local Variables --------------------------------------------------
C
C even and odd contributions to PHIFC
      COMPLEX CTMP1, CTMP2
C latitude (northern and southern) indices
      INTEGER NL, SL
C untruncated and truncated wavenumber indices
      INTEGER IM, JM
C polynomial degree index and index bounds
      INTEGER JN, JNFIRST, JNLAST
C spectral coefficient
      INTEGER L
C spectral coefficient offset for wavenumber index JM
      INTEGER IS
C
C---- Executable Statements --------------------------------------------
C
C     Determine Fourier coefficients by inverse Legendre transform.
C     Vary M and N so procedure moves along columns denoted by
C     index JM.  M is given by (JM-1) while N is given by (JN+M-1).
C
C     Initialize/update polynomial degree transition index and flag
C     array.
      IF (P .EQ. 1) THEN
C
C       First call in this transform: initialize offset
        JNTRNS = 1 - JMB_S(P)
C
C       and flag array.
        DO JM=1,NLMM_S
          JMFLAG(JM) = 0
        ENDDO
C
      ELSE
C
C       Unpdate offset
        JNTRNS = JNTRNS + (JME_S(P-1)+1) - JMB_S(P)     
C
C       and flag array.
        DO JM=JMB_S(P-1),JME_S(P-1)
          JMFLAG(JM) = 1
        ENDDO
C
      ENDIF
C
C     Compute contribution to Fourier coefficients.
      DO NL=1,NLLATH_S
        SL = NLLAT_S-NL+1
C
        L = 1
        DO JM=JMB_S(P),JME_S(P)
          IS = LLCOL_S(JM,2)
          IM = JMLTRUE_S(JM)
C
          JNFIRST = JNB_S(JNTRNS+JM)
          JNLAST = JNE_S(JNTRNS+JM)
          L = L - JNFIRST
C
C         Initialize temporaries.
          IF (MOD(JNFIRST,2) .EQ. 0) THEN
            CTMP1 =  (0.0,0.0)
            CTMP2 =  SCOEF(L+JNFIRST)*ALP(IS+JNFIRST,NL) 
            JNFIRST = JNFIRST + 1
          ELSE
            CTMP1 = (0.0,0.0)
            CTMP2 = (0.0,0.0)
          ENDIF
C
C         Compute contributions for paired JNs.
          DO JN=JNFIRST,JNLAST-1,2
            CTMP1 = CTMP1 + SCOEF(L+JN)*ALP(IS+JN,NL)
            CTMP2 = CTMP2 + SCOEF(L+JN+1)*ALP(IS+JN+1,NL)
          ENDDO
C
          IF (MOD((JNLAST-JNFIRST),2) .EQ. 0) THEN
C           Compute contributions for last JN.
            CTMP1 = CTMP1 + SCOEF(L+JNLAST)*ALP(IS+JNLAST,NL)
          ENDIF
C
C         Combine contributions of even and odd wavenumbers to obtain
C         Fourier coefficients.
          IF (JMFLAG(JM) .EQ. 0) THEN
            F(IM,NL)  = CTMP1 + CTMP2
            F(IM,SL)  = CTMP1 - CTMP2
          ELSE
            F(IM,NL)  = F(IM,NL)  + CTMP1 + CTMP2
            F(IM,SL)  = F(IM,SL)  + CTMP1 - CTMP2
          ENDIF
C
C         Update local spectral coefficient offset index.
          L = L + JNLAST + 1
C
        ENDDO
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
