C#######################################################################
C PSTSWM Version 4.0 (12/1/94)                                         #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
C include precision declaration definitions                            #
#include "precision.i"
C#######################################################################
      SUBROUTINE CALP(CMN, DMN, EMN, EPSIL, ALP, DALP) 
C
C This subroutine calculates the associated Legendre polynomials using
C a highly stable four term recurrence relation given by 
C Belousov (1962). The spectral truncation parameters are defined by 3 
C parameters: MM, the largest Fourier wavenumber, KK, the highest
C degree of the associated Legendre polynomials, and NN, the highest
C degree of the associated Legendre polynomials for M=0. The length of
C the associated Legendre polynomial array ALP is given by the relation
C LEN = ((MM+1)*(NN+1) - (LMN**2 + LMN)/2) where LMN = MM + NN - KK
C variables are stored along columns starting with column M=0.
C The length of each row is stored in the array LROW(0:KK,2).
C The length of each column assigned to a processor is stored in the
C array LLCOL_S(1:MMX+1,1) and is evaluated in subroutine input as 
C (NN+1)-AMAX(M+NN-KK,0) where 0<=M<=MM. Cumulative column lengths
C (cumulative displacements) are also stored in LLCOL_S(1:MMX+1,2) so 
C that the associated Legendre polynomial of order M, degree N, and 
C argument SNJ(NL) is addressed as ALP(LLCOL_S(MTINV_S(M),2)+(N-M+1),NL),
C or using the statement function IDSCP as ALP(IDSPC(M,N),NL). The same 
C form applies for addressing the derivatives (DALP) and recurrence
C coefficients defined in the EPSIL matrix.
C
C For parallelization, all intermediate values are still calculated,
C but only the relevant values for ALP and DALP are saved,
C i.e., values satisfying (MTINV_S(M) .NE. -1) and (NL=1,NLLATH_S).
C   
C called by: INPUT
C calls: GLAT_S
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C problem resolution information
#     include "problem.i"
C domain decomposition information
#     include "spectral.i"
C
C---- Arguments --------------------------------------------------------
C
C     Work Space
C
C recurrence coefficients for associated Legendre polynomials
      DOUBLE PRECISION CMN(LRM+1)
      DOUBLE PRECISION DMN(LRM+1)
      DOUBLE PRECISION EMN(LRM+1)
      DOUBLE PRECISION EPSIL(NFSPEC_S)
C
C     Output
C
C associated Legendre polynomials
      REAL ALP(NFSPEC_S,NLLATH_S)
C deriv. associated Legendre polynomials
      REAL DALP(NFSPEC_S,NLLATH_S)
C
C---- Local Variables --------------------------------------------------
C
C "constant" variables: pi/2
      DOUBLE PRECISION PIHALF
C row length and index
      INTEGER LROW(0:KKX,2)
C work array used to evaluate Belousov recurrence
      DOUBLE PRECISION CN2N1N(KKX+1,3)
C polynomial values for first and second columns of ALP array for
C a given latitude
      DOUBLE PRECISION COL1(KKX), COL2(KKX)
C partial sums in trigonometric expansions for evaluating
C first and second columns
      DOUBLE PRECISION SNPSUM(KKX), CSPSUM(KKX)
C factors in trigonometric expansions for first and second columns
      DOUBLE PRECISION SQNP(KKX), AN(KKX), CSFAC(KKX), SNFAC(KKX),
     &                 COSTBL(KKX),  SINTBL(KKX), RNORM
C current number of terms in trigonometric expansion for the first and 
C second columns
      INTEGER JK
C
C latitude, shifted latitude, and sine and cosine of latitude
      DOUBLE PRECISION RLAT, THTA, SNJ, COS2P
C latitude, wavenumber, and polynomial degree indices
      INTEGER NL, JM, JN, K
C spectral coefficient offset for wavenumber index JM
      INTEGER IS
C true wavenumber and limits for loop over wavenumber
      INTEGER M, MSTART, MLIM
C polynomial degree and limit for loop over polynomial degree
      INTEGER N, NSTART
C
C---- External Functions -----------------------------------------------
C
C Gaussian latitudes
      EXTERNAL GLAT_S
      REAL GLAT_S
C
C---- Statement Functions ----------------------------------------------
C
C address computation of ALP, DALP, EPSIL
C e.g., EPSIL(M,N) = EPSIL(IDSPC(M,N))
      INTEGER MDUM, NDUM, IDSPC, IDSPR
      IDSPC(MDUM,NDUM) = LLCOL_S(MTINV_S(MDUM),2)+(NDUM-MDUM+1)
C
C address computation of Belousov recurrence coefficient matrices.  
C e.g., CMN(M,N) = CMN(IDSPR(M,N))                  
      IDSPR(MDUM,NDUM) = 1 + LROW(NDUM,2)+MDUM                                 
C
C---- Executable Statements --------------------------------------------
C
C     Begin by calculating row lengths and cumulative displacements
C     and store the information in LROW(0:KK,1:2). Row info is from M=0
C     to M=MM for each 0 <= N <= KK (i.e., it does not exclude the empty
C     area between (NN+M) and KK when KK > NN). 
      LROW(0,1) = 1
      LROW(0,2) = 0
C
      DO K=1,KK
        LROW(K,1) = MIN(K+1,MM+1)
        LROW(K,2) = LROW(K-1,1) + LROW(K-1,2)
      ENDDO
C
C     Compute local EPSIL matrix.
      DO JM=1,NLMM_S
        M = MTRUE_S(JM)
        IS = LLCOL_S(JM,2)
        EPSIL(IS+1) = 0.0D0
        IF (LLCOL_S(JM,1) .GT. 1) THEN
          EPSIL(IS+2) = 1.0D0/SQRT(DBLE(2*M+3))
          DO JN = 3,LLCOL_S(JM,1)
            N = JN + M - 1
            EPSIL(IS+JN) = SQRT(DBLE((N*N)-M*M)/DBLE(4*(N*N)-1))
          ENDDO
        ENDIF
      ENDDO
C
C     Compute recurrence coefficient matrices CMN, DMN, EMN.
C     Reason for storing by row is to avoid bank conflicts when              
C     evaluating the polynomials.
      DO N = 2,KK
        IS   = LROW(N,2)
        DO M=2,LROW(N,1)-1
          JM = M+1
          CMN(IS+JM) = SQRT(DBLE((2*N+1)*(M+N-1)*(M+N-3))
     &                    /(DBLE((2*N-3)*(M+N)*(M+N-2))))
          DMN(IS+JM) = SQRT(DBLE((2*N+1)*(M+N-1)*(N-M+1))
     &                    /(DBLE((2*N-1)*(M+N)*(M+N-2))))
          EMN(IS+JM) = SQRT(DBLE((2*N+1)*(N-M))
     &                    /(DBLE((2*N-1)*(M+N))))
        ENDDO
      ENDDO
C
C     Compute associated Legendre polynomials and their derivatives for
C     the truncated wavenumber space (defined in LLCOL_S(0:NN,1:2) with
C     arguments given by grid function GLAT_S) using the Belousov
C     algorithm. The procedure involves extra work for any truncation
C     other than triangular because the Belousov recurrence requires
C     polynomial information in part of the region between KK and NN.
C     More elaborate bookkeeping could reduce this extra computation,
C     but by less than a factor of two (probably less than 10% of total
C     work). 
C
C     Precompute some factors.
      AN(1) = SQRT(0.75D0)
      DO N=2,KK
        AN(N) = AN(N-1)*SQRT(1.0D0-(1.0D0/DBLE(4*N*N)))
      ENDDO
      PIHALF = 2.0D0*ATAN(1.0D0)
C                                                                              
C     Begin procedure ... outer loop over latitude (argument).
      DO NL=1,NLLATH_S
C
C       Determine proper index for latitude dependent quantities
C       (provision for doing this computation on the fly, not in use).
        RLAT = GLAT_S(NL)
C
C       Begin by computing 1st two elements in each row (M=0:1;N=0:KK).
C       Evaluate the series expansions (19) and (21) in Belousov (1962).
C       Final results are stored in work arrays COL1 and COL2.
C
        COS2P = COS(RLAT)
        SNJ   = SIN(RLAT)
        THTA  = PIHALF - RLAT
        CN2N1N(1,1)  = SQRT(0.50D0)
        IF (MTINV_S(0) .NE. -1) THEN
          ALP(IDSPC(0,0),NL)  = CN2N1N(1,1)
          DALP(IDSPC(0,0),NL) = 0.0D0
        ENDIF
C
C       Initialize working space.
        DO N=1,KK
          SNPSUM(N) = 0.0D0
          CSPSUM(N) = 0.0D0
          SQNP(N)   = 1.0D0/SQRT(DBLE(N*N + N))
          CSFAC(N)  = 1.0D0
          SNFAC(N)  = DBLE(N)*SQNP(N)
          COSTBL(N) = COS(DBLE(N)*THTA)
          SINTBL(N) = SIN(DBLE(N)*THTA)
        ENDDO
C
C       Each increment in JK evaluates an additional term in expansions.
        JK=1
        DO N=1,KK
          CSPSUM(N) = CSPSUM(N)+COSTBL(N-JK+1)*CSFAC(N)
          SNPSUM(N) = SNPSUM(N)+SINTBL(N-JK+1)*SNFAC(N)
        ENDDO
C
        DO JK=3,KK+1,2
C
          NSTART = MAX(JK-1,1)
          N = NSTART
          CSFAC(N)  = DBLE(JK-2)/DBLE(JK-1)*DBLE(2*N-JK+3)
     &                /DBLE(2*N-JK+2)*CSFAC(N)
          CSPSUM(N) = CSPSUM(N) + CSFAC(N)*0.50D0
C
          DO N=NSTART+1,KK
            CSFAC(N)  = DBLE(JK-2)/DBLE(JK-1)*DBLE(2*N-JK+3)
     &                  /DBLE(2*N-JK+2)*CSFAC(N)
            SNFAC(N)  = CSFAC(N)*DBLE(N-JK+1)*SQNP(N)
            CSPSUM(N) = CSPSUM(N)+COSTBL(N-JK+1)*CSFAC(N)
            SNPSUM(N) = SNPSUM(N)+SINTBL(N-JK+1)*SNFAC(N)
          ENDDO
C
        ENDDO
C
        RNORM = 1.0D0/CN2N1N(1,1)
        DO N=1,KK
          COL1(N) = AN(N)*CSPSUM(N)*RNORM
          COL2(N) = AN(N)*SNPSUM(N)*RNORM
        ENDDO
C
        DO N=1,KK
C
C         Evaluate remaining polynomials by sweeping through rows N=1:KK.
C         First two elements obtained from the above series expansions.
          CN2N1N(1,3) = COL1(N)
          CN2N1N(2,3) = COL2(N)
C
          IF (N .EQ. 1) THEN
C           Necessary detour to "prime the pipeline" (first pass).
            CN2N1N(1,2) = CN2N1N(1,3)
            CN2N1N(2,2) = CN2N1N(2,3)
            IF (MTINV_S(0) .NE. -1) THEN
              ALP(IDSPC(0,1),NL)  = CN2N1N(1,2)
              DALP(IDSPC(0,1),NL) = SQRT(3.0D0)*CN2N1N(1,1)
     &                            - SNJ*CN2N1N(1,2)
            ENDIF
            IF (MTINV_S(1) .NE. -1) THEN
              ALP(IDSPC(1,1),NL)  = CN2N1N(2,2)
              DALP(IDSPC(1,1),NL) = -SNJ*CN2N1N(2,2)
            ENDIF
          ELSE
C           Evaluate the remainder of this row (M = 2, 3, 4, ...)
C           using the Belousov recurrence relation.
            MLIM = MIN(MM,N-1)
            DO M=2,MLIM
              JM = M+1
              CN2N1N(JM,3) = CMN(IDSPR(M,N))*CN2N1N(JM-2,1)
     &                       -SNJ*(DMN(IDSPR(M,N))
     &                       *CN2N1N(JM-2,2) - EMN(IDSPR(M,N))
     &                       *CN2N1N(JM,2))
            ENDDO
C
C           Put values of the polynomials contained in CN2N1N(0:MLIM,3)
C           into the associated Legendre polynomial array ALP.
            MSTART = MAX(N-NN,0)
            DO M=MSTART,MLIM
              JM = M+1
              IF (MTINV_S(M) .NE. -1) THEN
                ALP(IDSPC(M,N),NL) = CN2N1N(JM,3)
              ENDIF
            ENDDO
C
C           Special evaluation required for diagonal element M=N
            IF (N .LE. MM) THEN
              CN2N1N(N+1,3) = SQRT(1.0D0 + (1.0D0/DBLE(2*N)))*COS2P
     &                      * CN2N1N(N,2)
              IF (MTINV_S(N) .NE. -1) THEN
                ALP(IDSPC(N,N),NL)  = CN2N1N(N+1,3)
                DALP(IDSPC(N,N),NL) = -DBLE(N)*SNJ*CN2N1N(N+1,3)
              ENDIF
            ENDIF
C
C           Make room for new polynomial evaluation in CN2N1N(0:MLIM,3).
            DO JM=1,N+1
              CN2N1N(JM,1) = CN2N1N(JM,2)
              CN2N1N(JM,2) = CN2N1N(JM,3)
            ENDDO
C
          ENDIF
C
        ENDDO
C
C       Efficiently evaluate derivatives separately (along columns).
        DO M=0,MM
          IF (MTINV_S(M) .NE. -1) THEN
            IS = LLCOL_S(MTINV_S(M),2)
            DO JN=2,LLCOL_S(MTINV_S(M),1)
              N = JN+M-1
              DALP(IS+JN,NL) = DBLE(2*N+1)*EPSIL(IS+JN)*
     &         ALP(IS+JN-1,NL)-DBLE(N)*SNJ*ALP(IS+JN,NL)
            ENDDO
          ENDIF
        ENDDO
C
      ENDDO
C
      RETURN                                                                 
      END                                                                    
