C#######################################################################
C PSTSWM Version 4.0 (12/1/94)                                         #
C  (Stripped down PVM-only version (4/13/95), for use in ParkBench     #
C   benchmark suite)                                                   #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
C include precision declaration definitions                            #
#include "precision.i"
C#######################################################################
      INTEGER FUNCTION INPUT(EPS)
C
C This routine calls routines to input or determine necessary constants
C and problem and algorithm parameters, allocates work space, calls the
C routines which calculate Gaussian latitudes, weights, basis functions
C (associated Legendre polynomials) and derivatives, and calls routines
C to do the initial setup for the fast Fourier transform procedure (in
C this case rftfax). 
C
C called by: PSTSWM
C calls: ALGINP, CALP, GLATS, PRBINP, PRFINP, RFTFAX
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
#     include "params.i"
C
C---- Common Blocks ----------------------------------------------------
C
C machine architecture information
#     include "machine.i"
C problem resolution information
#     include "problem.i"
C parallel algorithm information
#     include "algorithm.i"
C domain decomposition definition
#     include "physical.i"
#     include "fourier.i"
#     include "spectral.i"
C constants & timesteps
#     include "consts.i"
C initialization data
#     include "finit.i"
C transform arrays
#     include "trnsfm.i"
C workspace
#     include "wrkspc.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C single precision machine accuracy
      REAL EPS
C
C---- Local Variables --------------------------------------------------
C
C indices into work array used in CALP when calculating associated 
C Legendre polynomials and their derivatives
      INTEGER ICMN, IDMN, IEMN, IEPSIL
C temporary index pointer into work array used when partitioning 
C work array
      INTEGER NEXTPTR
C latitude and latitude index
      REAL RLAT
      INTEGER NL
C degree of associated Legendre polynomial
      INTEGER N
C log2(NPFC_F)
      INTEGER LGPFC
C
C----- External Functions ----------------------------------------------
C
C base for message types
      EXTERNAL MSGBASE
      INTEGER MSGBASE
C spatial latitude/longitude grid
      EXTERNAL GLAT_P, GLON_P
      REAL GLAT_P, GLON_P
C integer base 2 logarithm
      EXTERNAL LOG2
      INTEGER LOG2
C algorithm initialization routine
      EXTERNAL ALGINP
      INTEGER ALGINP
C problem initialization routine
      EXTERNAL PRBINP
      INTEGER PRBINP
C performance measurement initialization routine
      EXTERNAL PRFINP
      INTEGER PRFINP
C
C---- Executable Statements --------------------------------------------
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C     Initialize user selectable problem and output parameters.
      IF (PRBINP() .EQ. -1) THEN
        INPUT = -1
        RETURN
      ENDIF
C
C     Initialize parallel algorithm parameters.
      IF (ALGINP() .EQ. -1) THEN
        INPUT = -1
        RETURN
      ENDIF
C
C     Initialize performance measurement parameters.
      IF (PRFINP() .EQ. -1) THEN
        INPUT = -1
        RETURN
      ENDIF
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C     Allocate work space, forcing alignment.
C
C     gridpoint solution fields for one vertical level 
C     (extracted from DIV, etc.) 
C
C  REAL D(NLLON_P,NLLAT_P)
      ID = 1
C  REAL Z(NLLON_P,NLLAT_P)
      IZ = ID + NLLON_P*NLLAT_P
      IZ = ALIGN*((IZ+ALIGN-2)/ALIGN) + 1
C  REAL H(NLLON_P,NLLAT_P)
      IH = IZ + NLLON_P*NLLAT_P
      IH = ALIGN*((IH+ALIGN-2)/ALIGN) + 1
C  REAL U(NLLON_P,NLLAT_P)
      IU = IH + NLLON_P*NLLAT_P
      IU = ALIGN*((IU+ALIGN-2)/ALIGN) + 1
C  REAL V(NLLON_P,NLLAT_P)
      IV = IU + NLLON_P*NLLAT_P
      IV = ALIGN*((IV+ALIGN-2)/ALIGN) + 1
C
C     prognostic variables in gridpoint and Fourier spaces
C     (declared "large" (..,6) to implement circular buffer for two time 
C     levels, and large enough to hold
C      REAL (NLLON_P,NLVER_P,NLLAT_P),
C      REAL (NLLON_F,NLVER_F,NLLAT_F),
C      COMPLEX (NLFC_F,NLVER_F,NLLAT_F), and
C      COMPLEX (NLFC_S,NLVER_S,NLLAT_S)
C     without overlapping the different species)
C     (only aligning first - need them in contiguous space)
C
C  REAL (MXLLON_P,NLVER_P,NLLAT_P),
      IDIV  = IV    + NLLON_P*NLLAT_P
      IDIV  = ALIGN*((IDIV+ALIGN-2)/ALIGN) + 1
C  REAL ZETA(MXLLON_P,NLVER_P,NLLAT_P,6),
      IZETA = IDIV  + MXLLON_P*NLVER_P*NLLAT_P
C  REAL PHI(MXLLON_P,NLVER_P,NLLAT_P,6)
      IPHI  = IZETA + MXLLON_P*NLVER_P*NLLAT_P
C  REAL UCOS(MXLLON_P,NLVER_P,NLLAT_P)
      IUCOS = IPHI  + MXLLON_P*NLVER_P*NLLAT_P
C  REAL VCOS(MXLLON_P,NLVER_P,NLLAT_P)
      IVCOS = IUCOS + MXLLON_P*NLVER_P*NLLAT_P
C
C     initial conditions and other initial data (real)
C
C  REAL MOUNT(NLLON_P,NLLAT_P)
C     (making room for 2 time levels of div, zeta, and phi)
      IMOUNT  = IVCOS   + 4*MXLLON_P*NLVER_P*NLLAT_P
      IMOUNT  = ALIGN*((IMOUNT+ALIGN-2)/ALIGN) + 1
C  REAL UIC12(NLLON_P,NLLAT_P)
      IUIC12  = IMOUNT  + NLLON_P*NLLAT_P
      IUIC12  = ALIGN*((IUIC12+ALIGN-2)/ALIGN) + 1
C  REAL VIC12(NLLON_P,NLLAT_P)
      IVIC12  = IUIC12  + NLLON_P*NLLAT_P
      IVIC12  = ALIGN*((IVIC12+ALIGN-2)/ALIGN) + 1
C  REAL PIC12(NLLON_P,NLLAT_P)
      IPIC12  = IVIC12  + NLLON_P*NLLAT_P
      IPIC12  = ALIGN*((IPIC12+ALIGN-2)/ALIGN) + 1
C  REAL DIC12(NLLON_P,NLLAT_P)
      IDIC12  = IPIC12  + NLLON_P*NLLAT_P
      IDIC12  = ALIGN*((IDIC12+ALIGN-2)/ALIGN) + 1
C  REAL EIC12(NLLON_P,NLLAT_P)
      IEIC12  = IDIC12  + NLLON_P*NLLAT_P
      IEIC12  = ALIGN*((IEIC12+ALIGN-2)/ALIGN) + 1
C  REAL PHICON(NLLAT_P)
      IPHICON = IEIC12  + NLLON_P*NLLAT_P
      IPHICON = ALIGN*((IPHICON+ALIGN-2)/ALIGN) + 1
C  REAL UCON(NLLAT_P)
      IUCON   = IPHICON + NLLAT_P
      IUCON   = ALIGN*((IUCON+ALIGN-2)/ALIGN) + 1
C  REAL VCON(NLLAT_P)
      IVCON   = IUCON   + NLLAT_P
      IVCON   = ALIGN*((IVCON+ALIGN-2)/ALIGN) + 1
C
C     work space (input)
C     (overlapped with generic work space)
C
C  DOUBLE PRECISION CMN(LRM+1)
      ICMN   = IVCON + NLLAT_P
      ICMN   = ALIGN*((ICMN+ALIGN-2)/ALIGN) + 1
C  DOUBLE PRECISION DMN(LRM+1)
      IDMN   = ICMN  + 2*(LRM+1)
      IDMN   = ALIGN*((IDMN+ALIGN-2)/ALIGN) + 1
C  DOUBLE PRECISION EMN(LRM+1)
      IEMN   = IDMN  + 2*(LRM+1)
      IEMN   = ALIGN*((IEMN+ALIGN-2)/ALIGN) + 1
C  DOUBLE PRECISION EPSIL(NFSPEC_S)
      IEPSIL = IEMN  + 2*(LRM+1)
      IEPSIL = ALIGN*((IEPSIL+ALIGN-2)/ALIGN) + 1
C
C     work space (generic)
C
C  INIT             - WS1: REAL (MXLLON_P,NLVER_P,NLLAT_P,2) or
C                          COMPLEX (MXLFC_S,NLVER_S,NLLAT_S,2)
C                     WS2: REAL (MXLLON_P,MXLVER_P,MXLLAT_P,2,BUFSWS2) 
C                      or  REAL (MXLLON_F,MXLVER_F,MXLLAT_F,2,BUFSWS2) 
C                      or  COMPLEX (MXLFC_S,MXLVER_S,MXLLAT_S,2,BUFSWS2)
C                      or  COMPLEX (8,NLFC_S,NLLATH_S)
C                     WS3: COMPLEX (MXLSPEC_S,2,BUFSWS3)
C                   [DZSC] (see below)
C                 [SHTRNS] WS1: REAL (MXLLON_P,NLVER_P,NLLAT_P) or
C                               COMPLEX (MXLFC_S,NLVER_S,NLLAT_S)
C                          WS2: REAL (MXLLON_P,MXLVER_P,MXLLAT_P,BUFSWS2) 
C                           or  REAL (MXLLON_F,MXLVER_F,MXLLAT_F,BUFSWS2) 
C                           or  COMPLEX (MXLFC_S,MXLVER_S,MXLLAT_S,BUFSWS2) 
C                           or  COMPLEX (2,NLFC_S,NLLATH_S)
C                          WS3: COMPLEX (MXLSPEC_S,BUFSWS3)
C  SPEED            - WS1: REAL (NLLON_P,NLLAT_P)
C                   - WS2: REAL (NLLON_P)
C  DZSC             - WS1: REAL (MXLLON_P,NLVER_P,NLLAT_P,2) or
C                          COMPLEX (MXLFC_S,NLVER_S,NLLAT_S,2)
C                     WS2: REAL (MXLLON_P,NLVER_P,NLLAT_P,2,BUFSWS2)
C                      or  REAL (MXLLON_F,NLVER_F,NLLAT_F,2,BUFSWS2)
C                      or  COMPLEX (MXLLON_S,NLVER_S,NLLAT_S,2,BUFSWS2)
C                      or  COMPLEX (8,NLFC_S,NLLATH_S)
C                     WS3: COMPLEX (MXLSPEC_S,2,BUFSWS3)
C  PSC              - WS1: REAL (MXLLON_P,NLVER_P,NLLAT_P) or
C                          COMPLEX (MXLFC_S,NLVER_S,NLLAT_S)
C                     WS2: REAL (MXLLON_P,MXLVER_P,MXLLAT_P,BUFSWS2)
C                       or REAL (MXLLON_F,MXLVER_F,MXLLAT_F,BUFSWS2)
C                       or COMPLEX (MXLFC_S,MXLVER_S,MXLLAT_S,BUFSWS2)
C                       or COMPLEX (2,NLFC_S,NLLATH_S)
C                     WS3: COMPLEX (MXLSPEC_S,BUFSWS3)
C                 [SHTRNS] (same as PSC)
C  STEP/COMP1       - WS1: REAL (MXLLON_P,NLVER_P,NLLAT_P,8) or
C                          COMPLEX (MXLFC_S,NLVER_S,NLLAT_S,8) or
C                          COMPLEX (4,MXLSPEC_S,NLVER_S))
C                     WS2: REAL (MXLLON_P,MXLVER_P,MXLLAT_P,8,BUFSWS2) 
C                      or  REAL (MXLLON_F,MXLVER_F,MXLLAT_F,8,BUFSWS2) 
C                      or  COMPLEX (MXLFC_S,MXLVER_S,MXLLAT_S,8,BUFSWS2) 
C                      or  REAL (NLLON_P,NLVER_P,3)
C                      or  REAL (0:KK,3)
C                      or  COMPLEX (16,MXLFC_S,NLVER_S,NLLATH_S)
C                     WS3: COMPLEX (MXLSPEC_S,NLVER_S,3,BUFSWS3)
C                      or  REAL (NLLON_P,NLVER_P,3)
C  NRGTCS           - WS1: REAL (5,NLLON_P,NLLAT_P)
C                     WS2: REAL (5,NLLON_P)
C  ERRANL           - WS1: REAL (NLLON_P,NLLAT_P,6) 
C                     WS2: REAL (10,NLLON_P,NLLAT_P+1)
C  SPCANL           - WS1: REAL (KK,2,NLFC_S+1)
C                     WS2: REAL (KK*2*NLFC_S+1)
      IWS1     = IVCON + NLLAT_P
      IWS1     = ALIGN*((IWS1+ALIGN-2)/ALIGN) + 1
      IWS2     = MAX(IWS1+MXLLON_P*NLVER_P*NLLAT_P*8,
     &               IWS1+2*MXLFC_S*NLVER_S*NLLAT_S*8,
     &               IWS1+2*4*MXLSPEC_S*NLVER_S,
     &               IWS1+NLLON_P*NLLAT_P*6,
     &               IWS1+KK*2*(NLFC_S+1))
      IWS2     = ALIGN*((IWS2+ALIGN-2)/ALIGN) + 1
      IWS3     = MAX(IWS2+MXLLON_P*MXLVER_P*MXLLAT_P*8*BUFSWS2,
     &               IWS2+MXLLON_F*MXLVER_F*MXLLAT_F*8*BUFSWS2,
     &               IWS2+2*MXLFC_S*MXLVER_S*MXLLAT_S*8*BUFSWS2,
     &               IWS2+2*8*NLFC_S*NLLATH_S,
     &               IWS2+5*NLLON_P,
     &               IWS2+NLLON_P*NLVER_P*3,
     &               IWS2+(KK+1)*3,
     &               IWS2+2*16*MXLFC_S*NLVER_S*NLLATH_S,
     &               IWS2+10*NLLON_P*(NLLAT_P+1),
     &               IWS2+KK*2*(NLFC_S+1))
      IWS3     = ALIGN*((IWS3+ALIGN-2)/ALIGN) + 1
      NEXTPTR  = MAX(IWS3+2*MXLSPEC_S*NLVER_S*3*BUFSWS3,
     &               IWS3+2*MXLSPEC_S*2*BUFSWS3,
     &               IWS3+NLLON_P*NLVER_P*3,
     &               IEPSIL+2*NFSPEC_S)
C
C     prognostic variables in spectral space (without levels)
C     (only first and third aligned - need first two in contiguous
C     space)
C
C  COMPLEX DSC(MXLSPEC_S)
      IDSC = NEXTPTR
      IDSC = ALIGN*((IDSC+ALIGN-2)/ALIGN) + 1
C  COMPLEX ZSC(MXLSPEC_S)
      IZSC = IDSC  + 2*MXLSPEC_S
C  COMPLEX PSC(MXLSPEC_S)
      IPSC = IZSC + 2*MXLSPEC_S
      IPSC = ALIGN*((IPSC+ALIGN-2)/ALIGN) + 1
C
C     prognostic variables in spectral space (with levels)
C     (only first aligned - need them in contiguous space)
C
C  COMPLEX DIVSC(MXLSPEC_S,NLVER_S)
      IDIVSC  = IPSC + 2*MXLSPEC_S
      IDIVSC  = ALIGN*((IDIVSC+ALIGN-2)/ALIGN) + 1
C  COMPLEX ZETASC(MXLSPEC_S,NLVER_S)
      IZETASC = IDIVSC  + 2*MXLSPEC_S*NLVER_S
C  COMPLEX PHISC(MXLSPEC_S,NLVER_S)
      IPHISC  = IZETASC + 2*MXLSPEC_S*NLVER_S
C
C     complex values needed for spectral transform
C
C  REAL ALP(NFSPEC_S,NLLATH_S)
      IALP  = IPHISC  + 2*MXLSPEC_S*NLVER_S
      IALP  = ALIGN*((IALP+ALIGN-2)/ALIGN) + 1
C  REAL DALP(NFSPEC_S,NLLATH_S)
      IDALP = IALP    + NFSPEC_S*NLLATH_S
      IDALP = ALIGN*((IDALP+ALIGN-2)/ALIGN) + 1
C
C     initial conditions and other initial data (complex)
C
C  COMPLEX TOPOSC(MXLSPEC_S)
      ITOPOSC = IDALP   + NFSPEC_S*NLLATH_S
      ITOPOSC = ALIGN*((ITOPOSC+ALIGN-2)/ALIGN) + 1
C
C     precalculated twiddle factors for real Fourier transform
C
C  COMPLEX TRIGS((LGPFC+6)*NLLON_F/4 + LGPFC)
      LGPFC  = LOG2(NPFC_F)
      NTRIGS = (LGPFC+6)*NLLON_F/4 + LGPFC
      ITRIGS = ITOPOSC + 2*MXLSPEC_S
      ITRIGS = ALIGN*((ITRIGS+ALIGN-2)/ALIGN) + 1
C
C     Gaussian grid, weights, and other precomputed functions needed in 
C     Legendre transform phase of advancing solution to the shallow
C     water equation
C
C  REAL THTA(NLAT)
      ITHTA   = ITRIGS + 2*NTRIGS
      ITHTA   = ALIGN*((ITHTA+ALIGN-2)/ALIGN) + 1
C  REAL WTS(NLAT)
      IWTS    = ITHTA  + NLAT
      IWTS    = ALIGN*((IWTS+ALIGN-2)/ALIGN) + 1
C  REAL ANNP1(0:KK)
      IANNP1  = IWTS   + NLAT
      IANNP1  = ALIGN*((IANNP1+ALIGN-2)/ALIGN) + 1
C  REAL A2NNP1(0:KK)
      IA2NNP1 = IANNP1 + KK+1
      IA2NNP1 = ALIGN*((IA2NNP1+ALIGN-2)/ALIGN) + 1
C  REAL WTACSJ(NLAT)
      IWTACSJ = IA2NNP1+ KK+1
      IWTACSJ = ALIGN*((IWTACSJ+ALIGN-2)/ALIGN) + 1
C
C     extra communication buffer space, for communication libraries
C     like MPI that allow (require) the specification of user space
C     to implement buffered communication semantics
C
      NEXTPTR = IWTACSJ+NLAT
      IWS4    = ALIGN*((NEXTPTR+ALIGN-2)/ALIGN) + 1
C
C     Check that problem size fits in work heap.
      CALL GMAX0F(NEXTPTR, 1, 2, MSGBASE(), 0)
      CALL GMAX0F(SYSMSGS, 1, 2, MSGBASE(), 0)
      CALL GMAX0F(SYSVOL, 1, 2, MSGBASE(), 0)
C
      CALL SETDATA0F('INTEGER')
      CALL BCAST0F(NEXTPTR, IBYTES, MSGBASE(), 0)
      CALL BCAST0F(SYSMSGS, IBYTES, MSGBASE(), 0)
      CALL BCAST0F(SYSVOL, IBYTES, MSGBASE(), 0)
C
      IF (ME .EQ. 0) THEN
C       Inform user what the memory situation is
        WRITE(6,102) COMPSZ*RBYTES, (NEXTPTR-1)*RBYTES, 
     &               (COMPSZ-IWS4)*RBYTES
 102    FORMAT (/,' AVAILABLE WORK SPACE (IN BYTES):',I14,/,
     &            ' REQUIRED WORK SPACE            :',I14,/,
     &            ' REMAINING WORK SPACE           :',I14)
        IF ((SYSMSGS .NE. 0) .OR. (SYSVOL .NE. 0)) THEN
          WRITE(6,103) SYSMSGS, SYSVOL
 103      FORMAT (/,' -- warning warning warning -- ',/,
     &     ' SYSTEM BUFFER SPACE NEEDED TO AVOID DEADLOCK -',/,
     &     '  NUMBER OF MESSAGES :',I14,/,
     &     '  BUFFER SPACE (IN BYTES) :',I14,/
     &     ' IF SUPPORTED BY COMMUNICATION LIBRARY, WILL TRY TO',
     &     ' ALLOCATE NEEDED SPACE')
          WRITE(6,104)
 104      FORMAT(/,' (THESE ARE NECESSARY ALLOCATIONS, AND MAY NOT',
     &     '  BE SUFFICIENT',/,' IF BUFFERED SENDS DO NOT BLOCK WHEN',
     &     '  SYSTEM BUFFER SPACE IS EXHAUSTED)',/,
     &     ' -- warning warning warning -- ',/)
        ENDIF
      ENDIF
      IF (NEXTPTR .GT. COMPSZ) THEN
        IF (ME .EQ. 0) WRITE(0,101)
 101    FORMAT (/,' PSTSWM: FATAL ERROR IN ROUTINE INPUT:',/,
     &   ' FAILURE IN ALLOCATION OF WORK SPACE',/,
     &   ' PROBLEM SPECIFIED IN INPUT FILE WILL NOT FIT IN',
     &   ' AVAILABLE STORAGE')
        INPUT = -1
        RETURN
      ENDIF
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C     Print single precision machine epsilon (machine accuracy of
C     floating point representation).
      IF (ME .EQ. 0) WRITE (6,645) EPS
  645 FORMAT (/,' MACHINE EPSILON (1.0 + EPS > 1.0) = ',1PE16.9)
C
C     Determine Gaussian latitudes and associated weights.
      CALL GLATS(NLAT, WS(ITHTA), WS(IWTS))
C
C     Calculate the associated Legendre polynomials and derivatives.
      CALL CALP(WS(ICMN), WS(IDMN), WS(IEMN), WS(IEPSIL), WS(IALP),
     &          WS(IDALP)) 
C                                                                              
C     Calculate ANNP1(0:KK), A2NNP1(0:KK), and WTACSJ(1:NLAT)
C     for later use in various tranform procedures.
C     Note: ANNP1(0) is "defined" to be 0.0 even though actually
C     undefined. This makes subsequent code cleaner.
      WS(IANNP1) = 0.0 
      WS(IA2NNP1) = 0.0                               
      DO N=1,KK
        WS(IANNP1+N)  = A/(N*(N+1))
        WS(IA2NNP1+N) = (N*(N+1))/A**2
      ENDDO
C
C     Calculate WTACSJ(NLAT)
      DO NL=1,NLAT
        RLAT = WS(ITHTA+NL-1)
        WS(IWTACSJ+NL-1) = 1.0/(A*COS(RLAT)**2)  
      ENDDO
C                                                                            
C     Call routine to factor transform length NLON and calculate TRIGS.
      CALL RFTFAX(NPFC_F, FCDEX_F, NLON, NTRIGS, WS(ITRIGS), IFAX) 
C
      INPUT = 0
      RETURN
      END






