tfill_missing.py - pism - [fork] customized build of PISM, the parallel ice sheet model (tillflux branch)
 (HTM) git clone git://src.adamsgaard.dk/pism
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
       tfill_missing.py (17216B)
       ---
            1 #!/usr/bin/env python3
            2 
            3 # @package fill_missing
            4 # \brief This script solves the Laplace equation as a method of filling holes in map-plane data.
            5 #
            6 # \details The script is an implementation of the SOR method with Chebyshev
            7 # acceleration for the Laplace equation, as described in 'Numerical Recipes in
            8 # Fortran: the art of scientific computing' by William H. Press et al -- 2nd
            9 # edition.
           10 #
           11 # Note also that this script can be used both from the command line and as a
           12 # Python module -- by adding 'from fill_missing import laplace' to your
           13 # program.
           14 # Uses an approximation to Laplace's equation
           15 #         \f[ \nabla^2 u = 0 \f]
           16 # to smoothly replace missing values in two-dimensional NetCDF variables with the average of the ``nearby'' non-missing values.
           17 # Here is hypothetical example, filling the missing values in the variables \c topg and \c usurf, using a convergence tolerance of \f$10^{-4}\f$ and the initial guess of \f$100\f$, on data in the NetCDF file \c data.nc :
           18 # \code
           19 # fill_missing.py -f data.nc -v topg,usurf --eps=1.0e-4 \
           20 #                 -i 100.0 -o data_smoothed.nc
           21 # \endcode
           22 # Options \c -i and \c -e specify the initial guess and the convergence tolerance for \e all the specified variables, so using these options only makes sense if all the variables have the same units.  Moreover, making a good initial guess can noticeably reduce the time needed to fill in the holes.  Generally variables should be filled one at a time.
           23 #
           24 # Each of the requested variables must have missing values specified
           25 # according to CF Metadata conventions, namely one of the following:
           26 # \c valid_range or both of \c valid_min and
           27 # \c valid_max (if the values are in a specific range); one of
           28 # \c valid_min (\c valid_max) if values are greater (less)
           29 # than some value, or \c _FillValue.  Also \c _FillValue is
           30 # interpreted as \c valid_max if it is positive, and as
           31 # \c valid_min otherwise, and the \c missing_value  attribute is deprecated
           32 # by the NetCDF User's Guide, but is supported for backward compatibility.  For more information see
           33 # <a href="http://www.unidata.ucar.edu/software/netcdf/guide_10.html#SEC76">NetCDF User's Guide: Attributes</a>.
           34 # Run \verbatim fill_missing.py --help \endverbatim for the list of available
           35 # command-line options.
           36 
           37 
           38 # CK, 08/12/2008
           39 
           40 from numpy import *
           41 
           42 # Computes \f$\rho_{Jacobi}\f$, see formula (19.5.24), page 858.
           43 
           44 
           45 def rho_jacobi(dimensions):
           46     (J, L) = dimensions
           47     return (cos(pi / J) + cos(pi / L)) / 2
           48 
           49 # This makes the stencil wrap around the grid. It is unclear if this should be
           50 #  done, but it allows using a 4-point stencil for all points, even if they
           51 #  are on the edge of the grid (otherwise we need to use three points on the
           52 #  sides and two in the corners).
           53 #
           54 #  Is and Js are arrays with row- and column-indices, M and N are the grid
           55 #  dimensions.
           56 
           57 
           58 def fix_indices(Is, Js, dimensions):
           59 
           60     (M, N) = dimensions
           61     Is[Is == M] = 0
           62     Is[Is == -1] = M - 1
           63     Js[Js == N] = 0
           64     Js[Js == -1] = N - 1
           65     return (Is, Js)
           66 
           67 # \brief laplace solves the Laplace equation
           68 # \details laplace solves the Laplace equation using the SOR method with Chebyshev
           69 #    acceleration as described in 'Numerical Recipes in Fortran: the art of
           70 #    scientific computing' by William H. Press et al -- 2nd edition, section
           71 #    19.5.
           72 #
           73 #    data is a 2-d array (computation grid)
           74 #
           75 #    mask is a boolean array; setting mask to 'data == 0', for example, results
           76 #         in only modifying points where 'data' is zero, all the other points
           77 #         are left as is. Intended use: if in an array the value of -9999.0
           78 #         signifies a missing value, then setting mask to 'data == -9999.0'
           79 #         fills in all the missing values.
           80 #
           81 #    eps1 is the first stopping criterion: the iterations stop if the norm of
           82 #         residual becomes less than eps1*initial_norm, where 'initial_norm' is
           83 #         the initial norm of residual. Setting eps1 to zero or a negative
           84 #         number disables this stopping criterion.
           85 #
           86 #    eps2 is the second stopping criterion: the iterations stop if the absolute
           87 #         value of the maximal change in value between successive iterations is
           88 #         less than eps2. Setting eps2 to zero or a negative number disables
           89 #         this stopping criterion.
           90 #
           91 #    initial_guess is the initial guess used for all the values in the domain;
           92 #         the default is 'mean', i.e. use the mean of all the present values as
           93 #         the initial guess for missing values. initial_guess has to be 'mean'
           94 #         or a number.
           95 #
           96 #    max_iter is the maximum number of iterations allowed. The default is 10000.
           97 
           98 
           99 def laplace(data, mask, eps1, eps2, initial_guess='mean', max_iter=10000):
          100 
          101     dimensions = data.shape
          102     rjac = rho_jacobi(dimensions)
          103     i, j = indices(dimensions)
          104     # This splits the grid into 'odd' and 'even' parts, according to the
          105     # checkerboard pattern:
          106     odd = (i % 2 == 1) ^ (j % 2 == 0)
          107     even = (i % 2 == 0) ^ (j % 2 == 0)
          108     # odd and even parts _in_ the domain:
          109     odd_part = list(zip(i[mask & odd], j[mask & odd]))
          110     even_part = list(zip(i[mask & even], j[mask & even]))
          111     # relative indices of the stencil points:
          112     k = array([0, 1, 0, -1])
          113     l = array([-1, 0, 1, 0])
          114     parts = [odd_part, even_part]
          115 
          116     try:
          117         initial_guess = float(initial_guess)
          118     except:
          119         if initial_guess == 'mean':
          120             present = array(ones_like(mask) - mask, dtype=bool)
          121             initial_guess = mean(data[present])
          122         else:
          123             print("""ERROR: initial_guess of '%s' is not supported (it should be a number or 'mean').
          124     Note: your data was not modified.""" % initial_guess)
          125             return
          126 
          127     data[mask] = initial_guess
          128     print("Using the initial guess of %10f." % initial_guess)
          129 
          130     # compute the initial norm of residual
          131     initial_norm = 0.0
          132     for m in [0, 1]:
          133         for i, j in parts[m]:
          134             Is, Js = fix_indices(i + k, j + l, dimensions)
          135             xi = sum(data[Is, Js]) - 4 * data[i, j]
          136             initial_norm += abs(xi)
          137     print("Initial norm of residual =", initial_norm)
          138     print("Criterion is (change < %f) OR (res norm < %f (initial norm))." % (eps2, eps1))
          139 
          140     omega = 1.0
          141     # The main loop:
          142     for n in arange(max_iter):
          143         anorm = 0.0
          144         change = 0.0
          145         for m in [0, 1]:
          146             for i, j in parts[m]:
          147                 # stencil points:
          148                 Is, Js = fix_indices(i + k, j + l, dimensions)
          149                 residual = sum(data[Is, Js]) - 4 * data[i, j]
          150                 delta = omega * 0.25 * residual
          151                 data[i, j] += delta
          152 
          153                 # record the maximal change and the residual norm:
          154                 anorm += abs(residual)
          155                 if abs(delta) > change:
          156                     change = abs(delta)
          157                 # Chebyshev acceleration (see formula 19.5.30):
          158                 if n == 1 and m == 1:
          159                     omega = 1.0 / (1.0 - 0.5 * rjac ** 2)
          160                 else:
          161                     omega = 1.0 / (1.0 - 0.25 * rjac ** 2 * omega)
          162         print("max change = %10f, residual norm = %10f" % (change, anorm))
          163         if (anorm < eps1 * initial_norm) or (change < eps2):
          164             print("Exiting with change=%f, anorm=%f after %d iteration(s)." % (change,
          165                                                                                anorm, n + 1))
          166             return
          167     print("Exceeded the maximum number of iterations.")
          168     return
          169 
          170 
          171 if __name__ == "__main__":
          172     from optparse import OptionParser
          173     from sys import argv, exit
          174     from shutil import copy, move
          175     from tempfile import mkstemp
          176     from os import close
          177     from time import time, asctime
          178     try:
          179         from netCDF4 import Dataset as NC
          180     except:
          181         print("netCDF4 is not installed!")
          182         sys.exit(1)
          183 
          184     parser = OptionParser()
          185 
          186     parser.usage = "%prog [options]"
          187     parser.description = "Fills missing values in variables selected using -v in the file given by -f."
          188     parser.add_option("-f", "--file", dest="input_filename",
          189                       help="input file")
          190     parser.add_option("-v", "--vars", dest="variables",
          191                       help="comma-separated list of variables to process")
          192     parser.add_option("-o", "--out_file", dest="output_filename",
          193                       help="output file")
          194     parser.add_option("-e", "--eps", dest="eps",
          195                       help="convergence tolerance",
          196                       default="1.0")
          197     parser.add_option("-i", "--initial_guess", dest="initial_guess",
          198                       help="initial guess to use; applies to all selected variables",
          199                       default="mean")
          200 
          201     (options, args) = parser.parse_args()
          202 
          203     if options.input_filename == "":
          204         print("""Please specify the input file name
          205 (using the -f or --file command line option).""")
          206         exit(-1)
          207     input_filename = options.input_filename
          208 
          209     if options.variables == "":
          210         print("""Please specify the list of variables to process
          211 (using the -v or --variables command line option).""")
          212         exit(-1)
          213     variables = (options.variables).split(',')
          214 
          215     if options.output_filename == "":
          216         print("""Please specify the output file name
          217 (using the -o or --out_file command line option).""")
          218         exit(-1)
          219     output_filename = options.output_filename
          220 
          221     eps = float(options.eps)
          222 
          223     # Done processing command-line options.
          224 
          225     print("Creating the temporary file...")
          226     try:
          227         (handle, tmp_filename) = mkstemp()
          228         close(handle)  # mkstemp returns a file handle (which we don't need)
          229         copy(input_filename, tmp_filename)
          230     except IOError:
          231         print("ERROR: Can't create %s, Exiting..." % tmp_filename)
          232 
          233     try:
          234         nc = NC(tmp_filename, 'a')
          235     except Exception as message:
          236         print(message)
          237         print("Note: %s was not modified." % output_filename)
          238         exit(-1)
          239 
          240     # add history global attribute (after checking if present)
          241     historysep = ' '
          242     historystr = asctime() + ': ' + historysep.join(argv) + '\n'
          243     if 'history' in nc.ncattrs():
          244         nc.history = historystr + nc.history  # prepend to history string
          245     else:
          246         nc.history = historystr
          247 
          248     t_zero = time()
          249     for name in variables:
          250         print("Processing %s..." % name)
          251         try:
          252             var = nc.variables[name]
          253 
          254             attributes = ["valid_range", "valid_min", "valid_max",
          255                           "_FillValue", "missing_value"]
          256             adict = {}
          257             print("Reading attributes...")
          258             for attribute in attributes:
          259                 print("* %15s -- " % attribute, end=' ')
          260                 if attribute in var.ncattrs():
          261                     adict[attribute] = getattr(var, attribute)
          262                     print("found")
          263                 else:
          264                     print("not found")
          265 
          266             if (var.ndim == 3):
          267                 nt = var.shape[0]
          268                 for t in range(0, nt):
          269                     print("\nInterpolating time step %i of %i\n" % (t, nt))
          270 
          271                     data = asarray(squeeze(var[t, :, :].data))
          272 
          273                     if "valid_range" in adict:
          274                         range = adict["valid_range"]
          275                         mask = ((data >= range[0]) & (data <= range[1]))
          276                         print("Using the valid_range attribute; range = ", range)
          277 
          278                     elif "valid_min" in adict and "valid_max" in adict:
          279                         valid_min = adict["valid_min"]
          280                         valid_max = adict["valid_max"]
          281                         mask = ((data < valid_min) | (data > valid_max))
          282                         print("""Using valid_min and valid_max attributes.
          283         valid_min = %10f, valid_max = %10f.""" % (valid_min, valid_max))
          284 
          285                     elif "valid_min" in adict:
          286                         valid_min = adict["valid_min"]
          287                         mask = data < valid_min
          288                         print("Using the valid_min attribute; valid_min = %10f" % valid_min)
          289 
          290                     elif "valid_max" in adict:
          291                         valid_max = adict["valid_max"]
          292                         mask = data > valid_max
          293                         print("Using the valid_max attribute; valid_max = %10f" % valid_max)
          294 
          295                     elif "_FillValue" in adict:
          296                         fill_value = adict["_FillValue"]
          297                         if fill_value <= 0:
          298                             mask = data <= fill_value + 2 * finfo(float).eps
          299                         else:
          300                             mask = data >= fill_value - 2 * finfo(float).eps
          301                         print("Using the _FillValue attribute; _FillValue = %10f" % fill_value)
          302 
          303                     elif "missing_value" in adict:
          304                         missing = adict["missing_value"]
          305                         mask = abs(data - missing) < 2 * finfo(float).eps
          306                         print("""Using the missing_value attribute; missing_value = %10f
          307         Warning: this attribute is deprecated by the NUG.""" % missing)
          308 
          309                     else:
          310                         print("No missing values found. Skipping this variable...")
          311                         continue
          312 
          313                     count = int(sum(mask))
          314                     if count == 0:
          315                         print("No missing values found. Skipping this variable...")
          316                         continue
          317                     print("Filling in %5d missing values..." % count)
          318                     t0 = time()
          319                     laplace(data, mask, -1, eps, initial_guess=options.initial_guess)
          320                     var[t, :, :] = data
          321 
          322                     # now REMOVE missing_value and _FillValue attributes
          323                     try:
          324                         delattr(var, '_FillValue')
          325                     except:
          326                         pass
          327                     try:
          328                         delattr(var, 'missing_value')
          329                     except:
          330                         pass
          331                     print("This took %5f seconds." % (time() - t0))
          332 
          333             elif (var.ndim == 2):
          334 
          335                 data = asarray(squeeze(var[:]))
          336 
          337                 if "valid_range" in adict:
          338                     range = adict["valid_range"]
          339                     mask = ((data >= range[0]) & (data <= range[1]))
          340                     print("Using the valid_range attribute; range = ", range)
          341 
          342                 elif "valid_min" in adict and "valid_max" in adict:
          343                     valid_min = adict["valid_min"]
          344                     valid_max = adict["valid_max"]
          345                     mask = ((data < valid_min) | (data > valid_max))
          346                     print("""Using valid_min and valid_max attributes.
          347     valid_min = %10f, valid_max = %10f.""" % (valid_min, valid_max))
          348 
          349                 elif "valid_min" in adict:
          350                     valid_min = adict["valid_min"]
          351                     mask = data < valid_min
          352                     print("Using the valid_min attribute; valid_min = %10f" % valid_min)
          353 
          354                 elif "valid_max" in adict:
          355                     valid_max = adict["valid_max"]
          356                     mask = data > valid_max
          357                     print("Using the valid_max attribute; valid_max = %10f" % valid_max)
          358 
          359                 elif "_FillValue" in adict:
          360                     fill_value = adict["_FillValue"]
          361                     if fill_value <= 0:
          362                         mask = data <= fill_value + 2 * finfo(float).eps
          363                     else:
          364                         mask = data >= fill_value - 2 * finfo(float).eps
          365                     print("Using the _FillValue attribute; _FillValue = %10f" % fill_value)
          366 
          367                 elif "missing_value" in adict:
          368                     missing = adict["missing_value"]
          369                     mask = abs(data - missing) < 2 * finfo(float).eps
          370                     print("""Using the missing_value attribute; missing_value = %10f
          371     Warning: this attribute is deprecated by the NUG.""" % missing)
          372 
          373                 else:
          374                     print("No missing values found. Skipping this variable...")
          375                     continue
          376 
          377                 count = int(sum(mask))
          378                 if count == 0:
          379                     print("No missing values found. Skipping this variable...")
          380                     continue
          381                 print("Filling in %5d missing values..." % count)
          382                 t0 = time()
          383                 laplace(data, mask, -1, eps, initial_guess=options.initial_guess)
          384                 var[:] = data
          385 
          386                 # now REMOVE missing_value and _FillValue attributes
          387                 try:
          388                     delattr(var, '_FillValue')
          389                 except:
          390                     pass
          391                 try:
          392                     delattr(var, 'missing_value')
          393                 except:
          394                     pass
          395                 print("This took %5f seconds." % (time() - t0))
          396             else:
          397                 print('wrong shape')
          398 
          399         except Exception as message:
          400             print("ERROR:", message)
          401             print("Note: %s was not modified." % output_filename)
          402             exit(-1)
          403 
          404     print("Processing all the variables took %5f seconds." % (time() - t_zero))
          405     nc.close()
          406     try:
          407         move(tmp_filename, output_filename)
          408     except:
          409         print("Error moving %s to %s. Exiting..." % (tmp_filename,
          410                                                      output_filename))
          411         exit(-1)