tfill_missing.py - pism - [fork] customized build of PISM, the parallel ice sheet model (tillflux branch)
(HTM) git clone git://src.adamsgaard.dk/pism
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
tfill_missing.py (17216B)
---
1 #!/usr/bin/env python3
2
3 # @package fill_missing
4 # \brief This script solves the Laplace equation as a method of filling holes in map-plane data.
5 #
6 # \details The script is an implementation of the SOR method with Chebyshev
7 # acceleration for the Laplace equation, as described in 'Numerical Recipes in
8 # Fortran: the art of scientific computing' by William H. Press et al -- 2nd
9 # edition.
10 #
11 # Note also that this script can be used both from the command line and as a
12 # Python module -- by adding 'from fill_missing import laplace' to your
13 # program.
14 # Uses an approximation to Laplace's equation
15 # \f[ \nabla^2 u = 0 \f]
16 # to smoothly replace missing values in two-dimensional NetCDF variables with the average of the ``nearby'' non-missing values.
17 # Here is hypothetical example, filling the missing values in the variables \c topg and \c usurf, using a convergence tolerance of \f$10^{-4}\f$ and the initial guess of \f$100\f$, on data in the NetCDF file \c data.nc :
18 # \code
19 # fill_missing.py -f data.nc -v topg,usurf --eps=1.0e-4 \
20 # -i 100.0 -o data_smoothed.nc
21 # \endcode
22 # Options \c -i and \c -e specify the initial guess and the convergence tolerance for \e all the specified variables, so using these options only makes sense if all the variables have the same units. Moreover, making a good initial guess can noticeably reduce the time needed to fill in the holes. Generally variables should be filled one at a time.
23 #
24 # Each of the requested variables must have missing values specified
25 # according to CF Metadata conventions, namely one of the following:
26 # \c valid_range or both of \c valid_min and
27 # \c valid_max (if the values are in a specific range); one of
28 # \c valid_min (\c valid_max) if values are greater (less)
29 # than some value, or \c _FillValue. Also \c _FillValue is
30 # interpreted as \c valid_max if it is positive, and as
31 # \c valid_min otherwise, and the \c missing_value attribute is deprecated
32 # by the NetCDF User's Guide, but is supported for backward compatibility. For more information see
33 # <a href="http://www.unidata.ucar.edu/software/netcdf/guide_10.html#SEC76">NetCDF User's Guide: Attributes</a>.
34 # Run \verbatim fill_missing.py --help \endverbatim for the list of available
35 # command-line options.
36
37
38 # CK, 08/12/2008
39
40 from numpy import *
41
42 # Computes \f$\rho_{Jacobi}\f$, see formula (19.5.24), page 858.
43
44
45 def rho_jacobi(dimensions):
46 (J, L) = dimensions
47 return (cos(pi / J) + cos(pi / L)) / 2
48
49 # This makes the stencil wrap around the grid. It is unclear if this should be
50 # done, but it allows using a 4-point stencil for all points, even if they
51 # are on the edge of the grid (otherwise we need to use three points on the
52 # sides and two in the corners).
53 #
54 # Is and Js are arrays with row- and column-indices, M and N are the grid
55 # dimensions.
56
57
58 def fix_indices(Is, Js, dimensions):
59
60 (M, N) = dimensions
61 Is[Is == M] = 0
62 Is[Is == -1] = M - 1
63 Js[Js == N] = 0
64 Js[Js == -1] = N - 1
65 return (Is, Js)
66
67 # \brief laplace solves the Laplace equation
68 # \details laplace solves the Laplace equation using the SOR method with Chebyshev
69 # acceleration as described in 'Numerical Recipes in Fortran: the art of
70 # scientific computing' by William H. Press et al -- 2nd edition, section
71 # 19.5.
72 #
73 # data is a 2-d array (computation grid)
74 #
75 # mask is a boolean array; setting mask to 'data == 0', for example, results
76 # in only modifying points where 'data' is zero, all the other points
77 # are left as is. Intended use: if in an array the value of -9999.0
78 # signifies a missing value, then setting mask to 'data == -9999.0'
79 # fills in all the missing values.
80 #
81 # eps1 is the first stopping criterion: the iterations stop if the norm of
82 # residual becomes less than eps1*initial_norm, where 'initial_norm' is
83 # the initial norm of residual. Setting eps1 to zero or a negative
84 # number disables this stopping criterion.
85 #
86 # eps2 is the second stopping criterion: the iterations stop if the absolute
87 # value of the maximal change in value between successive iterations is
88 # less than eps2. Setting eps2 to zero or a negative number disables
89 # this stopping criterion.
90 #
91 # initial_guess is the initial guess used for all the values in the domain;
92 # the default is 'mean', i.e. use the mean of all the present values as
93 # the initial guess for missing values. initial_guess has to be 'mean'
94 # or a number.
95 #
96 # max_iter is the maximum number of iterations allowed. The default is 10000.
97
98
99 def laplace(data, mask, eps1, eps2, initial_guess='mean', max_iter=10000):
100
101 dimensions = data.shape
102 rjac = rho_jacobi(dimensions)
103 i, j = indices(dimensions)
104 # This splits the grid into 'odd' and 'even' parts, according to the
105 # checkerboard pattern:
106 odd = (i % 2 == 1) ^ (j % 2 == 0)
107 even = (i % 2 == 0) ^ (j % 2 == 0)
108 # odd and even parts _in_ the domain:
109 odd_part = list(zip(i[mask & odd], j[mask & odd]))
110 even_part = list(zip(i[mask & even], j[mask & even]))
111 # relative indices of the stencil points:
112 k = array([0, 1, 0, -1])
113 l = array([-1, 0, 1, 0])
114 parts = [odd_part, even_part]
115
116 try:
117 initial_guess = float(initial_guess)
118 except:
119 if initial_guess == 'mean':
120 present = array(ones_like(mask) - mask, dtype=bool)
121 initial_guess = mean(data[present])
122 else:
123 print("""ERROR: initial_guess of '%s' is not supported (it should be a number or 'mean').
124 Note: your data was not modified.""" % initial_guess)
125 return
126
127 data[mask] = initial_guess
128 print("Using the initial guess of %10f." % initial_guess)
129
130 # compute the initial norm of residual
131 initial_norm = 0.0
132 for m in [0, 1]:
133 for i, j in parts[m]:
134 Is, Js = fix_indices(i + k, j + l, dimensions)
135 xi = sum(data[Is, Js]) - 4 * data[i, j]
136 initial_norm += abs(xi)
137 print("Initial norm of residual =", initial_norm)
138 print("Criterion is (change < %f) OR (res norm < %f (initial norm))." % (eps2, eps1))
139
140 omega = 1.0
141 # The main loop:
142 for n in arange(max_iter):
143 anorm = 0.0
144 change = 0.0
145 for m in [0, 1]:
146 for i, j in parts[m]:
147 # stencil points:
148 Is, Js = fix_indices(i + k, j + l, dimensions)
149 residual = sum(data[Is, Js]) - 4 * data[i, j]
150 delta = omega * 0.25 * residual
151 data[i, j] += delta
152
153 # record the maximal change and the residual norm:
154 anorm += abs(residual)
155 if abs(delta) > change:
156 change = abs(delta)
157 # Chebyshev acceleration (see formula 19.5.30):
158 if n == 1 and m == 1:
159 omega = 1.0 / (1.0 - 0.5 * rjac ** 2)
160 else:
161 omega = 1.0 / (1.0 - 0.25 * rjac ** 2 * omega)
162 print("max change = %10f, residual norm = %10f" % (change, anorm))
163 if (anorm < eps1 * initial_norm) or (change < eps2):
164 print("Exiting with change=%f, anorm=%f after %d iteration(s)." % (change,
165 anorm, n + 1))
166 return
167 print("Exceeded the maximum number of iterations.")
168 return
169
170
171 if __name__ == "__main__":
172 from optparse import OptionParser
173 from sys import argv, exit
174 from shutil import copy, move
175 from tempfile import mkstemp
176 from os import close
177 from time import time, asctime
178 try:
179 from netCDF4 import Dataset as NC
180 except:
181 print("netCDF4 is not installed!")
182 sys.exit(1)
183
184 parser = OptionParser()
185
186 parser.usage = "%prog [options]"
187 parser.description = "Fills missing values in variables selected using -v in the file given by -f."
188 parser.add_option("-f", "--file", dest="input_filename",
189 help="input file")
190 parser.add_option("-v", "--vars", dest="variables",
191 help="comma-separated list of variables to process")
192 parser.add_option("-o", "--out_file", dest="output_filename",
193 help="output file")
194 parser.add_option("-e", "--eps", dest="eps",
195 help="convergence tolerance",
196 default="1.0")
197 parser.add_option("-i", "--initial_guess", dest="initial_guess",
198 help="initial guess to use; applies to all selected variables",
199 default="mean")
200
201 (options, args) = parser.parse_args()
202
203 if options.input_filename == "":
204 print("""Please specify the input file name
205 (using the -f or --file command line option).""")
206 exit(-1)
207 input_filename = options.input_filename
208
209 if options.variables == "":
210 print("""Please specify the list of variables to process
211 (using the -v or --variables command line option).""")
212 exit(-1)
213 variables = (options.variables).split(',')
214
215 if options.output_filename == "":
216 print("""Please specify the output file name
217 (using the -o or --out_file command line option).""")
218 exit(-1)
219 output_filename = options.output_filename
220
221 eps = float(options.eps)
222
223 # Done processing command-line options.
224
225 print("Creating the temporary file...")
226 try:
227 (handle, tmp_filename) = mkstemp()
228 close(handle) # mkstemp returns a file handle (which we don't need)
229 copy(input_filename, tmp_filename)
230 except IOError:
231 print("ERROR: Can't create %s, Exiting..." % tmp_filename)
232
233 try:
234 nc = NC(tmp_filename, 'a')
235 except Exception as message:
236 print(message)
237 print("Note: %s was not modified." % output_filename)
238 exit(-1)
239
240 # add history global attribute (after checking if present)
241 historysep = ' '
242 historystr = asctime() + ': ' + historysep.join(argv) + '\n'
243 if 'history' in nc.ncattrs():
244 nc.history = historystr + nc.history # prepend to history string
245 else:
246 nc.history = historystr
247
248 t_zero = time()
249 for name in variables:
250 print("Processing %s..." % name)
251 try:
252 var = nc.variables[name]
253
254 attributes = ["valid_range", "valid_min", "valid_max",
255 "_FillValue", "missing_value"]
256 adict = {}
257 print("Reading attributes...")
258 for attribute in attributes:
259 print("* %15s -- " % attribute, end=' ')
260 if attribute in var.ncattrs():
261 adict[attribute] = getattr(var, attribute)
262 print("found")
263 else:
264 print("not found")
265
266 if (var.ndim == 3):
267 nt = var.shape[0]
268 for t in range(0, nt):
269 print("\nInterpolating time step %i of %i\n" % (t, nt))
270
271 data = asarray(squeeze(var[t, :, :].data))
272
273 if "valid_range" in adict:
274 range = adict["valid_range"]
275 mask = ((data >= range[0]) & (data <= range[1]))
276 print("Using the valid_range attribute; range = ", range)
277
278 elif "valid_min" in adict and "valid_max" in adict:
279 valid_min = adict["valid_min"]
280 valid_max = adict["valid_max"]
281 mask = ((data < valid_min) | (data > valid_max))
282 print("""Using valid_min and valid_max attributes.
283 valid_min = %10f, valid_max = %10f.""" % (valid_min, valid_max))
284
285 elif "valid_min" in adict:
286 valid_min = adict["valid_min"]
287 mask = data < valid_min
288 print("Using the valid_min attribute; valid_min = %10f" % valid_min)
289
290 elif "valid_max" in adict:
291 valid_max = adict["valid_max"]
292 mask = data > valid_max
293 print("Using the valid_max attribute; valid_max = %10f" % valid_max)
294
295 elif "_FillValue" in adict:
296 fill_value = adict["_FillValue"]
297 if fill_value <= 0:
298 mask = data <= fill_value + 2 * finfo(float).eps
299 else:
300 mask = data >= fill_value - 2 * finfo(float).eps
301 print("Using the _FillValue attribute; _FillValue = %10f" % fill_value)
302
303 elif "missing_value" in adict:
304 missing = adict["missing_value"]
305 mask = abs(data - missing) < 2 * finfo(float).eps
306 print("""Using the missing_value attribute; missing_value = %10f
307 Warning: this attribute is deprecated by the NUG.""" % missing)
308
309 else:
310 print("No missing values found. Skipping this variable...")
311 continue
312
313 count = int(sum(mask))
314 if count == 0:
315 print("No missing values found. Skipping this variable...")
316 continue
317 print("Filling in %5d missing values..." % count)
318 t0 = time()
319 laplace(data, mask, -1, eps, initial_guess=options.initial_guess)
320 var[t, :, :] = data
321
322 # now REMOVE missing_value and _FillValue attributes
323 try:
324 delattr(var, '_FillValue')
325 except:
326 pass
327 try:
328 delattr(var, 'missing_value')
329 except:
330 pass
331 print("This took %5f seconds." % (time() - t0))
332
333 elif (var.ndim == 2):
334
335 data = asarray(squeeze(var[:]))
336
337 if "valid_range" in adict:
338 range = adict["valid_range"]
339 mask = ((data >= range[0]) & (data <= range[1]))
340 print("Using the valid_range attribute; range = ", range)
341
342 elif "valid_min" in adict and "valid_max" in adict:
343 valid_min = adict["valid_min"]
344 valid_max = adict["valid_max"]
345 mask = ((data < valid_min) | (data > valid_max))
346 print("""Using valid_min and valid_max attributes.
347 valid_min = %10f, valid_max = %10f.""" % (valid_min, valid_max))
348
349 elif "valid_min" in adict:
350 valid_min = adict["valid_min"]
351 mask = data < valid_min
352 print("Using the valid_min attribute; valid_min = %10f" % valid_min)
353
354 elif "valid_max" in adict:
355 valid_max = adict["valid_max"]
356 mask = data > valid_max
357 print("Using the valid_max attribute; valid_max = %10f" % valid_max)
358
359 elif "_FillValue" in adict:
360 fill_value = adict["_FillValue"]
361 if fill_value <= 0:
362 mask = data <= fill_value + 2 * finfo(float).eps
363 else:
364 mask = data >= fill_value - 2 * finfo(float).eps
365 print("Using the _FillValue attribute; _FillValue = %10f" % fill_value)
366
367 elif "missing_value" in adict:
368 missing = adict["missing_value"]
369 mask = abs(data - missing) < 2 * finfo(float).eps
370 print("""Using the missing_value attribute; missing_value = %10f
371 Warning: this attribute is deprecated by the NUG.""" % missing)
372
373 else:
374 print("No missing values found. Skipping this variable...")
375 continue
376
377 count = int(sum(mask))
378 if count == 0:
379 print("No missing values found. Skipping this variable...")
380 continue
381 print("Filling in %5d missing values..." % count)
382 t0 = time()
383 laplace(data, mask, -1, eps, initial_guess=options.initial_guess)
384 var[:] = data
385
386 # now REMOVE missing_value and _FillValue attributes
387 try:
388 delattr(var, '_FillValue')
389 except:
390 pass
391 try:
392 delattr(var, 'missing_value')
393 except:
394 pass
395 print("This took %5f seconds." % (time() - t0))
396 else:
397 print('wrong shape')
398
399 except Exception as message:
400 print("ERROR:", message)
401 print("Note: %s was not modified." % output_filename)
402 exit(-1)
403
404 print("Processing all the variables took %5f seconds." % (time() - t_zero))
405 nc.close()
406 try:
407 move(tmp_filename, output_filename)
408 except:
409 print("Error moving %s to %s. Exiting..." % (tmp_filename,
410 output_filename))
411 exit(-1)