tMerge pull request #8 from anders-dc/darcy - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 2dc04091730193f7b8df4263b0a00ea9f744c9c7
 (DIR) parent e6181dcce46e3632a3f4a18ab0aa4c5db4a008a1
 (HTM) Author: Anders Damsgaard <anders-dc@users.noreply.github.com>
       Date:   Wed,  5 Nov 2014 15:25:07 +0100
       
       Merge pull request #8 from anders-dc/darcy
       
       Darcy solver implemented
       Diffstat:
         M CMakeLists.txt                      |       4 +---
         A output/cube-init.output00029.bin    |       0 
         A output/cube-init.status.dat         |       1 +
         M python/capillary-cohesion.py        |       2 +-
         M python/collapse.py                  |       2 +-
         M python/cube-init.py                 |       2 +-
         A python/halfshear-darcy-starter.py   |      69 ++++++++++++++++++++++++++++++
         M python/segregation.py               |       2 +-
         M python/shear-results-strain.py      |       4 ++--
         M python/shear-results.py             |      10 +++++-----
         M python/shear-test.py                |       2 +-
         M python/shear2-init.py               |       2 +-
         M python/sphere.py                    |     806 +++++++++++++++++++++----------
         M src/CMakeLists.txt                  |       6 +++---
         M src/constants.h                     |       2 +-
         A src/darcy.cpp                       |     311 +++++++++++++++++++++++++++++++
         A src/darcy.cuh                       |    1025 +++++++++++++++++++++++++++++++
         M src/datatypes.h                     |      31 +++++++++++++++++++++++++++++--
         M src/debug.h                         |       8 ++++----
         M src/device.cu                       |    1588 +++++++++++++++++++------------
         M src/file_io.cpp                     |     337 +++++++++++++++++++++----------
         M src/navierstokes.cpp                |       4 ++--
         M src/navierstokes.cuh                |     116 +++++++++++++++++--------------
         M src/sphere.cpp                      |      69 +++++++++++++++++++++++++++----
         M src/sphere.h                        |      49 +++++++++++++++++++++++++++++--
         M src/utility.cu                      |       6 +++++-
         M tests/CMakeLists.txt                |      12 ++++++++++++
         A tests/cfd_tests_darcy.py            |     151 +++++++++++++++++++++++++++++++
         A tests/cfd_tests_darcy_particles.py  |     165 +++++++++++++++++++++++++++++++
         A tests/cfd_tests_neumann_darcy.py    |      41 +++++++++++++++++++++++++++++++
         A tests/fluid_particle_interaction_d… |      31 +++++++++++++++++++++++++++++++
         A tests/highlighttext.py              |      51 +++++++++++++++++++++++++++++++
         M tests/io_tests_fluid.py             |      54 +++++++++++++++++++++++++++++--
         M tests/pytestutils.py                |       5 +++--
       
       34 files changed, 3926 insertions(+), 1042 deletions(-)
       ---
 (DIR) diff --git a/CMakeLists.txt b/CMakeLists.txt
       t@@ -30,9 +30,7 @@ enable_testing()
        
        # Set build type = Debug
        #set(CMAKE_BUILD_TYPE Debug)
       -#if (CUDA_FOUND)
       -#    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-g -G)
       -#endif()
       +#set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-g -G)
        
        # Set build type = Release
        set(CMAKE_BUILD_TYPE Release)
 (DIR) diff --git a/output/cube-init.output00029.bin b/output/cube-init.output00029.bin
       Binary files differ.
 (DIR) diff --git a/output/cube-init.status.dat b/output/cube-init.status.dat
       t@@ -0,0 +1 @@
       +5.8000e+00 9.6667e+01 29
 (DIR) diff --git a/python/capillary-cohesion.py b/python/capillary-cohesion.py
       t@@ -27,7 +27,7 @@ sim.mu_s[0] = 0.0
        sim.mu_d[0] = 0.0
        sim.k_n[0] = 1.0e7
        sim.k_t[0] = 1.0e7
       -sim.generateRadii(psd='uni', radius_mean=1.0e-3, radius_variance=1.0e-4)
       +sim.generateRadii(psd='uni', mean=1.0e-3, variance=1.0e-4)
        sim.contactModel(1)
        sim.initRandomGridPos([12, 12, 10000])
        sim.initTemporal(5.0, file_dt=0.01, epsilon=0.07)
 (DIR) diff --git a/python/collapse.py b/python/collapse.py
       t@@ -21,7 +21,7 @@ sim_id = 'collapse'
        init = sphere.sim(np = np, nd = 3, nw = 0, sid = sim_id + '-init')
        
        # Set radii
       -init.generateRadii(radius_mean = 0.1)
       +init.generateRadii(mean = 0.1)
        
        # Use default params
        init.defaultParams(
 (DIR) diff --git a/python/cube-init.py b/python/cube-init.py
       t@@ -3,7 +3,7 @@ import sphere
        
        init = sphere.sim('cube-init', np=1e2)
        
       -init.generateRadii(psd='uni', radius_mean=0.01, radius_variance=0.002)
       +init.generateRadii(psd='uni', mean=0.01, variance=0.002)
        
        init.periodicBoundariesXY()
        
 (DIR) diff --git a/python/halfshear-darcy-starter.py b/python/halfshear-darcy-starter.py
       t@@ -0,0 +1,69 @@
       +#!/usr/bin/env python
       +import sphere
       +import numpy
       +import sys
       +
       +# launch with:
       +# $ ipython halfshear-darcy-starter.py <device> <fluid> <c_phi> <k_c> <sigma_0> <mu>
       +
       +device = int(sys.argv[1])
       +wet = int(sys.argv[2])
       +c_phi = float(sys.argv[3])
       +k_c = float(sys.argv[4])
       +sigma0 = float(sys.argv[5])
       +mu = float(sys.argv[6])
       +
       +if wet == 1:
       +    fluid = True
       +else:
       +    fluid = False
       +    
       +sim = sphere.sim('halfshear-sigma0=' + str(sigma0), fluid=False)
       +print('Input: ' + sim.sid)
       +sim.readlast()
       +
       +sim.fluid = fluid
       +if fluid:
       +    sim.id('halfshear-darcy-sigma0=' + str(sigma0) + '-k_c=' + str(k_c) + \
       +            '-mu=' + str(mu) + '-shear')
       +else:
       +    sim.id('halfshear-sigma0=' + str(sigma0) + '-shear')
       +
       +sim.checkerboardColors(nx=6,ny=3,nz=6)
       +sim.cleanup()
       +sim.adjustUpperWall()
       +sim.zeroKinematics()
       +
       +sim.shear(1.0/20.0)
       +
       +if fluid:
       +    #sim.num[2] *= 2
       +    sim.num[:] /= 2
       +    #sim.L[2] *= 2.0
       +    #sim.initFluid(mu = 1.787e-6, p = 600.0e3, cfd_solver = 1)
       +    sim.initFluid(mu = 1.787e-6, p = 0.0, cfd_solver = 1)
       +    sim.setFluidBottomNoFlow()
       +    sim.setFluidTopFixedPressure()
       +    #sim.setDEMstepsPerCFDstep(100)
       +    sim.setMaxIterations(2e5)
       +    sim.beta_f[0] = mu
       +    sim.k_c[0] = k_c
       +
       +sim.initTemporal(total = 20.0, file_dt = 0.01, epsilon=0.07)
       +sim.w_devs[0] = sigma0
       +sim.w_m[0] = numpy.abs(sigma0*sim.L[0]*sim.L[1]/sim.g[2])
       +sim.mu_s[0] = 0.5
       +sim.mu_d[0] = 0.5
       +sim.setDampingNormal(0.0)
       +sim.setDampingTangential(0.0)
       +
       +# Fix lowermost particles
       +#dz = sim.L[2]/sim.num[2]
       +#I = numpy.nonzero(sim.x[:,2] < 1.5*dz)
       +#sim.fixvel[I] = 1
       +
       +sim.run(dry=True)
       +sim.run(device=device)
       +#sim.writeVTKall()
       +#sim.visualize('walls')
       +#sim.visualize('fluid-pressure')
 (DIR) diff --git a/python/segregation.py b/python/segregation.py
       t@@ -27,7 +27,7 @@ devslist = [120e3]
        init = sphere.sim(np = np, nd = 3, nw = 0, sid = sim_id + '-init')
        
        # Save radii
       -init.generateRadii(radius_mean = 0.08)
       +init.generateRadii(mean = 0.08)
        
        # Use default params
        init.defaultParams(gamma_n = 100.0, mu_s = 0.4, mu_d = 0.4)
 (DIR) diff --git a/python/shear-results-strain.py b/python/shear-results-strain.py
       t@@ -14,8 +14,8 @@ import matplotlib.pyplot as plt
        from matplotlib.ticker import MaxNLocator
        
        sigma0 = 20000.0
       -cvals = ['dry', 1.0, 0.1, 0.01]
       -#cvals = ['dry', 1.0, 0.1]
       +#cvals = ['dry', 1.0, 0.1, 0.01]
       +cvals = ['dry', 1.0, 0.1]
        #cvals = ['dry', 1.0]
        #step = 1999
        
 (DIR) diff --git a/python/shear-results.py b/python/shear-results.py
       t@@ -21,8 +21,8 @@ zflow = False
        #sigma0_list = numpy.array([1.0e3, 2.0e3, 4.0e3, 10.0e3, 20.0e3, 40.0e3])
        #sigma0 = 10.0e3
        sigma0 = float(sys.argv[1])
       -#cvals = [1.0, 0.1]
       -cvals = [1.0, 0.1, 0.01]
       +cvals = [1.0, 0.1]
       +#cvals = [1.0, 0.1, 0.01]
        #cvals = [1.0]
        
        # return a smoothed version of in. The returned array is smaller than the
       t@@ -139,9 +139,9 @@ for c in numpy.arange(1,len(cvals)+1):
            #sid = 'shear-sigma0=' + str(sigma0) + '-c_phi=' + \
                            #str(c_phi) + '-c_v=' + str(c_v) + \
                            #'-hi_mu-lo_visc-hw'
       -    #sid = 'halfshear-sigma0=' + str(sigma0) + '-c=' + str(c_v) + '-shear'
       -    sid = 'halfshear-sigma0=' + str(sigma0) + '-c_v=' + str(c_v) +\
       -            '-c_a=0.0-velfac=1.0-shear'
       +    sid = 'halfshear-sigma0=' + str(sigma0) + '-c=' + str(c_v) + '-shear'
       +    #sid = 'halfshear-sigma0=' + str(sigma0) + '-c_v=' + str(c_v) +\
       +            #'-c_a=0.0-velfac=1.0-shear'
            if os.path.isfile('../output/' + sid + '.status.dat'):
        
                sim = sphere.sim(sid, fluid=fluid)
 (DIR) diff --git a/python/shear-test.py b/python/shear-test.py
       t@@ -26,7 +26,7 @@ devslist = [80e3, 10e3, 20e3, 40e3, 60e3, 120e3]
        init = Spherebin(np = np, nd = 3, nw = 0, sid = sim_id + "-init")
        
        # Save radii
       -init.generateRadii(radius_mean = 0.02)
       +init.generateRadii(mean = 0.02)
        
        # Use default params
        init.defaultParams(gamma_n = 100.0, mu_s = 0.6, mu_d = 0.6)
 (DIR) diff --git a/python/shear2-init.py b/python/shear2-init.py
       t@@ -2,7 +2,7 @@
        import sphere
        
        sim = sphere.sim('init2', np=10000)
       -sim.generateRadii(psd='uni', radius_mean=0.02, radius_variance=0.01)
       +sim.generateRadii(psd='uni', mean=0.02, variance=0.01)
        sim.initRandomGridPos([12, 12, 1000])
        sim.initTemporal(10.0, file_dt=0.05, epsilon=0.07)
        sim.gamma_n[0] = 1000.0
 (DIR) diff --git a/python/sphere.py b/python/sphere.py
       t@@ -19,7 +19,7 @@ numpy.seterr(all='warn', over='raise')
        
        # Sphere version number. This field should correspond to the value in
        # `../src/constants.h`.
       -VERSION=1.07
       +VERSION=2.0
        
        class sim:
            '''
       t@@ -40,9 +40,18 @@ class sim:
            :type sid: str
            :param fluid: Setup fluid simulation (default = False)
            :type fluid: bool
       +    :param cfd_solver: Fluid solver to use if fluid == True. 0: Navier-Stokes
       +        (default), 1: Darcy.
       +    :type cfd_solver: int
            '''
        
       -    def __init__(self, sid = 'unnamed', np = 0, nd = 3, nw = 0, fluid = False):
       +    def __init__(self,
       +            sid = 'unnamed',
       +            np = 0,
       +            nd = 3,
       +            nw = 0,
       +            fluid = False,
       +            cfd_solver = 0):
        
                # Sphere version number
                self.version = numpy.ones(1, dtype=numpy.float64)*VERSION
       t@@ -272,6 +281,11 @@ class sim:
        
                if self.fluid:
        
       +            # Fluid solver type
       +            # 0: Navier Stokes (fluid with inertia)
       +            # 1: Stokes-Darcy (fluid without inertia)
       +            self.cfd_solver = numpy.zeros(1, dtype=numpy.int32)
       +
                    # Fluid dynamic viscosity [N/(m/s)]
                    self.mu = numpy.zeros(1, dtype=numpy.float64)
        
       t@@ -311,40 +325,73 @@ class sim:
        
                    ## Solver parameters
        
       -            # Smoothing parameter, should be in the range [0.0;1.0[.
       -            # 0.0 = no smoothing.
       -            self.gamma = numpy.array(0.0)
       +            # Navier-Stokes
       +            if self.cfd_solver[0] == 0:
        
       -            # Under-relaxation parameter, should be in the range ]0.0;1.0].
       -            # 1.0 = no under-relaxation
       -            self.theta = numpy.array(1.0)
       +                # Smoothing parameter, should be in the range [0.0;1.0[.
       +                # 0.0 = no smoothing.
       +                self.gamma = numpy.array(0.0)
        
       -            # Velocity projection parameter, should be in the range [0.0;1.0]
       -            self.beta = numpy.array(0.0)
       +                # Under-relaxation parameter, should be in the range ]0.0;1.0].
       +                # 1.0 = no under-relaxation
       +                self.theta = numpy.array(1.0)
        
       -            # Tolerance criteria for the normalized max. residual
       -            self.tolerance = numpy.array(1.0e-8)
       +                # Velocity projection parameter, should be in the range
       +                # [0.0;1.0]
       +                self.beta = numpy.array(0.0)
        
       -            # The maximum number of iterations to perform per time step
       -            self.maxiter = numpy.array(1e4)
       +                # Tolerance criteria for the normalized max. residual
       +                self.tolerance = numpy.array(1.0e-3)
        
       -            # The number of DEM time steps to perform between CFD updates
       -            self.ndem = numpy.array(1)
       +                # The maximum number of iterations to perform per time step
       +                self.maxiter = numpy.array(1e4)
        
       -            # Porosity scaling factor
       -            self.c_phi = numpy.ones(1, dtype=numpy.float64)
       +                # The number of DEM time steps to perform between CFD updates
       +                self.ndem = numpy.array(1)
        
       -            # Fluid velocity scaling factor
       -            self.c_v = numpy.ones(1, dtype=numpy.float64)
       +                # Porosity scaling factor
       +                self.c_phi = numpy.ones(1, dtype=numpy.float64)
        
       -            # DEM-CFD time scaling factor
       -            self.dt_dem_fac = numpy.ones(1, dtype=numpy.float64)
       +                # Fluid velocity scaling factor
       +                self.c_v = numpy.ones(1, dtype=numpy.float64)
        
       -            ## Interaction forces
       -            self.f_d = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       -            self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       -            self.f_v = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       -            self.f_sum = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +                # DEM-CFD time scaling factor
       +                self.dt_dem_fac = numpy.ones(1, dtype=numpy.float64)
       +
       +                ## Interaction forces
       +                self.f_d = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +                self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +                self.f_v = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +                self.f_sum = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +
       +            # Darcy
       +            elif self.cfd_solver[0] == 1:
       +
       +                # Tolerance criteria for the normalized max. residual
       +                self.tolerance = numpy.array(1.0e-3)
       +
       +                # The maximum number of iterations to perform per time step
       +                self.maxiter = numpy.array(1e4)
       +
       +                # The number of DEM time steps to perform between CFD updates
       +                self.ndem = numpy.array(1)
       +
       +                # Porosity scaling factor
       +                self.c_phi = numpy.ones(1, dtype=numpy.float64)
       +
       +                # Interaction forces
       +                self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +
       +                # Adiabatic fluid compressibility [1/Pa].
       +                # Fluid bulk modulus = 1/self.beta_f
       +                self.beta_f = numpy.ones(1, dtype=numpy.float64)*4.5e-10
       +
       +                # Hydraulic permeability prefactor [m*m]
       +                self.k_c = numpy.ones(1, dtype=numpy.float64)*4.6e-10
       +
       +            else:
       +                raise Exception('Value of cfd_solver not understood (' + \
       +                        str(self.cfd_solver[0]) + ')')
        
                # Particle color marker
                self.color = numpy.zeros(self.np, dtype=numpy.int32)
       t@@ -546,7 +593,10 @@ class sim:
                    return 64
        
                if self.fluid:
       -            if (self.mu != other.mu):
       +            if (self.cfd_solver != other.cfd_solver):
       +                print(91)
       +                return 91
       +            elif (self.mu != other.mu):
                        print(65)
                        return 65
                    elif ((self.v_f != other.v_f).any()):
       t@@ -585,44 +635,69 @@ class sim:
                    elif (self.free_slip_top != other.free_slip_top):
                        print(77)
                        return 77
       -            elif (self.gamma != other.gamma):
       -                print(78)
       -                return 78
       -            elif (self.theta != other.theta):
       -                print(79)
       -                return 79
       -            elif (self.beta != other.beta):
       -                print(80)
       -                return 80
       -            elif (self.tolerance != other.tolerance):
       -                print(81)
       -                return 81
       -            elif (self.maxiter != other.maxiter):
       -                print(82)
       -                return 82
       -            elif (self.ndem != other.ndem):
       -                print(83)
       -                return 83
       -            elif (self.c_phi != other.c_phi):
       -                print(84)
       -                return(84)
       -            elif (self.c_v != other.c_v):
       -                print(85)
       -            elif (self.dt_dem_fac != other.dt_dem_fac):
       -                print(85)
       -                return(85)
       -            elif (self.f_d != other.f_d).any():
       -                print(86)
       -                return(86)
       -            elif (self.f_p != other.f_p).any():
       -                print(87)
       -                return(87)
       -            elif (self.f_v != other.f_v).any():
       -                print(88)
       -                return(88)
       -            elif (self.f_sum != other.f_sum).any():
       -                print(89)
       -                return(89)
       +
       +            if (self.cfd_solver == 0):
       +                if (self.gamma != other.gamma):
       +                    print(78)
       +                    return 78
       +                elif (self.theta != other.theta):
       +                    print(79)
       +                    return 79
       +                elif (self.beta != other.beta):
       +                    print(80)
       +                    return 80
       +                elif (self.tolerance != other.tolerance):
       +                    print(81)
       +                    return 81
       +                elif (self.maxiter != other.maxiter):
       +                    print(82)
       +                    return 82
       +                elif (self.ndem != other.ndem):
       +                    print(83)
       +                    return 83
       +                elif (self.c_phi != other.c_phi):
       +                    print(84)
       +                    return(84)
       +                elif (self.c_v != other.c_v):
       +                    print(85)
       +                elif (self.dt_dem_fac != other.dt_dem_fac):
       +                    print(85)
       +                    return(85)
       +                elif (self.f_d != other.f_d).any():
       +                    print(86)
       +                    return(86)
       +                elif (self.f_p != other.f_p).any():
       +                    print(87)
       +                    return(87)
       +                elif (self.f_v != other.f_v).any():
       +                    print(88)
       +                    return(88)
       +                elif (self.f_sum != other.f_sum).any():
       +                    print(89)
       +                    return(89)
       +
       +            if (self.cfd_solver == 1):
       +                if (self.tolerance != other.tolerance):
       +                    print(81)
       +                    return 81
       +                elif (self.maxiter != other.maxiter):
       +                    print(82)
       +                    return 82
       +                elif (self.ndem != other.ndem):
       +                    print(83)
       +                    return 83
       +                elif (self.c_phi != other.c_phi):
       +                    print(84)
       +                    return(84)
       +                elif (self.f_p != other.f_p).any():
       +                    print(86)
       +                    return(86)
       +                elif (self.beta_f != other.beta_f):
       +                    print(87)
       +                    return(87)
       +                elif (self.k_c != other.k_c):
       +                    print(88)
       +                    return(88)
        
                if ((self.color != other.color)).any():
                    print(90)
       t@@ -631,15 +706,31 @@ class sim:
                # All equal
                return 0
        
       -    def id(self, sid):
       +    def id(self, sid=''):
                '''
       -        Set the simulation id/name, which will be used to identify simulation
       -        files in the output folders.
       +        Returns or sets the simulation id/name, which is used to identify
       +        simulation files in the output folders.
        
       -        :param sid: The desired simulation id
       +        :param sid: The desired simulation id. If left blank the current
       +            simulation id will be returned.
                :type sid: str
       +        :returns: The current simulation id if no new value is set.
       +        :return type: str
                '''
       -        self.sid = sid
       +        if sid == '':
       +            return self.sid
       +        else:
       +            self.sid = sid
       +
       +    def idAppend(self, string):
       +        '''
       +        Append a string to the simulation id/name, which is used to identify
       +        simulation files in the output folders.
       +
       +        :param string: The string to append to the simulation id (`self.sid`).
       +        :type string: str
       +        '''
       +        self.sid += string
        
            def addParticle(self,
                    x,
       t@@ -852,7 +943,7 @@ class sim:
                    self.periodic = numpy.fromfile(fh, dtype=numpy.int32, count=1)
        
                    # Per-particle vectors
       -            for i in range(self.np):
       +            for i in numpy.arange(self.np):
                        self.x[i,:] =\
                                numpy.fromfile(fh, dtype=numpy.float64, count=self.nd)
                        self.radius[i] =\
       t@@ -865,7 +956,7 @@ class sim:
                        self.xyzsum = numpy.fromfile(fh, dtype=numpy.float64,\
                                count=self.np*2).reshape(self.np,2)
        
       -            for i in range(self.np):
       +            for i in numpy.arange(self.np):
                        self.vel[i,:] =\
                                numpy.fromfile(fh, dtype=numpy.float64, count=self.nd)
                        self.fixvel[i] =\
       t@@ -924,11 +1015,11 @@ class sim:
                    self.w_devs  = numpy.empty(self.nw, dtype=numpy.float64)
        
                    self.wmode   = numpy.fromfile(fh, dtype=numpy.int32, count=self.nw)
       -            for i in range(self.nw):
       +            for i in numpy.arange(self.nw):
                        self.w_n[i,:] =\
                                numpy.fromfile(fh, dtype=numpy.float64, count=self.nd)
                        self.w_x[i]   = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -            for i in range(self.nw):
       +            for i in numpy.arange(self.nw):
                        self.w_m[i]   = numpy.fromfile(fh, dtype=numpy.float64, count=1)
                        self.w_vel[i] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
                        self.w_force[i] =\
       t@@ -946,7 +1037,7 @@ class sim:
                        self.sigma_b = numpy.fromfile(fh, dtype=numpy.float64, count=1)
                        self.tau_b = numpy.fromfile(fh, dtype=numpy.float64, count=1)
                        self.bonds = numpy.empty((self.nb0, 2), dtype=numpy.uint32)
       -                for i in range(self.nb0):
       +                for i in numpy.arange(self.nb0):
                            self.bonds[i,0] = numpy.fromfile(fh, dtype=numpy.uint32,
                                    count=1)
                            self.bonds[i,1] = numpy.fromfile(fh, dtype=numpy.uint32,
       t@@ -963,6 +1054,13 @@ class sim:
                        self.nb0 = numpy.zeros(1, dtype=numpy.uint32)
        
                    if self.fluid:
       +
       +                if self.version >= 2.0:
       +                    self.cfd_solver = numpy.fromfile(fh, dtype=numpy.int32,
       +                            count=1)
       +                else:
       +                    self.cfd_solver = numpy.zeros(1, dtype=numpy.int32)
       +
                        self.mu = numpy.fromfile(fh, dtype=numpy.float64, count=1)
        
                        self.v_f = numpy.empty(
       t@@ -978,9 +1076,9 @@ class sim:
                                numpy.empty((self.num[0],self.num[1],self.num[2]),
                                dtype=numpy.float64)
        
       -                for z in range(self.num[2]):
       -                    for y in range(self.num[1]):
       -                        for x in range(self.num[0]):
       +                for z in numpy.arange(self.num[2]):
       +                    for y in numpy.arange(self.num[1]):
       +                        for x in numpy.arange(self.num[0]):
                                    self.v_f[x,y,z,0] = \
                                            numpy.fromfile(fh, dtype=numpy.float64,\
                                            count=1)
       t@@ -1000,7 +1098,7 @@ class sim:
                                            numpy.fromfile(fh, dtype=numpy.float64,\
                                            count=1)
        
       -                if (self.version >= 0.36):
       +                if self.version >= 0.36:
                            self.rho_f =\
                                    numpy.fromfile(fh, dtype=numpy.float64, count=1)
                            self.p_mod_A =\
       t@@ -1019,65 +1117,93 @@ class sim:
                            self.free_slip_top =\
                                    numpy.fromfile(fh, dtype=numpy.int32, count=1)
        
       -                self.gamma = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -                self.theta = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -                self.beta  = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -                self.tolerance =\
       -                        numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -                self.maxiter = numpy.fromfile(fh, dtype=numpy.uint32, count=1)
       -                if self.version >= 1.01:
       -                    self.ndem = numpy.fromfile(fh, dtype=numpy.uint32, count=1)
       -                else:
       -                    self.ndem = 1
       -
       -                if self.version >= 1.04:
       -                    self.c_phi = \
       +                if self.version >= 2.0 and self.cfd_solver == 0:
       +                    self.gamma = \
                                    numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -                    self.c_v =\
       -                      numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -                    if self.version == 1.06:
       -                        self.c_a =\
       -                                numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -                    elif self.version >= 1.07:
       -                        self.dt_dem_fac =\
       -                                numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +                    self.theta = \
       +                            numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +                    self.beta  = \
       +                            numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +                    self.tolerance =\
       +                            numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +                    self.maxiter = \
       +                            numpy.fromfile(fh, dtype=numpy.uint32, count=1)
       +                    if self.version >= 1.01:
       +                        self.ndem = \
       +                                numpy.fromfile(fh, dtype=numpy.uint32, count=1)
                            else:
       -                        self.c_a = numpy.ones(1, dtype=numpy.float64)
       -                else:
       -                    self.c_phi = numpy.ones(1, dtype=numpy.float64)
       -                    self.c_v = numpy.ones(1, dtype=numpy.float64)
       +                        self.ndem = 1
        
       -                if self.version >= 1.05:
       -                    self.f_d = numpy.empty_like(self.x)
       +                    if self.version >= 1.04:
       +                        self.c_phi = \
       +                                numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +                        self.c_v =\
       +                          numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +                        if self.version == 1.06:
       +                            self.c_a =\
       +                                    numpy.fromfile(fh, \
       +                                    dtype=numpy.float64, count=1)
       +                        elif self.version >= 1.07:
       +                            self.dt_dem_fac =\
       +                                    numpy.fromfile(fh, \
       +                                    dtype=numpy.float64, count=1)
       +                        else:
       +                            self.c_a = numpy.ones(1, dtype=numpy.float64)
       +                    else:
       +                        self.c_phi = numpy.ones(1, dtype=numpy.float64)
       +                        self.c_v = numpy.ones(1, dtype=numpy.float64)
       +
       +                    if self.version >= 1.05:
       +                        self.f_d = numpy.empty_like(self.x)
       +                        self.f_p = numpy.empty_like(self.x)
       +                        self.f_v = numpy.empty_like(self.x)
       +                        self.f_sum = numpy.empty_like(self.x)
       +
       +                        for i in numpy.arange(self.np[0]):
       +                            self.f_d[i,:] = \
       +                                    numpy.fromfile(fh, dtype=numpy.float64,
       +                                            count=self.nd)
       +                        for i in numpy.arange(self.np[0]):
       +                            self.f_p[i,:] = \
       +                                    numpy.fromfile(fh, dtype=numpy.float64,
       +                                            count=self.nd)
       +                        for i in numpy.arange(self.np[0]):
       +                            self.f_v[i,:] = \
       +                                    numpy.fromfile(fh, dtype=numpy.float64,
       +                                            count=self.nd)
       +                        for i in numpy.arange(self.np[0]):
       +                            self.f_sum[i,:] = \
       +                                    numpy.fromfile(fh, dtype=numpy.float64,
       +                                            count=self.nd)
       +                    else:
       +                        self.f_d = numpy.zeros((self.np, self.nd),
       +                                dtype=numpy.float64)
       +                        self.f_p = numpy.zeros((self.np, self.nd),
       +                                dtype=numpy.float64)
       +                        self.f_v = numpy.zeros((self.np, self.nd),
       +                                dtype=numpy.float64)
       +                        self.f_sum = numpy.zeros((self.np, self.nd),
       +                                dtype=numpy.float64)
       +
       +                elif self.version >= 2.0 and self.cfd_solver == 1:
       +
       +                    self.tolerance = \
       +                            numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +                    self.maxiter = \
       +                            numpy.fromfile(fh, dtype=numpy.uint32, count=1)
       +                    self.ndem = numpy.fromfile(fh, dtype=numpy.uint32, count=1)
       +                    self.c_phi = \
       +                            numpy.fromfile(fh, dtype=numpy.float64, count=1)
                            self.f_p = numpy.empty_like(self.x)
       -                    self.f_v = numpy.empty_like(self.x)
       -                    self.f_sum = numpy.empty_like(self.x)
       -
       -                    for i in range(self.np[0]):
       -                        self.f_d[i,:] = \
       -                                numpy.fromfile(fh, dtype=numpy.float64,
       -                                        count=self.nd)
       -                    for i in range(self.np[0]):
       +                    for i in numpy.arange(self.np[0]):
                                self.f_p[i,:] = \
                                        numpy.fromfile(fh, dtype=numpy.float64,
                                                count=self.nd)
       -                    for i in range(self.np[0]):
       -                        self.f_v[i,:] = \
       -                                numpy.fromfile(fh, dtype=numpy.float64,
       -                                        count=self.nd)
       -                    for i in range(self.np[0]):
       -                        self.f_sum[i,:] = \
       -                                numpy.fromfile(fh, dtype=numpy.float64,
       -                                        count=self.nd)
       -                else:
       -                    self.f_d = numpy.zeros((self.np, self.nd),
       -                            dtype=numpy.float64)
       -                    self.f_p = numpy.zeros((self.np, self.nd),
       -                            dtype=numpy.float64)
       -                    self.f_v = numpy.zeros((self.np, self.nd),
       -                            dtype=numpy.float64)
       -                    self.f_sum = numpy.zeros((self.np, self.nd),
       -                            dtype=numpy.float64)
       +                    self.beta_f = \
       +                            numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +
       +                    self.k_c = \
       +                            numpy.fromfile(fh, dtype=numpy.float64, count=1)
        
                    if (self.version >= 1.02):
                        self.color =\
       t@@ -1131,14 +1257,14 @@ class sim:
                    fh.write(self.periodic.astype(numpy.uint32))
        
                    # Per-particle vectors
       -            for i in range(self.np):
       +            for i in numpy.arange(self.np):
                        fh.write(self.x[i,:].astype(numpy.float64))
                        fh.write(self.radius[i].astype(numpy.float64))
        
                    if (self.np[0] > 0):
                        fh.write(self.xyzsum.astype(numpy.float64))
        
       -            for i in range(self.np):
       +            for i in numpy.arange(self.np):
                        fh.write(self.vel[i,:].astype(numpy.float64))
                        fh.write(self.fixvel[i].astype(numpy.float64))
        
       t@@ -1177,13 +1303,13 @@ class sim:
                    fh.write(self.V_b.astype(numpy.float64))
        
                    fh.write(self.nw.astype(numpy.uint32))
       -            for i in range(self.nw):
       +            for i in numpy.arange(self.nw):
                        fh.write(self.wmode[i].astype(numpy.int32))
       -            for i in range(self.nw):
       +            for i in numpy.arange(self.nw):
                        fh.write(self.w_n[i,:].astype(numpy.float64))
                        fh.write(self.w_x[i].astype(numpy.float64))
        
       -            for i in range(self.nw):
       +            for i in numpy.arange(self.nw):
                        fh.write(self.w_m[i].astype(numpy.float64))
                        fh.write(self.w_vel[i].astype(numpy.float64))
                        fh.write(self.w_force[i].astype(numpy.float64))
       t@@ -1195,7 +1321,7 @@ class sim:
                    fh.write(self.nb0.astype(numpy.uint32))
                    fh.write(self.sigma_b.astype(numpy.float64))
                    fh.write(self.tau_b.astype(numpy.float64))
       -            for i in range(self.nb0):
       +            for i in numpy.arange(self.nb0):
                        fh.write(self.bonds[i,0].astype(numpy.uint32))
                        fh.write(self.bonds[i,1].astype(numpy.uint32))
                    fh.write(self.bonds_delta_n.astype(numpy.float64))
       t@@ -1204,10 +1330,12 @@ class sim:
                    fh.write(self.bonds_omega_t.astype(numpy.float64))
        
                    if self.fluid:
       +
       +                fh.write(self.cfd_solver.astype(numpy.int32))
                        fh.write(self.mu.astype(numpy.float64))
       -                for z in range(self.num[2]):
       -                    for y in range(self.num[1]):
       -                        for x in range(self.num[0]):
       +                for z in numpy.arange(self.num[2]):
       +                    for y in numpy.arange(self.num[1]):
       +                        for x in numpy.arange(self.num[0]):
                                    fh.write(self.v_f[x,y,z,0].astype(numpy.float64))
                                    fh.write(self.v_f[x,y,z,1].astype(numpy.float64))
                                    fh.write(self.v_f[x,y,z,2].astype(numpy.float64))
       t@@ -1225,25 +1353,44 @@ class sim:
                        fh.write(self.free_slip_bot.astype(numpy.int32))
                        fh.write(self.free_slip_top.astype(numpy.int32))
        
       -                fh.write(self.gamma.astype(numpy.float64))
       -                fh.write(self.theta.astype(numpy.float64))
       -                fh.write(self.beta.astype(numpy.float64))
       -                fh.write(self.tolerance.astype(numpy.float64))
       -                fh.write(self.maxiter.astype(numpy.uint32))
       -                fh.write(self.ndem.astype(numpy.uint32))
       -
       -                fh.write(self.c_phi.astype(numpy.float64))
       -                fh.write(self.c_v.astype(numpy.float64))
       -                fh.write(self.dt_dem_fac.astype(numpy.float64))
       -
       -                for i in numpy.arange(self.np):
       -                    fh.write(self.f_d[i,:].astype(numpy.float64))
       -                for i in numpy.arange(self.np):
       -                    fh.write(self.f_p[i,:].astype(numpy.float64))
       -                for i in numpy.arange(self.np):
       -                    fh.write(self.f_v[i,:].astype(numpy.float64))
       -                for i in numpy.arange(self.np):
       -                    fh.write(self.f_sum[i,:].astype(numpy.float64))
       +                # Navier Stokes
       +                if self.cfd_solver[0] == 0:
       +                    fh.write(self.gamma.astype(numpy.float64))
       +                    fh.write(self.theta.astype(numpy.float64))
       +                    fh.write(self.beta.astype(numpy.float64))
       +                    fh.write(self.tolerance.astype(numpy.float64))
       +                    fh.write(self.maxiter.astype(numpy.uint32))
       +                    fh.write(self.ndem.astype(numpy.uint32))
       +
       +                    fh.write(self.c_phi.astype(numpy.float64))
       +                    fh.write(self.c_v.astype(numpy.float64))
       +                    fh.write(self.dt_dem_fac.astype(numpy.float64))
       +
       +                    for i in numpy.arange(self.np):
       +                        fh.write(self.f_d[i,:].astype(numpy.float64))
       +                    for i in numpy.arange(self.np):
       +                        fh.write(self.f_p[i,:].astype(numpy.float64))
       +                    for i in numpy.arange(self.np):
       +                        fh.write(self.f_v[i,:].astype(numpy.float64))
       +                    for i in numpy.arange(self.np):
       +                        fh.write(self.f_sum[i,:].astype(numpy.float64))
       +
       +                # Darcy
       +                elif self.cfd_solver[0] == 1:
       +
       +                    fh.write(self.tolerance.astype(numpy.float64))
       +                    fh.write(self.maxiter.astype(numpy.uint32))
       +                    fh.write(self.ndem.astype(numpy.uint32))
       +                    fh.write(self.c_phi.astype(numpy.float64))
       +                    for i in numpy.arange(self.np):
       +                        fh.write(self.f_p[i,:].astype(numpy.float64))
       +                    fh.write(self.beta_f.astype(numpy.float64))
       +                    fh.write(self.k_c.astype(numpy.float64))
       +
       +                else:
       +                    raise Exception('Value of cfd_solver not understood (' + \
       +                            str(self.cfd_solver[0]) + ')')
       +
        
                    fh.write(self.color.astype(numpy.int32))
        
       t@@ -1385,7 +1532,7 @@ class sim:
                            + 'NumberOfComponents="3" format="ascii">\n')
                    fh.write('          ')
                    for i in range(self.np):
       -                fh.write('%f %f %f' % (self.x[i,0], self.x[i,1], self.x[i,2]))
       +                fh.write('%f %f %f ' % (self.x[i,0], self.x[i,1], self.x[i,2]))
                    fh.write('\n')
                    fh.write('        </DataArray>\n')
                    fh.write('      </Points>\n')
       t@@ -1423,27 +1570,30 @@ class sim:
                    fh.write('        </DataArray>\n')
        
                    if self.fluid:
       -                # Fluid interaction force
       -                fh.write('        <DataArray type="Float32" ' 
       -                        + 'Name="Fluid force total" '
       -                        + 'NumberOfComponents="3" format="ascii">\n')
       -                fh.write('          ')
       -                for i in range(self.np):
       -                    fh.write('%f %f %f ' % \
       -                            (self.f_sum[i,0], self.f_sum[i,1], self.f_sum[i,2]))
       -                fh.write('\n')
       -                fh.write('        </DataArray>\n')
        
       -                # Fluid drag force
       -                fh.write('        <DataArray type="Float32" '
       -                        + 'Name="Fluid drag force" '
       -                        + 'NumberOfComponents="3" format="ascii">\n')
       -                fh.write('          ')
       -                for i in range(self.np):
       -                    fh.write('%f %f %f ' % \
       -                            (self.f_d[i,0], self.f_d[i,1], self.f_d[i,2]))
       -                fh.write('\n')
       -                fh.write('        </DataArray>\n')
       +                if self.cfd_solver == 0:  # Navier Stokes
       +                    # Fluid interaction force
       +                    fh.write('        <DataArray type="Float32" ' 
       +                            + 'Name="Fluid force total" '
       +                            + 'NumberOfComponents="3" format="ascii">\n')
       +                    fh.write('          ')
       +                    for i in range(self.np):
       +                        fh.write('%f %f %f ' % \
       +                                (self.f_sum[i,0], self.f_sum[i,1], \
       +                                self.f_sum[i,2]))
       +                    fh.write('\n')
       +                    fh.write('        </DataArray>\n')
       +
       +                    # Fluid drag force
       +                    fh.write('        <DataArray type="Float32" '
       +                            + 'Name="Fluid drag force" '
       +                            + 'NumberOfComponents="3" format="ascii">\n')
       +                    fh.write('          ')
       +                    for i in range(self.np):
       +                        fh.write('%f %f %f ' % \
       +                                (self.f_d[i,0], self.f_d[i,1], self.f_d[i,2]))
       +                    fh.write('\n')
       +                    fh.write('        </DataArray>\n')
        
                        # Fluid pressure force
                        fh.write('        <DataArray type="Float32" '
       t@@ -1456,16 +1606,17 @@ class sim:
                        fh.write('\n')
                        fh.write('        </DataArray>\n')
        
       -                # Fluid viscous force
       -                fh.write('        <DataArray type="Float32" '
       -                        + 'Name="Fluid viscous force" '
       -                        + 'NumberOfComponents="3" format="ascii">\n')
       -                fh.write('          ')
       -                for i in range(self.np):
       -                    fh.write('%f %f %f ' % \
       -                            (self.f_v[i,0], self.f_v[i,1], self.f_v[i,2]))
       -                fh.write('\n')
       -                fh.write('        </DataArray>\n')
       +                if self.cfd_solver == 0:  # Navier Stokes
       +                    # Fluid viscous force
       +                    fh.write('        <DataArray type="Float32" '
       +                            + 'Name="Fluid viscous force" '
       +                            + 'NumberOfComponents="3" format="ascii">\n')
       +                    fh.write('          ')
       +                    for i in range(self.np):
       +                        fh.write('%f %f %f ' % \
       +                                (self.f_v[i,0], self.f_v[i,1], self.f_v[i,2]))
       +                    fh.write('\n')
       +                    fh.write('        </DataArray>\n')
        
                    # fixvel
                    fh.write('        <DataArray type="Float32" Name="FixedVel" '
       t@@ -1709,6 +1860,17 @@ class sim:
                else:
                    Re.SetNumberOfTuples(grid.GetNumberOfPoints())
        
       +        # Find permeabilities if the Darcy solver is used
       +        if self.cfd_solver[0] == 1:
       +            self.findPermeabilities()
       +            k = vtk.vtkDoubleArray()
       +            k.SetName("Permeability [m*m]")
       +            k.SetNumberOfComponents(1)
       +            if cell_centered:
       +                k.SetNumberOfTuples(grid.GetNumberOfCells())
       +            else:
       +                k.SetNumberOfTuples(grid.GetNumberOfPoints())
       +
                # insert values
                for z in range(self.num[2]):
                    for y in range(self.num[1]):
       t@@ -1719,6 +1881,8 @@ class sim:
                            poros.SetValue(idx, self.phi[x,y,z])
                            dporos.SetValue(idx, self.dphi[x,y,z])
                            Re.SetValue(idx, self.Re[x,y,z])
       +                    if self.cfd_solver[0] == 1:
       +                        k.SetValue(idx, self.k[x,y,z])
        
                # add pres array to grid
                if cell_centered:
       t@@ -1727,12 +1891,16 @@ class sim:
                    grid.GetCellData().AddArray(poros)
                    grid.GetCellData().AddArray(dporos)
                    grid.GetCellData().AddArray(Re)
       +            if self.cfd_solver[0] == 1:
       +                grid.GetCellData().AddArray(k)
                else:
                    grid.GetPointData().AddArray(pres)
                    grid.GetPointData().AddArray(vel)
                    grid.GetPointData().AddArray(poros)
                    grid.GetPointData().AddArray(dporos)
                    grid.GetPointData().AddArray(Re)
       +            if self.cfd_solver[0] == 1:
       +                grid.GetPointData().AddArray(k)
        
                # write VTK XML image data file
                writer = vtk.vtkXMLImageDataWriter()
       t@@ -1804,9 +1972,9 @@ class sim:
                self.readbin(fn, verbose)
        
            def generateRadii(self, psd = 'logn',
       -            radius_mean = 440e-6,
       -            radius_variance = 8.8e-9,
       -            histogram = True):
       +            mean = 440e-6,
       +            variance = 8.8e-9,
       +            histogram = False):
                '''
                Draw random particle radii from a selected probability distribution.
                The larger the variance of radii is, the slower the computations will
       t@@ -1820,26 +1988,29 @@ class sim:
                    ``logn``, which is a log-normal probability distribution, suitable
                    for approximating well-sorted, coarse sediments. The other possible
                    value is ``uni``, which is a uniform distribution from
       -            ``radius_mean-radius_variance`` to ``radius_mean+radius_variance``.
       +            ``mean - variance`` to ``mean + variance``.
                :type psd: str
       -        :param radius_mean: The mean radius [m] (default = 440e-6 m)
       -        :type radius_mean: float
       -        :param radius_variance: The variance in the probability distribution
       +        :param mean: The mean radius [m] (default = 440e-6 m)
       +        :type mean: float
       +        :param variance: The variance in the probability distribution
                    [m].
       -        :type radius_variance: float
       +        :type variance: float
        
                See also: :func:`generateBimodalRadii()`.
                '''
        
                if psd == 'logn': # Log-normal probability distribution
                    mu = math.log(\
       -                    (radius_mean**2)/math.sqrt(radius_variance+radius_mean**2))
       -            sigma = math.sqrt(math.log(radius_variance/(radius_mean**2)+1))
       +                    (mean**2)/math.sqrt(variance+mean**2))
       +            sigma = math.sqrt(math.log(variance/(mean**2)+1))
                    self.radius = numpy.random.lognormal(mu, sigma, self.np)
       -        if psd == 'uni':  # Uniform distribution
       -            radius_min = radius_mean - radius_variance
       -            radius_max = radius_mean + radius_variance
       +        elif psd == 'uni':  # Uniform distribution
       +            radius_min = mean - variance
       +            radius_max = mean + variance
                    self.radius = numpy.random.uniform(radius_min, radius_max, self.np)
       +        else:
       +            raise Exception('Particle size distribution type not understood (' 
       +                    + str(psd) + '). Valid values are \'uni\' or \'logn\'')
        
                # Show radii as histogram
                if histogram:
       t@@ -2560,7 +2731,7 @@ class sim:
                self.mu_ws[0] = 0.0
                self.mu_wd[0] = 0.0
        
       -    def largestFluidTimeStep(self, safety=0.01):
       +    def largestFluidTimeStep(self, safety=0.5):
                '''
                Finds and returns the largest time step in the fluid phase by von
                Neumann and Courant-Friedrichs-Lewy analysis given the current
       t@@ -2579,28 +2750,107 @@ class sim:
                :return type: float
                '''
        
       -        dx_min = numpy.min(self.L/self.num)
       -        dt_min_von_neumann = 0.5*dx_min**2/self.mu[0]
       +        if self.fluid:
        
       -        # Normalized velocities
       -        v_norm = numpy.empty(self.num[0]*self.num[1]*self.num[2])
       -        idx = 0
       -        for x in numpy.arange(self.num[0]):
       -            for y in numpy.arange(self.num[1]):
       -                for z in numpy.arange(self.num[2]):
       -                    v_norm[idx] = numpy.sqrt(self.v_f[x,y,z,:].dot(\
       -                            self.v_f[x,y,z,:]))
       -                    idx += 1
       +            # Navier-Stokes
       +            if self.cfd_solver[0] == 0:
       +                dx_min = numpy.min(self.L/self.num)
       +                dt_min_von_neumann = 0.5*dx_min**2/self.mu[0]
        
       -        # Advection term. This term has to be reevaluated during the
       -        # computations, as the fluid velocity changes.
       -        v_max = numpy.amax(v_norm)
       -        if v_max == 0:
       -            v_max = 1.0e-7
       +                # Normalized velocities
       +                v_norm = numpy.empty(self.num[0]*self.num[1]*self.num[2])
       +                idx = 0
       +                for x in numpy.arange(self.num[0]):
       +                    for y in numpy.arange(self.num[1]):
       +                        for z in numpy.arange(self.num[2]):
       +                            v_norm[idx] = numpy.sqrt(self.v_f[x,y,z,:].dot(\
       +                                    self.v_f[x,y,z,:]))
       +                            idx += 1
       +
       +                # Advection term. This term has to be reevaluated during the
       +                # computations, as the fluid velocity changes.
       +                v_max = numpy.amax(v_norm)
       +                if v_max == 0:
       +                    v_max = 1.0e-7
       +
       +                dt_min_cfl = dx_min/v_max
       +
       +                return numpy.min([dt_min_von_neumann, dt_min_cfl])*safety
       +
       +            # Darcy
       +            elif self.cfd_solver[0] == 1:
       +
       +                # Determine on the base of the diffusivity coefficient
       +                # components
       +                self.cellSize()
       +                self.hydraulicPermeability()
       +                alpha_max = numpy.max(self.k/(self.beta_f*0.9*self.mu))
       +                return safety * 1.0/(2.0*alpha_max)*1.0/(
       +                        1.0/(self.dx[0]**2) + \
       +                        1.0/(self.dx[1]**2) + \
       +                        1.0/(self.dx[2]**2))
       +
       +                '''
       +                # Determine value on the base of the hydraulic conductivity
       +                g = numpy.max(numpy.abs(self.g))
       +
       +                # Bulk modulus of fluid
       +                K = 1.0/self.beta_f[0]
       +
       +                self.hydraulicDiffusivity()
       +                self.cellSize()
       +
       +                return safety * 1.0/(2.0*self.D)*1.0/( \
       +                        1.0/(self.dx[0]**2) + \
       +                        1.0/(self.dx[1]**2) + \
       +                        1.0/(self.dx[2]**2))
       +                '''
        
       -        dt_min_cfl = dx_min/v_max
       +    def cellSize(self):
       +        '''
       +        Calculate the particle sorting (and fluid) cell dimensions.
       +        These values are stored in `self.dx` and are NOT returned.
       +        '''
       +        self.dx = self.L/self.num
       +
       +    def hydraulicConductivity(self):
       +        '''
       +        Determine the hydraulic conductivity (K) [m/s] from the permeability
       +        prefactor. This value is stored in `self.K_c`. This function only works
       +        for the Darcy solver (`self.cfd_solver == 1`)
       +        '''
       +        if self.cfd_solver[0] == 1:
       +            self.K_c = self.k_c*self.rho_f*g/self.mu
       +        else:
       +            raise Exception('This function only works for the Darcy solver')
       +
       +    def hydraulicPermeability(self, phi_min = 0.1, phi_max = 0.9):
       +        '''
       +        Determine the hydraulic permeability (k) [m*m] from the Kozeny-Carman
       +        relationship, using the permeability prefactor (`self.k_c`), and the
       +        range of valid porosities set in `src/darcy.cuh`, by default in the
       +        range 0.1 to 0.9.
       +
       +        This function is only valid for the Darcy solver (`self.cfd_solver ==
       +        1`).
       +        '''
       +        if self.cfd_solver[0] == 1:
       +            self.findPermeabilities()
       +        else:
       +            raise Exception('This function only works for the Darcy solver')
        
       -        return numpy.min([dt_min_von_neumann, dt_min_cfl])*safety
       +    def hydraulicDiffusivity(self):
       +        '''
       +        Determine the hydraulic diffusivity (D) [m*m/s]. The result is stored in
       +        `self.D`. This function only works for the Darcy solver
       +        (`self.cfd_solver[0] == 1`)
       +        '''
       +        if self.cfd_solver[0] == 1:
       +                self.hydraulicConductivity()
       +                phi_bar = numpy.mean(self.phi)
       +                alpha = self.K_c/(self.rho_f*g*(self.k_n[0] + phi_bar*K))
       +        else:
       +            raise Exception('This function only works for the Darcy solver')
        
            def initTemporal(self, total,
                    current = 0.0,
       t@@ -2707,7 +2957,7 @@ class sim:
                self.initFluid()
        
            def initFluid(self, mu = 8.9e-4, rho = 1.0e3, p = 1.0,
       -            hydrostatic = True):
       +            hydrostatic = False, cfd_solver = 0):
                '''
                Initialize the fluid arrays and the fluid viscosity. The default value
                of ``mu`` equals the dynamic viscosity of water at 25 degrees Celcius.
       t@@ -2725,6 +2975,9 @@ class sim:
                    created if a gravitational acceleration along :math:`z` previously
                    has been specified
                :type hydrostatic: bool
       +        :param cfd_solver: Solver to use for the computational fluid dynamics.
       +            Accepted values: 0 (Navier Stokes, default) and 1 (Darcy).
       +        :type cfd_solver: int
                '''
                self.fluid = True
                self.mu = numpy.ones(1, dtype=numpy.float64) * mu
       t@@ -2771,21 +3024,41 @@ class sim:
                self.free_slip_bot = numpy.ones(1, dtype=numpy.int32)
                self.free_slip_top = numpy.ones(1, dtype=numpy.int32)
        
       -        self.gamma = numpy.array(0.0)
       -        self.theta = numpy.array(1.0)
       -        self.beta = numpy.array(0.0)
       -        self.tolerance = numpy.array(1.0e-8)
       -        self.maxiter = numpy.array(1e4)
       -        self.ndem = numpy.array(1)
       +        # Fluid solver type
       +        # 0: Navier Stokes (fluid with inertia)
       +        # 1: Stokes-Darcy (fluid without inertia)
       +        self.cfd_solver = numpy.ones(1)*cfd_solver
       +
       +        if self.cfd_solver[0] == 0:
       +            self.gamma = numpy.array(0.0)
       +            self.theta = numpy.array(1.0)
       +            self.beta = numpy.array(0.0)
       +            self.tolerance = numpy.array(1.0e-3)
       +            self.maxiter = numpy.array(1e4)
       +            self.ndem = numpy.array(1)
       +
       +            self.c_phi = numpy.ones(1, dtype=numpy.float64)
       +            self.c_v = numpy.ones(1, dtype=numpy.float64)
       +            self.dt_dem_fac = numpy.ones(1, dtype=numpy.float64)
       +
       +            self.f_d = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +            self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +            self.f_v = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +            self.f_sum = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
        
       -        self.c_phi = numpy.ones(1, dtype=numpy.float64)
       -        self.c_v = numpy.ones(1, dtype=numpy.float64)
       -        self.dt_dem_fac = numpy.ones(1, dtype=numpy.float64)
       +        elif self.cfd_solver[0] == 1:
       +            self.tolerance = numpy.array(1.0e-3)
       +            self.maxiter = numpy.array(1e4)
       +            self.ndem = numpy.array(1)
       +            self.c_phi = numpy.ones(1, dtype=numpy.float64)
       +            self.f_d = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +            self.beta_f = numpy.ones(1, dtype=numpy.float64)*4.5e-10
       +            self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +            self.k_c = numpy.ones(1, dtype=numpy.float64)*4.6e-10
        
       -        self.f_d = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       -        self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       -        self.f_v = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       -        self.f_sum = numpy.zeros((self.np, self.nd), dtype=numpy.float64)
       +        else:
       +            raise Exception('Value of cfd_solver not understood (' + \
       +                    str(self.cfd_solver[0]) + ')')
        
            def setFluidBottomNoFlow(self):
                '''
       t@@ -2847,6 +3120,18 @@ class sim:
                '''
                self.bc_top[0] = 0
        
       +    def findPermeabilities(self):
       +        '''
       +        Calculates the hydrological permeabilities from the Kozeny-Carman
       +        relationship. These values are only relevant when the Darcy solver is
       +        used (`self.cfd_solver = 1`). The permeability pre-factor `self.k_c`
       +        and the assemblage porosities must be set beforehand. The former values
       +        are set if a file from the `output/` folder is read using
       +        `self.readbin`.
       +        '''
       +        phi = numpy.clip(self.phi, 0.1, 0.9)
       +        self.k = self.k_c * phi**3/(1.0 - phi**2)
       +
            def defaultParams(self,
                    mu_s = 0.5,
                    mu_d = 0.5,
       t@@ -2960,6 +3245,26 @@ class sim:
                # Debonding distance
                self.db[0] = (1.0 + theta/2.0) * self.V_b**(1.0/3.0)
        
       +    def setStiffnessNormal(self, k_n):
       +        '''
       +        Set the elastic stiffness (`k_n`) in the normal direction of the
       +        contact.
       +
       +        :param k_n: The elastic stiffness coefficient [N/m]
       +        :type k_n: float
       +        '''
       +        self.k_n[0] = k_n
       +
       +    def setStiffnessTangential(self, k_t):
       +        '''
       +        Set the elastic stiffness (`k_t`) in the tangential direction of the
       +        contact.
       +
       +        :param k_t: The elastic stiffness coefficient [N/m]
       +        :type k_t: float
       +        '''
       +        self.k_t[0] = k_t
       +
            def setDampingNormal(self, gamma, over_damping=False):
                '''
                Set the dampening coefficient (gamma) in the normal direction of the
       t@@ -3324,18 +3629,27 @@ class sim:
        
            def bulkPorosity(self):
                '''
       -        Calculates the bulk porosity
       +        Calculates the bulk porosity in the smallest axis-parallel cube.
        
                :returns: The bulk porosity, in [0:1]
                :return type: float
                '''
        
       -        if (self.nw == 0):
       -            V_total = self.L[0] * self.L[1] * self.L[2]
       -        elif (self.nw == 1):
       -            V_total = self.L[0] * self.L[1] * self.w_x[0]
       -            if (V_total <= 0.0):
       -                raise Exception("Could not determine total volume")
       +        min_x = numpy.min(self.x[:,0] - self.radius)
       +        min_y = numpy.min(self.x[:,1] - self.radius)
       +        min_z = numpy.min(self.x[:,2] - self.radius)
       +        max_x = numpy.max(self.x[:,0] + self.radius)
       +        max_y = numpy.max(self.x[:,1] + self.radius)
       +        max_z = numpy.max(self.x[:,2] + self.radius)
       +
       +        #if (self.nw == 0):
       +            #V_total = self.L[0] * self.L[1] * self.L[2]
       +        #elif (self.nw == 1):
       +            #V_total = self.L[0] * self.L[1] * self.w_x[0]
       +            #if (V_total <= 0.0):
       +                #raise Exception("Could not determine total volume")
       +
       +        V_total = (max_x - min_x)*(max_y - min_y)*(max_z - min_z)
        
                # Find the volume of solids
                V_solid = numpy.sum(V_sphere(self.radius))
       t@@ -4677,7 +4991,7 @@ class sim:
                plt.ylabel('Jacobi iterations')
                plt.plot(conv[:,0], conv[:,1])
                plt.grid()
       -        plt.savefig('../output/' + self.sid + '-conv.' + graphics_format)
       +        plt.savefig(self.sid + '-conv.' + graphics_format)
                plt.clf()
                plt.close(fig)
        
       t@@ -4814,7 +5128,7 @@ class sim:
                the required value of the maximum normalized residual for the fluid
                solver.
        
       -        The default and recommended value is 1.0e-8.
       +        The default and recommended value is 1.0e-3.
        
                :param tolerance: The tolerance criteria for the maximal normalized
                    residual
 (DIR) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
       t@@ -23,13 +23,13 @@ ENDIF (GPU_GENERATION EQUAL 1)
        # Rule to build executable program 
        CUDA_ADD_EXECUTABLE(../sphere
            main.cpp file_io.cpp sphere.cpp device.cu utility.cu utility.cpp
       -    navierstokes.cpp)
       +    navierstokes.cpp darcy.cpp)
        CUDA_ADD_EXECUTABLE(../porosity
            porosity.cpp file_io.cpp sphere.cpp device.cu utility.cu utility.cpp
       -    navierstokes.cpp)
       +    navierstokes.cpp darcy.cpp)
        CUDA_ADD_EXECUTABLE(../forcechains
            forcechains.cpp file_io.cpp sphere.cpp device.cu utility.cu utility.cpp
       -    navierstokes.cpp)
       +    navierstokes.cpp darcy.cpp)
        
        #ADD_EXECUTABLE(unittests boost-unit-tests.cpp sphere.cpp)
        #TARGET_LINK_LIBRARIES(unittests
 (DIR) diff --git a/src/constants.h b/src/constants.h
       t@@ -16,7 +16,7 @@ const Float PI = 3.14159265358979;
        const unsigned int ND = 3;
        
        // Define source code version
       -const double VERSION = 1.07;
       +const double VERSION = 2.00;
        
        // Max. number of contacts per particle
        //const int NC = 16;
 (DIR) diff --git a/src/darcy.cpp b/src/darcy.cpp
       t@@ -0,0 +1,311 @@
       +#include <iostream>
       +#include <cstdio>
       +#include <cstdlib>
       +#include <string>
       +#include <vector>
       +
       +#include "typedefs.h"
       +#include "datatypes.h"
       +#include "constants.h"
       +#include "sphere.h"
       +#include "utility.h"
       +
       +// 1: Enable color output in array printing functions, 0: Disable
       +const int color_output = 0;
       +
       +// Initialize memory
       +void DEM::initDarcyMem()
       +{
       +    // Number of cells
       +    darcy.nx = grid.num[0];
       +    darcy.ny = grid.num[1];
       +    darcy.nz = grid.num[2];
       +    unsigned int ncells = darcyCells();
       +    //unsigned int ncells_st = darcyCellsVelocity();
       +
       +    darcy.p     = new Float[ncells];     // hydraulic pressure
       +    darcy.v     = new Float3[ncells];    // hydraulic velocity
       +    darcy.phi   = new Float[ncells];     // porosity
       +    darcy.dphi  = new Float[ncells];     // porosity change
       +    darcy.norm  = new Float[ncells];     // normalized residual of epsilon
       +    darcy.f_p   = new Float4[np];        // pressure force on particles
       +    darcy.k     = new Float[ncells];     // hydraulic pressure
       +}
       +
       +unsigned int DEM::darcyCells()
       +{
       +    //return darcy.nx*darcy.ny*darcy.nz; // without ghost nodes
       +    return (darcy.nx+2)*(darcy.ny+2)*(darcy.nz+2); // with ghost nodes
       +}
       +
       +// Returns the number of velocity nodes in a congruent padded grid. There are
       +// velocity nodes between the boundary points and the pressure ghost nodes, but
       +// not on the outer side of the ghost nodes
       +unsigned int DEM::darcyCellsVelocity()
       +{
       +    // Congruent padding for velocity grids. See "Cohen and Molemaker 'A fast
       +    // double precision CFD code using CUDA'" for details
       +    //return (darcy.nx+1)*(darcy.ny+1)*(darcy.nz+1); // without ghost nodes
       +    return (darcy.nx+3)*(darcy.ny+3)*(darcy.nz+3); // with ghost nodes
       +}
       +
       +// Free memory
       +void DEM::freeDarcyMem()
       +{
       +    delete[] darcy.p;
       +    delete[] darcy.v;
       +    delete[] darcy.phi;
       +    delete[] darcy.dphi;
       +    delete[] darcy.norm;
       +    delete[] darcy.f_p;
       +    delete[] darcy.k;
       +}
       +
       +// 3D index to 1D index
       +unsigned int DEM::d_idx(
       +        const int x,
       +        const int y,
       +        const int z)
       +{
       +    // without ghost nodes
       +    //return x + d.nx*y + d.nx*d.ny*z;
       +
       +    // with ghost nodes
       +    // the ghost nodes are placed at x,y,z = -1 and WIDTH
       +    return (x+1) + (darcy.nx+2)*(y+1) + (darcy.nx+2)*(darcy.ny+2)*(z+1);
       +}
       +
       +// 3D index to 1D index of cell-face velocity nodes. The cell-face velocities
       +// are placed at x = [0;nx], y = [0;ny], z = [0;nz].
       +// The coordinate x,y,z corresponds to the lowest corner of cell(x,y,z).
       +unsigned int DEM::d_vidx(
       +        const int x,
       +        const int y,
       +        const int z)
       +{
       +    //return x + (darcy.nx+1)*y + (darcy.nx+1)*(darcy.ny+1)*z; // without ghost nodes
       +    return (x+1) + (darcy.nx+3)*(y+1) + (darcy.nx+3)*(darcy.ny+3)*(z+1); // with ghost nodes
       +}
       +
       +Float DEM::largestDarcyPermeability()
       +{
       +    Float k_max = 0.0;
       +    for (unsigned int z=0; z<grid.num[2]; z++)
       +        for (unsigned int y=0; y<grid.num[1]; y++)
       +            for (unsigned int x=0; x<grid.num[0]; x++)
       +                if (darcy.k[d_idx(x,y,z)] > k_max)
       +                    k_max = darcy.k[d_idx(x,y,z)];
       +    return k_max;
       +}
       +
       +Float DEM::smallestDarcyPorosity()
       +{
       +    Float phi_min = 10.0;
       +    for (unsigned int z=0; z<grid.num[2]; z++)
       +        for (unsigned int y=0; y<grid.num[1]; y++)
       +            for (unsigned int x=0; x<grid.num[0]; x++)
       +                if (darcy.phi[d_idx(x,y,z)] < phi_min)
       +                    phi_min = darcy.phi[d_idx(x,y,z)];
       +    return phi_min;
       +}
       +
       +// Determine if the FTCS (forward time, central space) solution of the Navier
       +// Stokes equations is unstable
       +void DEM::checkDarcyStability()
       +{
       +    // Cell dimensions
       +    const Float dx = grid.L[0]/grid.num[0];
       +    const Float dy = grid.L[1]/grid.num[1];
       +    const Float dz = grid.L[2]/grid.num[2];
       +
       +    const Float alpha_max = largestDarcyPermeability()
       +        /(darcy.beta_f*smallestDarcyPorosity()*darcy.mu);
       +
       +    // Check the diffusion term using von Neumann stability analysis
       +    //if (darcy.mu*time.dt/(dmin*dmin) > 0.5) {
       +    if (time.dt >= 1.0/(2.0*alpha_max) *
       +            1.0/(1.0/(dx*dx) + 1.0/(dy*dy) + 1.0/(dz*dz))) {
       +        std::cerr << "Error: The time step is too large to ensure stability in "
       +            "the diffusive term of the fluid momentum equation.\n"
       +            "Increase the viscosity, decrease the time step, and/or increase "
       +            "the fluid grid cell size." << std::endl;
       +        //exit(1);
       +    }
       +}
       +
       +// Print array values to file stream (stdout, stderr, other file)
       +void DEM::printDarcyArray(FILE* stream, Float* arr)
       +{
       +    int x, y, z;
       +
       +    // show ghost nodes
       +    for (z = darcy.nz; z >= -1; z--) { // top to bottom
       +
       +        fprintf(stream, "z = %d\n", z);
       +
       +        for (y=-1; y<=darcy.ny; y++) {
       +            for (x=-1; x<=darcy.nx; x++) {
       +
       +                if (x > -1 && x < darcy.nx &&
       +                        y > -1 && y < darcy.ny &&
       +                        z > -1 && z < darcy.nz) {
       +                    fprintf(stream, "%f\t", arr[d_idx(x,y,z)]);
       +                } else { // ghost node
       +                    if (color_output) {
       +                        fprintf(stream, "\x1b[30;1m%f\x1b[0m\t",
       +                                arr[d_idx(x,y,z)]);
       +                    } else {
       +                        fprintf(stream, "%f\t", arr[d_idx(x,y,z)]);
       +                    }
       +                }
       +            }
       +            fprintf(stream, "\n");
       +        }
       +        fprintf(stream, "\n");
       +    }
       +}
       +
       +// Overload printDarcyArray to add optional description
       +void DEM::printDarcyArray(FILE* stream, Float* arr, std::string desc)
       +{
       +    std::cout << "\n" << desc << ":\n";
       +    printDarcyArray(stream, arr);
       +}
       +
       +// Print array values to file stream (stdout, stderr, other file)
       +void DEM::printDarcyArray(FILE* stream, Float3* arr)
       +{
       +    int x, y, z;
       +    for (z=0; z<darcy.nz; z++) {
       +        for (y=0; y<darcy.ny; y++) {
       +            for (x=0; x<darcy.nx; x++) {
       +                fprintf(stream, "%f,%f,%f\t",
       +                        arr[d_idx(x,y,z)].x,
       +                        arr[d_idx(x,y,z)].y,
       +                        arr[d_idx(x,y,z)].z);
       +            }
       +            fprintf(stream, "\n");
       +        }
       +        fprintf(stream, "\n");
       +    }
       +}
       +
       +// Overload printDarcyArray to add optional description
       +void DEM::printDarcyArray(FILE* stream, Float3* arr, std::string desc)
       +{
       +    std::cout << "\n" << desc << ":\n";
       +    printDarcyArray(stream, arr);
       +}
       +
       +// Returns the average value of the normalized residuals
       +double DEM::avgNormResDarcy()
       +{
       +    double norm_res_sum, norm_res;
       +
       +    // do not consider the values of the ghost nodes
       +    for (int z=0; z<grid.num[2]; ++z) {
       +        for (int y=0; y<grid.num[1]; ++y) {
       +            for (int x=0; x<grid.num[0]; ++x) {
       +                norm_res = static_cast<double>(darcy.norm[d_idx(x,y,z)]);
       +                if (norm_res != norm_res) {
       +                    std::cerr << "\nError: normalized residual is NaN ("
       +                        << norm_res << ") in cell "
       +                        << x << "," << y << "," << z << std::endl;
       +                    std::cerr << "\tt = " << time.current << ", iter = "
       +                        << int(time.current/time.dt) << std::endl;
       +                    std::cerr << "This often happens if the system has become "
       +                        "unstable." << std::endl;
       +                    exit(1);
       +                }
       +                norm_res_sum += norm_res;
       +            }
       +        }
       +    }
       +    return norm_res_sum/(grid.num[0]*grid.num[1]*grid.num[2]);
       +}
       +
       +
       +// Returns the average value of the normalized residuals
       +double DEM::maxNormResDarcy()
       +{
       +    double max_norm_res = -1.0e9; // initialized to a small number
       +    double norm_res;
       +
       +    // do not consider the values of the ghost nodes
       +    for (int z=0; z<grid.num[2]; ++z) {
       +        for (int y=0; y<grid.num[1]; ++y) {
       +            for (int x=0; x<grid.num[0]; ++x) {
       +                norm_res = fabs(static_cast<double>(darcy.norm[d_idx(x,y,z)]));
       +                if (norm_res != norm_res) {
       +                    std::cerr << "\nError: normalized residual is NaN ("
       +                        << norm_res << ") in cell "
       +                        << x << "," << y << "," << z << std::endl;
       +                    std::cerr << "\tt = " << time.current << ", iter = "
       +                        << int(time.current/time.dt) << std::endl;
       +                    std::cerr << "This often happens if the system has become "
       +                        "unstable." << std::endl;
       +                    exit(1);
       +                }
       +                if (norm_res > max_norm_res)
       +                    max_norm_res = norm_res;
       +            }
       +        }
       +    }
       +    return max_norm_res;
       +}
       +
       +// Initialize fluid parameters
       +void DEM::initDarcy()
       +{
       +    // Cell size 
       +    darcy.dx = grid.L[0]/darcy.nx;
       +    darcy.dy = grid.L[1]/darcy.ny;
       +    darcy.dz = grid.L[2]/darcy.nz;
       +
       +    if (verbose == 1) {
       +        std::cout << "  - Fluid grid dimensions: "
       +            << darcy.nx << "*"
       +            << darcy.ny << "*"
       +            << darcy.nz << std::endl;
       +        std::cout << "  - Fluid grid cell size: "
       +            << darcy.dx << "*"
       +            << darcy.dy << "*"
       +            << darcy.dz << std::endl;
       +    }
       +}
       +
       +// Write values in scalar field to file
       +void DEM::writeDarcyArray(Float* array, const char* filename)
       +{
       +    FILE* file;
       +    if ((file = fopen(filename,"w"))) {
       +        printDarcyArray(file, array);
       +        fclose(file);
       +    } else {
       +        fprintf(stderr, "Error, could not open %s.\n", filename);
       +    }
       +}
       +
       +// Write values in vector field to file
       +void DEM::writeDarcyArray(Float3* array, const char* filename)
       +{
       +    FILE* file;
       +    if ((file = fopen(filename,"w"))) {
       +        printDarcyArray(file, array);
       +        fclose(file);
       +    } else {
       +        fprintf(stderr, "Error, could not open %s.\n", filename);
       +    }
       +}
       +
       +
       +// Print final heads and free memory
       +void DEM::endDarcy()
       +{
       +    // Write arrays to stdout/text files for debugging
       +    //writeDarcyArray(darcy.phi, "ns_phi.txt");
       +
       +    freeDarcyMem();
       +}
       +
       +// vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
 (DIR) diff --git a/src/darcy.cuh b/src/darcy.cuh
       t@@ -0,0 +1,1025 @@
       +// darcy.cuh
       +// CUDA implementation of Darcy porous flow
       +
       +#include <iostream>
       +#include <cuda.h>
       +//#include <cutil_math.h>
       +#include <helper_math.h>
       +
       +#include "vector_arithmetic.h"  // for arbitrary precision vectors
       +#include "sphere.h"
       +#include "datatypes.h"
       +#include "utility.h"
       +#include "constants.cuh"
       +#include "debug.h"
       +
       +// Initialize memory
       +void DEM::initDarcyMemDev(void)
       +{
       +    // size of scalar field
       +    unsigned int memSizeF = sizeof(Float)*darcyCells();
       +
       +    // size of cell-face arrays in staggered grid discretization
       +    //unsigned int memSizeFface = sizeof(Float)*darcyCellsVelocity();
       +
       +    cudaMalloc((void**)&dev_darcy_dpdt, memSizeF);  // Backwards Euler gradient
       +    cudaMalloc((void**)&dev_darcy_p_old, memSizeF); // old pressure
       +    cudaMalloc((void**)&dev_darcy_p, memSizeF);     // hydraulic pressure
       +    cudaMalloc((void**)&dev_darcy_p_new, memSizeF); // updated pressure
       +    cudaMalloc((void**)&dev_darcy_v, memSizeF*3);   // cell hydraulic velocity
       +    //cudaMalloc((void**)&dev_darcy_vp_avg, memSizeF*3); // avg. particle velocity
       +    //cudaMalloc((void**)&dev_darcy_d_avg, memSizeF); // avg. particle diameter
       +    cudaMalloc((void**)&dev_darcy_phi, memSizeF);   // cell porosity
       +    cudaMalloc((void**)&dev_darcy_dphi, memSizeF);  // cell porosity change
       +    cudaMalloc((void**)&dev_darcy_norm, memSizeF);  // normalized residual
       +    cudaMalloc((void**)&dev_darcy_f_p, sizeof(Float4)*np); // pressure force
       +    cudaMalloc((void**)&dev_darcy_k, memSizeF);        // hydraulic permeability
       +    cudaMalloc((void**)&dev_darcy_grad_k, memSizeF*3);  // grad(permeability)
       +    //cudaMalloc((void**)&dev_darcy_div_v_p, memSizeF3); // divergence(v_p)
       +
       +    checkForCudaErrors("End of initDarcyMemDev");
       +}
       +
       +// Free memory
       +void DEM::freeDarcyMemDev()
       +{
       +    cudaFree(dev_darcy_dpdt);
       +    cudaFree(dev_darcy_p_old);
       +    cudaFree(dev_darcy_p);
       +    cudaFree(dev_darcy_p_new);
       +    cudaFree(dev_darcy_v);
       +    //cudaFree(dev_darcy_vp_avg);
       +    //cudaFree(dev_darcy_d_avg);
       +    cudaFree(dev_darcy_phi);
       +    cudaFree(dev_darcy_dphi);
       +    cudaFree(dev_darcy_norm);
       +    cudaFree(dev_darcy_f_p);
       +    cudaFree(dev_darcy_k);
       +    cudaFree(dev_darcy_grad_k);
       +    //cudaFree(dev_darcy_div_v_p);
       +}
       +
       +// Transfer to device
       +void DEM::transferDarcyToGlobalDeviceMemory(int statusmsg)
       +{
       +    checkForCudaErrors("Before attempting cudaMemcpy in "
       +            "transferDarcyToGlobalDeviceMemory");
       +
       +    //if (verbose == 1 && statusmsg == 1)
       +    //std::cout << "  Transfering fluid data to the device:           ";
       +
       +    // memory size for a scalar field
       +    unsigned int memSizeF  = sizeof(Float)*darcyCells();
       +
       +    //writeNSarray(ns.p, "ns.p.txt");
       +
       +    cudaMemcpy(dev_darcy_p, darcy.p, memSizeF, cudaMemcpyHostToDevice);
       +    checkForCudaErrors("transferDarcytoGlobalDeviceMemory after first "
       +            "cudaMemcpy");
       +    cudaMemcpy(dev_darcy_v, darcy.v, memSizeF*3, cudaMemcpyHostToDevice);
       +    cudaMemcpy(dev_darcy_phi, darcy.phi, memSizeF, cudaMemcpyHostToDevice);
       +    cudaMemcpy(dev_darcy_dphi, darcy.dphi, memSizeF, cudaMemcpyHostToDevice);
       +    cudaMemcpy(dev_darcy_f_p, darcy.f_p, sizeof(Float4)*np,
       +            cudaMemcpyHostToDevice);
       +
       +    checkForCudaErrors("End of transferDarcyToGlobalDeviceMemory");
       +    //if (verbose == 1 && statusmsg == 1)
       +    //std::cout << "Done" << std::endl;
       +}
       +
       +// Transfer from device
       +void DEM::transferDarcyFromGlobalDeviceMemory(int statusmsg)
       +{
       +    if (verbose == 1 && statusmsg == 1)
       +        std::cout << "  Transfering fluid data from the device:         ";
       +
       +    // memory size for a scalar field
       +    unsigned int memSizeF  = sizeof(Float)*darcyCells();
       +
       +    cudaMemcpy(darcy.p, dev_darcy_p, memSizeF, cudaMemcpyDeviceToHost);
       +    checkForCudaErrors("In transferDarcyFromGlobalDeviceMemory, dev_darcy_p", 0);
       +    cudaMemcpy(darcy.v, dev_darcy_v, memSizeF*3, cudaMemcpyDeviceToHost);
       +    cudaMemcpy(darcy.phi, dev_darcy_phi, memSizeF, cudaMemcpyDeviceToHost);
       +    cudaMemcpy(darcy.dphi, dev_darcy_dphi, memSizeF, cudaMemcpyDeviceToHost);
       +    cudaMemcpy(darcy.f_p, dev_darcy_f_p, sizeof(Float4)*np,
       +            cudaMemcpyDeviceToHost);
       +    cudaMemcpy(darcy.k, dev_darcy_k, memSizeF, cudaMemcpyDeviceToHost);
       +
       +    checkForCudaErrors("End of transferDarcyFromGlobalDeviceMemory", 0);
       +    if (verbose == 1 && statusmsg == 1)
       +        std::cout << "Done" << std::endl;
       +}
       +
       +// Transfer the normalized residuals from device to host
       +void DEM::transferDarcyNormFromGlobalDeviceMemory()
       +{
       +    cudaMemcpy(darcy.norm, dev_darcy_norm, sizeof(Float)*darcyCells(),
       +            cudaMemcpyDeviceToHost);
       +    checkForCudaErrors("End of transferDarcyNormFromGlobalDeviceMemory");
       +}
       +
       +// Transfer the pressures from device to host
       +void DEM::transferDarcyPressuresFromGlobalDeviceMemory()
       +{
       +    cudaMemcpy(darcy.p, dev_darcy_p, sizeof(Float)*darcyCells(),
       +            cudaMemcpyDeviceToHost);
       +    checkForCudaErrors("End of transferDarcyNormFromGlobalDeviceMemory");
       +}
       +
       +// Get linear index from 3D grid position
       +__inline__ __device__ unsigned int d_idx(
       +        const int x, const int y, const int z)
       +{
       +    // without ghost nodes
       +    //return x + dev_grid.num[0]*y + dev_grid.num[0]*dev_grid.num[1]*z;
       +
       +    // with ghost nodes
       +    // the ghost nodes are placed at x,y,z = -1 and WIDTH
       +    return (x+1) + (devC_grid.num[0]+2)*(y+1) +
       +        (devC_grid.num[0]+2)*(devC_grid.num[1]+2)*(z+1);
       +}
       +
       +// Get linear index of velocity node from 3D grid position in staggered grid
       +__inline__ __device__ unsigned int d_vidx(
       +        const int x, const int y, const int z)
       +{
       +    // without ghost nodes
       +    //return x + (devC_grid.num[0]+1)*y
       +    //+ (devC_grid.num[0]+1)*(devC_grid.num[1]+1)*z;
       +
       +    // with ghost nodes
       +    // the ghost nodes are placed at x,y,z = -1 and WIDTH+1
       +    return (x+1) + (devC_grid.num[0]+3)*(y+1)
       +        + (devC_grid.num[0]+3)*(devC_grid.num[1]+3)*(z+1);
       +}
       +
       +// The normalized residuals are given an initial value of 0, since the values at
       +// the Dirichlet boundaries aren't written during the iterations.
       +__global__ void setDarcyNormZero(
       +        Float* __restrict__ dev_darcy_norm)
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +
       +    // check that we are not outside the fluid grid
       +    if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
       +        __syncthreads();
       +        dev_darcy_norm[d_idx(x,y,z)] = 0.0;
       +    }
       +}
       +
       +// Update a field in the ghost nodes from their parent cell values. The edge
       +// (diagonal) cells are not written since they are not read. Launch this kernel
       +// for all cells in the grid using
       +// setDarcyGhostNodes<datatype><<<.. , ..>>>( .. );
       +    template<typename T>
       +__global__ void setDarcyGhostNodes(
       +        T* __restrict__ dev_scalarfield,
       +        const int bc_bot,
       +        const int bc_top)
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +
       +    // Grid dimensions
       +    const unsigned int nx = devC_grid.num[0];
       +    const unsigned int ny = devC_grid.num[1];
       +    const unsigned int nz = devC_grid.num[2];
       +
       +    // check that we are not outside the fluid grid
       +    if (x < nx && y < ny && z < nz) {
       +
       +        const T val = dev_scalarfield[d_idx(x,y,z)];
       +
       +        // x
       +        if (x == 0)
       +            dev_scalarfield[idx(nx,y,z)] = val;
       +        if (x == nx-1)
       +            dev_scalarfield[idx(-1,y,z)] = val;
       +
       +        // y
       +        if (y == 0)
       +            dev_scalarfield[idx(x,ny,z)] = val;
       +        if (y == ny-1)
       +            dev_scalarfield[idx(x,-1,z)] = val;
       +
       +        // z
       +        if (z == 0 && bc_bot == 0)
       +            dev_scalarfield[idx(x,y,-1)] = val;     // Dirichlet
       +        if (z == 0 && bc_bot == 1)
       +            dev_scalarfield[idx(x,y,-1)] = val;     // Neumann
       +        if (z == 0 && bc_bot == 2)
       +            dev_scalarfield[idx(x,y,nz)] = val;     // Periodic -z
       +
       +        if (z == nz-1 && bc_top == 0)
       +            dev_scalarfield[idx(x,y,nz)] = val;     // Dirichlet
       +        if (z == nz-2 && bc_top == 1)
       +            dev_scalarfield[idx(x,y,nz)] = val;     // Neumann
       +        if (z == nz-1 && bc_top == 2)
       +            dev_scalarfield[idx(x,y,-1)] = val;     // Periodic +z
       +    }
       +}
       +
       +// Find the porosity in each cell on the base of a sphere, centered at the cell
       +// center. 
       +__global__ void findDarcyPorosities(
       +        const unsigned int* __restrict__ dev_cellStart,   // in
       +        const unsigned int* __restrict__ dev_cellEnd,     // in
       +        const Float4* __restrict__ dev_x_sorted,          // in
       +        const unsigned int iteration,                     // in
       +        const unsigned int np,                            // in
       +        const Float c_phi,                                // in
       +        Float*  __restrict__ dev_darcy_phi,               // in + out
       +        Float*  __restrict__ dev_darcy_dphi)              // out
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +
       +    // Grid dimensions
       +    const unsigned int nx = devC_grid.num[0];
       +    const unsigned int ny = devC_grid.num[1];
       +    const unsigned int nz = devC_grid.num[2];
       +
       +    // Cell dimensions
       +    const Float dx = devC_grid.L[0]/nx;
       +    const Float dy = devC_grid.L[1]/ny;
       +    const Float dz = devC_grid.L[2]/nz;
       +
       +    // Cell sphere radius
       +    //const Float R = fmin(dx, fmin(dy,dz)) * 0.5; // diameter = cell width
       +    const Float R = fmin(dx, fmin(dy,dz));       // diameter = 2*cell width
       +    const Float cell_volume = 4.0/3.0*M_PI*R*R*R;
       +
       +    Float void_volume = cell_volume;
       +    Float4 xr;  // particle pos. and radius
       +
       +    // check that we are not outside the fluid grid
       +    if (x < nx && y < ny && z < nz) {
       +
       +        if (np > 0) {
       +
       +            // Cell sphere center position
       +            const Float3 X = MAKE_FLOAT3(
       +                    x*dx + 0.5*dx,
       +                    y*dy + 0.5*dy,
       +                    z*dz + 0.5*dz);
       +
       +            Float d, r;
       +            Float phi = 1.00;
       +            //Float4 v;
       +            unsigned int n = 0;
       +
       +            //Float3 v_avg = MAKE_FLOAT3(0.0, 0.0, 0.0);
       +            //Float  d_avg = 0.0;
       +
       +            // Read old porosity
       +            __syncthreads();
       +            Float phi_0 = dev_darcy_phi[d_idx(x,y,z)];
       +
       +            // The cell 3d index
       +            const int3 gridPos = make_int3((int)x,(int)y,(int)z);
       +
       +            // The neighbor cell 3d index
       +            int3 targetCell;
       +
       +            // The distance modifier for particles across periodic boundaries
       +            Float3 dist, distmod;
       +
       +            unsigned int cellID, startIdx, endIdx, i;
       +
       +            // Iterate over 27 neighbor cells, R = cell width
       +            /*for (int z_dim=-1; z_dim<2; ++z_dim) // z-axis
       +              for (int y_dim=-1; y_dim<2; ++y_dim) // y-axis
       +              for (int x_dim=-1; x_dim<2; ++x_dim) // x-axis*/
       +
       +            // Iterate over 27 neighbor cells, R = 2*cell width
       +            for (int z_dim=-2; z_dim<3; ++z_dim) { // z-axis
       +                for (int y_dim=-2; y_dim<3; ++y_dim) { // y-axis
       +                    for (int x_dim=-2; x_dim<3; ++x_dim) { // x-axis
       +
       +                        // Index of neighbor cell this iteration is looking at
       +                        targetCell = gridPos + make_int3(x_dim, y_dim, z_dim);
       +
       +                        // Get distance modifier for interparticle
       +                        // vector, if it crosses a periodic boundary
       +                        distmod = MAKE_FLOAT3(0.0, 0.0, 0.0);
       +                        if (findDistMod(&targetCell, &distmod) != -1) {
       +
       +                            // Calculate linear cell ID
       +                            cellID = targetCell.x
       +                                + targetCell.y * devC_grid.num[0]
       +                                + (devC_grid.num[0] * devC_grid.num[1])
       +                                * targetCell.z;
       +
       +                            // Lowest particle index in cell
       +                            __syncthreads();
       +                            startIdx = dev_cellStart[cellID];
       +
       +                            // Make sure cell is not empty
       +                            if (startIdx != 0xffffffff) {
       +
       +                                // Highest particle index in cell
       +                                __syncthreads();
       +                                endIdx = dev_cellEnd[cellID];
       +
       +                                // Iterate over cell particles
       +                                for (i=startIdx; i<endIdx; ++i) {
       +
       +                                    // Read particle position and radius
       +                                    __syncthreads();
       +                                    xr = dev_x_sorted[i];
       +                                    //v  = dev_vel_sorted[i];
       +                                    r = xr.w;
       +
       +                                    // Find center distance
       +                                    dist = MAKE_FLOAT3(
       +                                            X.x - xr.x, 
       +                                            X.y - xr.y,
       +                                            X.z - xr.z);
       +                                    dist += distmod;
       +                                    d = length(dist);
       +
       +                                    // Lens shaped intersection
       +                                    if ((R - r) < d && d < (R + r)) {
       +                                        void_volume -=
       +                                            1.0/(12.0*d) * (
       +                                                    M_PI*(R + r - d)*(R + r - d)
       +                                                    *(d*d + 2.0*d*r - 3.0*r*r
       +                                                        + 2.0*d*R + 6.0*r*R
       +                                                        - 3.0*R*R) );
       +                                        //v_avg += MAKE_FLOAT3(v.x, v.y, v.z);
       +                                        //d_avg += r+r;
       +                                        n++;
       +                                    }
       +
       +                                    // Particle fully contained in cell sphere
       +                                    if (d <= R - r) {
       +                                        void_volume -= 4.0/3.0*M_PI*r*r*r;
       +                                        //v_avg += MAKE_FLOAT3(v.x, v.y, v.z);
       +                                        //d_avg += r+r;
       +                                        n++;
       +                                    }
       +                                }
       +                            }
       +                        }
       +                    }
       +                }
       +            }
       +
       +            //if (phi < 0.999) {
       +            //v_avg /= n;
       +            //d_avg /= n;
       +            //}
       +
       +            // Make sure that the porosity is in the interval [0.0;1.0]
       +            phi = fmin(0.99, fmax(0.01, void_volume/cell_volume));
       +            //phi = void_volume/cell_volume;
       +
       +            Float dphi = phi - phi_0;
       +            if (iteration == 0)
       +                dphi = 0.0;
       +
       +            // report values to stdout for debugging
       +            //printf("%d,%d,%d\tphi = %f dphi = %f\n", x,y,z, phi, dphi);
       +            //printf("%d,%d,%d\tphi = %f dphi = %f v_avg = %f,%f,%f d_avg = %f\n",
       +            //       x,y,z, phi, dphi, v_avg.x, v_avg.y, v_avg.z, d_avg);
       +
       +            // Save porosity and porosity change
       +            __syncthreads();
       +            //phi = 0.5; dphi = 0.0; // disable porosity effects
       +            const unsigned int cellidx = d_idx(x,y,z);
       +            dev_darcy_phi[cellidx]  = phi*c_phi;
       +            dev_darcy_dphi[cellidx] = dphi*c_phi;
       +            //dev_darcy_vp_avg[cellidx] = v_avg;
       +            //dev_darcy_d_avg[cellidx]  = d_avg;
       +
       +#ifdef CHECK_FLUID_FINITE
       +            (void)checkFiniteFloat("phi", x, y, z, phi);
       +            (void)checkFiniteFloat("dphi", x, y, z, dphi);
       +            //(void)checkFiniteFloat3("v_avg", x, y, z, v_avg);
       +            //(void)checkFiniteFloat("d_avg", x, y, z, d_avg);
       +#endif
       +        } else {
       +
       +            __syncthreads();
       +            const unsigned int cellidx = d_idx(x,y,z);
       +
       +            //Float phi = 0.5;
       +            //Float dphi = 0.0;
       +            //if (iteration == 20 && x == nx/2 && y == ny/2 && z == nz/2) {
       +            //phi = 0.4;
       +            //dphi = 0.1;
       +            //}
       +            //dev_darcy_phi[cellidx]  = phi;
       +            //dev_darcy_dphi[cellidx] = dphi;
       +            dev_darcy_phi[cellidx]  = 0.999;
       +            dev_darcy_dphi[cellidx] = 0.0;
       +
       +            //dev_darcy_vp_avg[cellidx] = MAKE_FLOAT3(0.0, 0.0, 0.0);
       +            //dev_darcy_d_avg[cellidx]  = 0.0;
       +        }
       +    }
       +}
       +
       +// Find the particle velocity divergence at the cell center from the average
       +// particle velocities on the cell faces
       +__global__ void findDarcyParticleVelocityDivergence(
       +        const Float* __restrict__ dev_darcy_v_p_x,  // in
       +        const Float* __restrict__ dev_darcy_v_p_y,  // in
       +        const Float* __restrict__ dev_darcy_v_p_z,  // in
       +        Float* __restrict__ dev_darcy_div_v_p)      // out
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +
       +    // Grid dimensions
       +    const unsigned int nx = devC_grid.num[0];
       +    const unsigned int ny = devC_grid.num[1];
       +    const unsigned int nz = devC_grid.num[2];
       +
       +    if (x < nx && y < ny && z < nz) {
       +
       +        // read values
       +        __syncthreads();
       +        Float v_p_xn = dev_darcy_v_p_x[d_vidx(x,y,z)];
       +        Float v_p_xp = dev_darcy_v_p_x[d_vidx(x+1,y,z)];
       +        Float v_p_yn = dev_darcy_v_p_y[d_vidx(x,y,z)];
       +        Float v_p_yp = dev_darcy_v_p_y[d_vidx(x,y+1,z)];
       +        Float v_p_zn = dev_darcy_v_p_z[d_vidx(x,y,z)];
       +        Float v_p_zp = dev_darcy_v_p_z[d_vidx(x,y,z+1)];
       +
       +        // cell dimensions
       +        const Float dx = devC_grid.L[0]/nx;
       +        const Float dy = devC_grid.L[1]/ny;
       +        const Float dz = devC_grid.L[2]/nz;
       +
       +        // calculate the divergence using first order central finite differences
       +        const Float div_v_p =
       +            (v_p_xp - v_p_xn)/dx +
       +            (v_p_yp - v_p_yn)/dy +
       +            (v_p_zp - v_p_zn)/dz;
       +
       +        __syncthreads();
       +        dev_darcy_div_v_p[d_idx(x,y,z)] = div_v_p;
       +
       +#ifdef CHECK_FLUID_FINITE
       +        checkFiniteFloat("div_v_p", x, y, z, div_v_p);
       +#endif
       +    }
       +}
       +
       +// Find particle-fluid interaction force as outlined by Zhou et al. 2010, and
       +// originally by Gidaspow 1992. All terms other than the pressure force are
       +// neglected. The buoyancy force is included.
       +__global__ void findDarcyPressureForce(
       +    const Float4* __restrict__ dev_x,           // in
       +    const Float*  __restrict__ dev_darcy_p,     // in
       +    const Float*  __restrict__ dev_darcy_phi,   // in
       +    Float4* __restrict__ dev_force,             // out
       +    Float4* __restrict__ dev_darcy_f_p)         // out
       +{
       +    unsigned int i = threadIdx.x + blockIdx.x*blockDim.x; // Particle index
       +
       +    if (i < devC_np) {
       +
       +        // read particle information
       +        __syncthreads();
       +        const Float4 x = dev_x[i];
       +
       +        // determine fluid cell containing the particle
       +        const unsigned int i_x =
       +            floor((x.x - devC_grid.origo[0])/(devC_grid.L[0]/devC_grid.num[0]));
       +        const unsigned int i_y =
       +            floor((x.y - devC_grid.origo[1])/(devC_grid.L[1]/devC_grid.num[1]));
       +        const unsigned int i_z =
       +            floor((x.z - devC_grid.origo[2])/(devC_grid.L[2]/devC_grid.num[2]));
       +        const unsigned int cellidx = d_idx(i_x, i_y, i_z);
       +
       +        // determine cell dimensions
       +        const Float dx = devC_grid.L[0]/devC_grid.num[0];
       +        const Float dy = devC_grid.L[1]/devC_grid.num[1];
       +        const Float dz = devC_grid.L[2]/devC_grid.num[2];
       +
       +        // read fluid information
       +        __syncthreads();
       +        const Float phi = dev_darcy_phi[cellidx];
       +        const Float p_xn = dev_darcy_p[d_idx(i_x-1,i_y,i_z)];
       +        //const Float p    = dev_darcy_p[cellidx];
       +        const Float p_xp = dev_darcy_p[d_idx(i_x+1,i_y,i_z)];
       +        const Float p_yn = dev_darcy_p[d_idx(i_x,i_y-1,i_z)];
       +        const Float p_yp = dev_darcy_p[d_idx(i_x,i_y+1,i_z)];
       +        const Float p_zn = dev_darcy_p[d_idx(i_x,i_y,i_z-1)];
       +        const Float p_zp = dev_darcy_p[d_idx(i_x,i_y,i_z+1)];
       +
       +        // find particle volume (radius in x.w)
       +        const Float V = 4.0/3.0*M_PI*x.w*x.w*x.w;
       +
       +        // determine pressure gradient from first order central difference
       +        const Float3 grad_p = MAKE_FLOAT3(
       +                (p_xp - p_xn)/(dx + dx),
       +                (p_yp - p_yn)/(dy + dy),
       +                (p_zp - p_zn)/(dz + dz));
       +
       +        // find pressure gradient force plus buoyancy force.
       +        // buoyancy force = weight of displaced fluid
       +        // f_b = -rho_f*V*g
       +        const Float3 f_p = -1.0*grad_p*V/(1.0 - phi);
       +            //- devC_params.rho_f*V*MAKE_FLOAT3(
       +                    //devC_params.g[0],
       +                    //devC_params.g[1],
       +                    //devC_params.g[2]);
       +
       +        /*printf("%d,%d,%d findPF:\n"
       +                "\tphi    = %f\n"
       +                "\tp      = %f\n"
       +                "\tgrad_p = % f, % f, % f\n"
       +                "\tf_p    = % f, % f, % f\n",
       +                i_x, i_y, i_z,
       +                phi, p,
       +                grad_p.x, grad_p.y, grad_p.z,
       +                f_p.x, f_p.y, f_p.z);*/
       +
       +#ifdef CHECK_FLUID_FINITE
       +        checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
       +#endif
       +        // save force
       +        __syncthreads();
       +        dev_force[i]    += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
       +        dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
       +    }
       +}
       +
       +// Set the pressure at the top boundary to new_pressure
       +__global__ void setDarcyTopPressure(
       +    const Float new_pressure, Float* __restrict__ dev_darcy_p)
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +    
       +    // check that the thread is located at the top boundary
       +    if (x < devC_grid.num[0] &&
       +        y < devC_grid.num[1] &&
       +        z == devC_grid.num[2]-1) {
       +
       +        const unsigned int cellidx = idx(x,y,z);
       +
       +        // Write the new pressure the top boundary cells
       +        __syncthreads();
       +        dev_darcy_p[cellidx] = new_pressure;
       +    }
       +}
       +
       +// Set the pressure at the top wall to new_pressure
       +__global__ void setDarcyTopWallPressure(
       +    const Float new_pressure,
       +    const unsigned int wall0_iz,
       +    Float* __restrict__ dev_darcy_p)
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +    
       +    // check that the thread is located at the top boundary
       +    if (x < devC_grid.num[0] &&
       +        y < devC_grid.num[1] &&
       +        z == wall0_iz) {
       +
       +        const unsigned int cellidx = idx(x,y,z);
       +
       +        // Write the new pressure the top boundary cells
       +        __syncthreads();
       +        dev_darcy_p[cellidx] = new_pressure;
       +    }
       +}
       +
       +
       +// Find the cell permeabilities from the Kozeny-Carman equation
       +__global__ void findDarcyPermeabilities(
       +        const Float k_c,                            // in
       +        const Float* __restrict__ dev_darcy_phi,    // in
       +        Float* __restrict__ dev_darcy_k)            // out
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +
       +    // Grid dimensions
       +    const unsigned int nx = devC_grid.num[0];
       +    const unsigned int ny = devC_grid.num[1];
       +    const unsigned int nz = devC_grid.num[2];
       +
       +    if (x < nx && y < ny && z < nz) {
       +
       +        // 1D thread index
       +        const unsigned int cellidx = d_idx(x,y,z);
       +
       +        __syncthreads();
       +        Float phi = dev_darcy_phi[cellidx];
       +
       +        // avoid division by zero
       +        if (phi > 0.9999)
       +            phi = 0.9999;
       +
       +        Float k = k_c*pow(phi,3)/pow(1.0 - phi, 2);
       +
       +        /*printf("%d,%d,%d findK:\n"
       +                "\tphi    = %f\n"
       +                "\tk      = %e\n",
       +                x, y, z,
       +                phi, k);*/
       +
       +        // limit permeability [m*m]
       +        // K_gravel = 3.0e-2 m/s => k_gravel = 2.7e-9 m*m
       +        k = fmin(2.7e-9, k);
       +
       +        __syncthreads();
       +        dev_darcy_k[cellidx] = k;
       +
       +#ifdef CHECK_FLUID_FINITE
       +        checkFiniteFloat("k", x, y, z, k);
       +#endif
       +    }
       +}
       +
       +// Find the spatial gradients of the permeability. To be used in the pressure
       +// diffusion term in updateDarcySolution.
       +__global__ void findDarcyPermeabilityGradients(
       +        const Float*  __restrict__ dev_darcy_k,   // in
       +        Float3* __restrict__ dev_darcy_grad_k)    // out
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +
       +    // Grid dimensions
       +    const unsigned int nx = devC_grid.num[0];
       +    const unsigned int ny = devC_grid.num[1];
       +    const unsigned int nz = devC_grid.num[2];
       +
       +    // Cell size
       +    const Float dx = devC_grid.L[0]/nx;
       +    const Float dy = devC_grid.L[1]/ny;
       +    const Float dz = devC_grid.L[2]/nz;
       +
       +    if (x < nx && y < ny && z < nz) {
       +
       +        // 1D thread index
       +        const unsigned int cellidx = d_idx(x,y,z);
       +
       +        // read values
       +        __syncthreads();
       +        const Float k_xn = dev_darcy_k[d_idx(x-1,y,z)];
       +        const Float k_xp = dev_darcy_k[d_idx(x+1,y,z)];
       +        const Float k_yn = dev_darcy_k[d_idx(x,y-1,z)];
       +        const Float k_yp = dev_darcy_k[d_idx(x,y+1,z)];
       +        const Float k_zn = dev_darcy_k[d_idx(x,y,z-1)];
       +        const Float k_zp = dev_darcy_k[d_idx(x,y,z+1)];
       +
       +        // gradient approximated by first-order central difference
       +        const Float3 grad_k = MAKE_FLOAT3(
       +                (k_xp - k_xn)/(dx+dx),
       +                (k_yp - k_yn)/(dy+dy),
       +                (k_zp - k_zn)/(dz+dz));
       +
       +        // write result
       +        __syncthreads();
       +        dev_darcy_grad_k[cellidx] = grad_k;
       +
       +        /*printf("%d,%d,%d findK:\n"
       +                "\tk_x     = %e, %e\n"
       +                "\tk_y     = %e, %e\n"
       +                "\tk_z     = %e, %e\n"
       +                "\tgrad(k) = %e, %e, %e\n",
       +                x, y, z,
       +                k_xn, k_xp,
       +                k_yn, k_yp,
       +                k_zn, k_zp,
       +                grad_k.x, grad_k.y, grad_k.z);*/
       +
       +#ifdef CHECK_FLUID_FINITE
       +        checkFiniteFloat3("grad_k", x, y, z, grad_k);
       +#endif
       +    }
       +}
       +
       +// Find the temporal gradient in pressure using the backwards Euler method
       +__global__ void findDarcyPressureChange(
       +        const Float* __restrict__ dev_darcy_p_old,
       +        const Float* __restrict__ dev_darcy_p,
       +        const unsigned int iter,
       +        Float* __restrict__ dev_darcy_dpdt)
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +
       +    if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
       +
       +        // 1D thread index
       +        const unsigned int cellidx = d_idx(x,y,z);
       +
       +        // read values
       +        __syncthreads();
       +        const Float p_old = dev_darcy_p_old[cellidx];
       +        const Float p     = dev_darcy_p[cellidx];
       +
       +        Float dpdt = (p - p_old)/devC_dt;
       +
       +        // Ignore the large initial pressure gradients caused by solver "warm
       +        // up" towards hydrostatic pressure distribution
       +        if (iter < 2)
       +            dpdt = 0.0;
       +
       +        // write result
       +        __syncthreads();
       +        dev_darcy_dpdt[cellidx] = dpdt;
       +
       +        /*printf("%d,%d,%d\n"
       +                "\tp_old = %e\n"
       +                "\tp     = %e\n"
       +                "\tdt    = %e\n"
       +                "\tdpdt  = %e\n",
       +                x,y,z,
       +                p_old, p,
       +                devC_dt, dpdt);*/
       +
       +#ifdef CHECK_FLUID_FINITE
       +        checkFiniteFloat("dpdt", x, y, z, dpdt);
       +#endif
       +    }
       +}
       +
       +// A single jacobi iteration where the pressure values are updated to
       +// dev_darcy_p_new.
       +// bc = 0: Dirichlet, 1: Neumann
       +__global__ void updateDarcySolution(
       +        const Float*  __restrict__ dev_darcy_p_old,   // in
       +        //const Float*  __restrict__ dev_darcy_dpdt,    // in
       +        const Float*  __restrict__ dev_darcy_p,       // in
       +        const Float*  __restrict__ dev_darcy_k,       // in
       +        const Float*  __restrict__ dev_darcy_phi,     // in
       +        const Float*  __restrict__ dev_darcy_dphi,    // in
       +        const Float3* __restrict__ dev_darcy_grad_k,  // in
       +        const Float beta_f,                           // in
       +        const Float mu,                               // in
       +        const int bc_bot,                             // in
       +        const int bc_top,                             // in
       +        const unsigned int ndem,                      // in
       +        const unsigned int wall0_iz,                  // in
       +        Float* __restrict__ dev_darcy_p_new,          // out
       +        Float* __restrict__ dev_darcy_norm)           // out
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +
       +    // Grid dimensions
       +    const unsigned int nx = devC_grid.num[0];
       +    const unsigned int ny = devC_grid.num[1];
       +    const unsigned int nz = devC_grid.num[2];
       +
       +    // Cell size
       +    const Float dx = devC_grid.L[0]/nx;
       +    const Float dy = devC_grid.L[1]/ny;
       +    const Float dz = devC_grid.L[2]/nz;
       +
       +    if (x < nx && y < ny && z < nz) {
       +
       +        // 1D thread index
       +        const unsigned int cellidx = d_idx(x,y,z);
       +
       +        // read values
       +        __syncthreads();
       +        const Float  k      = dev_darcy_k[cellidx];
       +        const Float3 grad_k = dev_darcy_grad_k[cellidx];
       +        const Float  phi    = dev_darcy_phi[cellidx];
       +        const Float  dphi   = dev_darcy_dphi[cellidx];
       +
       +        //const Float dpdt  = dev_darcy_dpdt[cellidx];
       +
       +        const Float p_old = dev_darcy_p_old[cellidx];
       +
       +        const Float p_xn  = dev_darcy_p[d_idx(x-1,y,z)];
       +        const Float p     = dev_darcy_p[cellidx];
       +        const Float p_xp  = dev_darcy_p[d_idx(x+1,y,z)];
       +        const Float p_yn  = dev_darcy_p[d_idx(x,y-1,z)];
       +        const Float p_yp  = dev_darcy_p[d_idx(x,y+1,z)];
       +        Float p_zn = dev_darcy_p[d_idx(x,y,z-1)];
       +        Float p_zp = dev_darcy_p[d_idx(x,y,z+1)];
       +
       +        // gradient approximated by first-order central difference
       +        const Float3 grad_p = MAKE_FLOAT3(
       +                (p_xp - p_xn)/(dx+dx),
       +                (p_yp - p_yn)/(dy+dy),
       +                (p_zp - p_zn)/(dz+dz));
       +
       +        const Float laplace_p =
       +                (p_xp - (p+p) + p_xn)/(dx*dx) +
       +                (p_yp - (p+p) + p_yn)/(dy*dy) +
       +                (p_zp - (p+p) + p_zn)/(dz*dz);
       +
       +        // find forcing function value
       +        /*const Float f_transient = beta_f*phi*mu/k*dpdt;
       +        const Float f_forcing = mu/((1.0 - phi)*k)*dphi/devC_dt;
       +        const Float f_diff = -1.0*dot(grad_p, grad_k)/k;
       +        const Float f = f_transient + f_forcing + f_diff;*/
       +
       +        //const Float div_v_p = dev_darcy_div_v_p[cellidx];
       +
       +        // Neumann BCs
       +        if (z == 0 && bc_bot == 1)
       +            p_zn = p;
       +        if (z == nz-1 && bc_top == 1)
       +            p_zp = p;
       +
       +        // New value of epsilon in 3D update, derived by rearranging the
       +        // 3d discrete finite difference Laplacian
       +        /*const Float dxdx = dx*dx;
       +        const Float dydy = dy*dy;
       +        const Float dzdz = dz*dz;
       +        Float p_new
       +            = (-dxdx*dydy*dzdz*f
       +               + dydy*dzdz*(p_xn + p_xp)
       +               + dxdx*dzdz*(p_yn + p_yp)
       +               + dxdx*dydy*(p_zn + p_zp))
       +            /(2.0*(dxdx*dydy + dxdx*dzdz + dydy*dzdz));*/
       +
       +        Float p_new = p_old
       +            + devC_dt/(beta_f*phi*mu)*(k*laplace_p + dot(grad_k, grad_p))
       +            - dphi/(beta_f*phi*(1.0 - phi));
       +
       +        // Dirichlet BC at dynamic top wall. wall0_iz will be larger than the
       +        // grid if the wall isn't dynamic
       +        if (z == wall0_iz)
       +            p_new = p;
       +
       +        // Neumann BC at dynamic top wall
       +        //if (z == wall0_iz + 1)
       +            //p_new = p_zn + dp_dz;
       +
       +        // Dirichlet BCs
       +        if ((z == 0 && bc_bot == 0) ||
       +                (z == nz-1 && bc_top == 0) ||
       +                (z == wall0_iz))
       +            p_new = p;
       +
       +        // normalized residual, avoid division by zero
       +        //const Float res_norm = (p_new - p)*(p_new - p)/(p_new*p_new + 1.0e-16);
       +        const Float res_norm = (p_new - p)/(p + 1.0e-16);
       +
       +#ifdef REPORT_FORCING_TERMS
       +        printf("\n%d,%d,%d updateDarcySolution\n"
       +                "dpdt        = %e\n"
       +                "p           = %e\n"
       +                "p_new       = %e\n"
       +                "f           = %e\n"
       +                "f_transient = %e\n"
       +                "f_forcing   = %e\n"
       +                "f_diff      = %e\n"
       +                "res_norm    = %e\n",
       +                x,y,z,
       +                dpdt,
       +                p, p_new,
       +                f,
       +                f_transient, f_forcing, f_diff,
       +                res_norm);
       +#endif
       +
       +        // save new pressure and the residual
       +        __syncthreads();
       +        dev_darcy_p_new[cellidx] = p_new;
       +        dev_darcy_norm[cellidx]  = res_norm;
       +
       +        /*printf("%d,%d,%d\tp = % f\tp_new = % f\tres_norm = % f\n",
       +                x,y,z,
       +                p,
       +                p_new,
       +                res_norm);*/
       +
       +#ifdef CHECK_FLUID_FINITE
       +        checkFiniteFloat("p_new", x, y, z, p_new);
       +        checkFiniteFloat("res_norm", x, y, z, res_norm);
       +#endif
       +    }
       +}
       +
       +__global__ void findNewPressure(
       +        const Float* __restrict__ dev_darcy_dp,     // in
       +        Float* __restrict__ dev_darcy_p)            // in+out
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +
       +    // Grid dimensions
       +    const unsigned int nx = devC_grid.num[0];
       +    const unsigned int ny = devC_grid.num[1];
       +    const unsigned int nz = devC_grid.num[2];
       +
       +    // Check that we are not outside the fluid grid
       +    if (x < nx && y < ny && z < nz) {
       +
       +        const unsigned int cellidx = d_idx(x,y,z);
       +
       +        const Float dp = dev_darcy_dp[cellidx];
       +
       +        // save new pressure
       +        __syncthreads();
       +        dev_darcy_p[cellidx] += dp;
       +
       +        /*printf("%d,%d,%d\tp = % f\tp_new = % f\tres_norm = % f\n",
       +                x,y,z,
       +                p,
       +                p_new,
       +                res_norm);*/
       +
       +#ifdef CHECK_FLUID_FINITE
       +        checkFiniteFloat("dp", x, y, z, dp);
       +#endif
       +    }
       +}
       +
       +// Find cell velocities
       +__global__ void findDarcyVelocities(
       +        const Float* __restrict__ dev_darcy_p,      // in
       +        const Float* __restrict__ dev_darcy_phi,    // in
       +        const Float* __restrict__ dev_darcy_k,      // in
       +        const Float mu,                             // in
       +        Float3* __restrict__ dev_darcy_v)           // out
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +
       +    // Grid dimensions
       +    const unsigned int nx = devC_grid.num[0];
       +    const unsigned int ny = devC_grid.num[1];
       +    const unsigned int nz = devC_grid.num[2];
       +
       +    // Cell size
       +    const Float dx = devC_grid.L[0]/nx;
       +    const Float dy = devC_grid.L[1]/ny;
       +    const Float dz = devC_grid.L[2]/nz;
       +
       +    // Check that we are not outside the fluid grid
       +    if (x < nx && y < ny && z < nz) {
       +
       +        const unsigned int cellidx = d_idx(x,y,z);
       +
       +        __syncthreads();
       +        const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
       +        const Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
       +        const Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
       +        const Float p_yp = dev_darcy_p[d_idx(x,y+1,z)];
       +        const Float p_zn = dev_darcy_p[d_idx(x,y,z-1)];
       +        const Float p_zp = dev_darcy_p[d_idx(x,y,z+1)];
       +
       +        const Float k   = dev_darcy_k[cellidx];
       +        const Float phi = dev_darcy_phi[cellidx];
       +
       +        // approximate pressure gradient with first order central differences
       +        const Float3 grad_p = MAKE_FLOAT3(
       +                (p_xp - p_xn)/(dx + dx),
       +                (p_yp - p_yn)/(dy + dy),
       +                (p_zp - p_zn)/(dz + dz));
       +
       +        // Flux [m/s]: q = -k/nu * dH
       +        // Pore velocity [m/s]: v = q/n
       +
       +        // calculate flux
       +        //const Float3 q = -k/mu*grad_p;
       +
       +        // calculate velocity
       +        //const Float3 v = q/phi;
       +        const Float3 v = (-k/mu * grad_p)/phi;
       +
       +        // Save velocity
       +        __syncthreads();
       +        dev_darcy_v[cellidx] = v;
       +    }
       +}
       +
       +// Print final heads and free memory
       +void DEM::endDarcyDev()
       +{
       +    freeDarcyMemDev();
       +}
       +
       +// vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
 (DIR) diff --git a/src/datatypes.h b/src/datatypes.h
       t@@ -96,8 +96,6 @@ struct Params {
            Float tau_b;          // Bond shear strength
            Float devs_A;         // Amplitude of modulations in normal stress
            Float devs_f;         // Frequency of modulations in normal stress
       -    Float mu;             // Fluid dynamic viscosity
       -    Float rho_f;          // Fluid density
        };
        
        // Structure containing wall parameters
       t@@ -146,6 +144,35 @@ struct NavierStokes {
            Float4* f_p;            // Pressure force on particles
            Float4* f_v;            // Viscous force on particles
            Float4* f_sum;          // Viscous force on particles
       +    Float   mu;             // Fluid dynamic viscosity
       +    Float   rho_f;          // Fluid density
       +};
       +
       +struct Darcy {
       +    int     nx, ny, nz;     // Number of cells in each dimension
       +    Float   dx, dy, dz;     // Cell length in each dim
       +    Float*  p;              // Cell hydraulic pressures
       +    Float3* v;              // Cell fluid velocity
       +    Float*  k;              // Cell hydraulic permeability
       +    Float*  phi;            // Cell porosity
       +    Float*  dphi;           // Cell porosity change
       +    Float*  norm;           // Normalized residual of epsilon updates
       +    Float   p_mod_A;        // Pressure modulation amplitude at top
       +    Float   p_mod_f;        // Pressure modulation frequency at top
       +    Float   p_mod_phi;      // Pressure modulation phase at top
       +    int     bc_bot;         // 0: Dirichlet, 1: Neumann
       +    int     bc_top;         // 0: Dirichlet, 1: Neumann
       +    int     free_slip_bot;  // 0: no, 1: yes
       +    int     free_slip_top;  // 0: no, 1: yes
       +    Float   tolerance;      // Solver parameter: Max residual tolerance
       +    unsigned int maxiter;   // Solver parameter: Max iterations to perform
       +    unsigned int ndem;      // Solver parameter: DEM time steps per CFD step
       +    Float   c_phi;          // Porosity scaling coefficient
       +    Float4* f_p;            // Pressure force on particles
       +    Float   beta_f;         // Adiabatic fluid compressibility
       +    Float   k_c;            // Permeability prefactor in Kozeny-Carman eq.
       +    Float   mu;             // Fluid dynamic viscosity
       +    Float   rho_f;          // Fluid density
        };
        
        // Image structure
 (DIR) diff --git a/src/debug.h b/src/debug.h
       t@@ -20,7 +20,7 @@ const unsigned int nijacnorm = 10;
        // 0: False, 1: True
        const int write_res_log = 0;
        
       -// Report epsilon values during Jacobi iterations to stdout
       +// Report pressure (epsilon) values during Jacobi iterations to stdout
        //#define REPORT_EPSILON
        //#define REPORT_MORE_EPSILON
        
       t@@ -30,15 +30,15 @@ const int write_res_log = 0;
        const int write_conv_log = 1;
        
        // The interval between iteration number reporting in 'output/<sid>-conv.log'
       -//const int conv_log_interval = 10;
       -const int conv_log_interval = 4;
       +const int conv_log_interval = 10;
       +//const int conv_log_interval = 4;
        //const int conv_log_interval = 1;
        
        // Enable drag force and particle fluid coupling
        #define CFD_DEM_COUPLING
        
        // Check for nan/inf values in fluid solver kernels
       -#define CHECK_NS_FINITE
       +#define CHECK_FLUID_FINITE
        
        // Enable reporting of velocity prediction components to stdout
        //#define REPORT_V_P_COMPONENTS
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -24,6 +24,7 @@
        #include "integration.cuh"
        #include "raytracer.cuh"
        #include "navierstokes.cuh"
       +#include "darcy.cuh"
        
        // Returns the number of cores per streaming multiprocessor, which is
        // a function of the device compute capability
       t@@ -170,40 +171,65 @@ __global__ void checkConstantValues(int* dev_equal,
        
            // Compare values between global- and constant
            // memory structures
       -    if (dev_grid->origo[0] != devC_grid.origo[0] ||
       -            dev_grid->origo[1] != devC_grid.origo[1] ||
       -            dev_grid->origo[2] != devC_grid.origo[2] ||
       -            dev_grid->L[0] != devC_grid.L[0] ||
       -            dev_grid->L[1] != devC_grid.L[1] ||
       -            dev_grid->L[2] != devC_grid.L[2] ||
       -            dev_grid->num[0] != devC_grid.num[0] ||
       -            dev_grid->num[1] != devC_grid.num[1] ||
       -            dev_grid->num[2] != devC_grid.num[2] ||
       -            dev_grid->periodic != devC_grid.periodic)
       -        *dev_equal = 1; // Not ok
       -
       -    else if (dev_params->g[0] != devC_params.g[0] ||
       -            dev_params->g[1] != devC_params.g[1] ||
       -            dev_params->g[2] != devC_params.g[2] ||
       -            dev_params->k_n != devC_params.k_n ||
       -            dev_params->k_t != devC_params.k_t ||
       -            dev_params->k_r != devC_params.k_r ||
       -            dev_params->gamma_n != devC_params.gamma_n ||
       -            dev_params->gamma_t != devC_params.gamma_t ||
       -            dev_params->gamma_r != devC_params.gamma_r ||
       -            dev_params->mu_s != devC_params.mu_s ||
       -            dev_params->mu_d != devC_params.mu_d ||
       -            dev_params->mu_r != devC_params.mu_r ||
       -            dev_params->rho != devC_params.rho ||
       -            dev_params->contactmodel != devC_params.contactmodel ||
       -            dev_params->kappa != devC_params.kappa ||
       -            dev_params->db != devC_params.db ||
       -            dev_params->V_b != devC_params.V_b ||
       -            dev_params->lambda_bar != devC_params.lambda_bar ||
       -            dev_params->nb0 != devC_params.nb0 ||
       -            dev_params->mu != devC_params.mu ||
       -            dev_params->rho_f != devC_params.rho_f)
       +    if (dev_grid->origo[0] != devC_grid.origo[0])
       +        *dev_equal = 1;
       +    if (dev_grid->origo[1] != devC_grid.origo[1])
                *dev_equal = 2; // Not ok
       +    if (dev_grid->origo[2] != devC_grid.origo[2])
       +        *dev_equal = 3; // Not ok
       +    if (dev_grid->L[0] != devC_grid.L[0])
       +        *dev_equal = 4; // Not ok
       +    if (dev_grid->L[1] != devC_grid.L[1])
       +        *dev_equal = 5; // Not ok
       +    if (dev_grid->L[2] != devC_grid.L[2])
       +        *dev_equal = 6; // Not ok
       +    if (dev_grid->num[0] != devC_grid.num[0])
       +        *dev_equal = 7; // Not ok
       +    if (dev_grid->num[1] != devC_grid.num[1])
       +        *dev_equal = 8; // Not ok
       +    if (dev_grid->num[2] != devC_grid.num[2])
       +        *dev_equal = 9; // Not ok
       +    if (dev_grid->periodic != devC_grid.periodic)
       +        *dev_equal = 10; // Not ok
       +
       +    if (dev_params->g[0] != devC_params.g[0])
       +        *dev_equal = 11; // Not ok
       +    if (dev_params->g[1] != devC_params.g[1])
       +        *dev_equal = 12; // Not ok
       +    if (dev_params->g[2] != devC_params.g[2])
       +        *dev_equal = 13; // Not ok
       +    if (dev_params->k_n != devC_params.k_n)
       +        *dev_equal = 14; // Not ok
       +    if (dev_params->k_t != devC_params.k_t)
       +        *dev_equal = 15; // Not ok
       +    if (dev_params->k_r != devC_params.k_r)
       +        *dev_equal = 16; // Not ok
       +    if (dev_params->gamma_n != devC_params.gamma_n)
       +        *dev_equal = 17; // Not ok
       +    if (dev_params->gamma_t != devC_params.gamma_t)
       +        *dev_equal = 18; // Not ok
       +    if (dev_params->gamma_r != devC_params.gamma_r)
       +        *dev_equal = 19; // Not ok
       +    if (dev_params->mu_s != devC_params.mu_s)
       +        *dev_equal = 20; // Not ok
       +    if (dev_params->mu_d != devC_params.mu_d)
       +        *dev_equal = 21; // Not ok
       +    if (dev_params->mu_r != devC_params.mu_r)
       +        *dev_equal = 22; // Not ok
       +    if (dev_params->rho != devC_params.rho)
       +        *dev_equal = 23; // Not ok
       +    if (dev_params->contactmodel != devC_params.contactmodel)
       +        *dev_equal = 24; // Not ok
       +    if (dev_params->kappa != devC_params.kappa)
       +        *dev_equal = 25; // Not ok
       +    if (dev_params->db != devC_params.db)
       +        *dev_equal = 26; // Not ok
       +    if (dev_params->V_b != devC_params.V_b)
       +        *dev_equal = 27; // Not ok
       +    if (dev_params->lambda_bar != devC_params.lambda_bar)
       +        *dev_equal = 28; // Not ok
       +    if (dev_params->nb0 != devC_params.nb0)
       +        *dev_equal = 29; // Not ok
        }
        
        // Copy the constant data components to device memory,
       t@@ -524,9 +550,12 @@ __host__ void DEM::freeGlobalDeviceMemory()
            cudaFree(dev_walls_acc);
        
            // Fluid arrays
       -    if (navierstokes == 1) {
       +    if (fluid == 1 && cfd_solver == 0) {
                freeNSmemDev();
            }
       +    if (fluid == 1 && cfd_solver == 1) {
       +        freeDarcyMemDev();
       +    }
        
            //checkForCudaErrors("During cudaFree calls");
        
       t@@ -604,11 +633,15 @@ __host__ void DEM::transferToGlobalDeviceMemory(int statusmsg)
                        sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice);
        
            // Fluid arrays
       -    if (navierstokes == 1) {
       -        transferNStoGlobalDeviceMemory(1);
       -    } else if (navierstokes != 0) {
       -        std::cerr << "Error: navierstokes value not understood ("
       -            << navierstokes << ")" << std::endl;
       +    if (fluid == 1) {
       +        if (cfd_solver == 0) {
       +            transferNStoGlobalDeviceMemory(1);
       +        } else if (cfd_solver == 1) {
       +            transferDarcyToGlobalDeviceMemory(1);
       +        } else {
       +            std::cerr << "Error: cfd_solver value not understood ("
       +                << cfd_solver << ")" << std::endl;
       +        }
            }
        
            checkForCudaErrors("End of transferToGlobalDeviceMemory");
       t@@ -680,9 +713,13 @@ __host__ void DEM::transferFromGlobalDeviceMemory()
                    sizeof(Float4)*walls.nw, cudaMemcpyDeviceToHost);
        
            // Fluid arrays
       -    if (navierstokes == 1) {
       +    if (fluid == 1 && cfd_solver == 0) {
                transferNSfromGlobalDeviceMemory(0);
            }
       +    else if (fluid == 1 && cfd_solver == 1) {
       +        transferDarcyFromGlobalDeviceMemory(0);
       +        checkDarcyStability();
       +    }
        
            //checkForCudaErrors("End of transferFromGlobalDeviceMemory");
        }
       t@@ -733,7 +770,7 @@ __host__ void DEM::startTime()
                    iDivUp(grid.num[0], dimBlockFluid.x),
                    iDivUp(grid.num[1], dimBlockFluid.y),
                    iDivUp(grid.num[2], dimBlockFluid.z));
       -    if (dimGridFluid.z > 64 && navierstokes == 1) {
       +    if (dimGridFluid.z > 64 && fluid == 1) {
                cerr << "Error: dimGridFluid.z > 64" << endl;
                exit(1);
            }
       t@@ -744,7 +781,7 @@ __host__ void DEM::startTime()
                    iDivUp(grid.num[0]+1, dimBlockFluidFace.x),
                    iDivUp(grid.num[1]+1, dimBlockFluidFace.y),
                    iDivUp(grid.num[2]+1, dimBlockFluidFace.z));
       -    if (dimGridFluidFace.z > 64 && navierstokes == 1) {
       +    if (dimGridFluidFace.z > 64 && fluid == 1) {
                cerr << "Error: dimGridFluidFace.z > 64" << endl;
                exit(1);
            }
       t@@ -766,7 +803,7 @@ __host__ void DEM::startTime()
                    << dimBlock.x << "*" << dimBlock.y << "*" << dimBlock.z << "\n"
                    << "  - Shared memory required per block: " << smemSize << " bytes"
                    << endl;
       -        if (navierstokes == 1) {
       +        if (fluid == 1) {
                    cout << "  - Blocks per fluid grid: "
                        << dimGridFluid.x << "*" << dimGridFluid.y << "*" <<
                        dimGridFluid.z << "\n"
       t@@ -832,6 +869,17 @@ __host__ void DEM::startTime()
            double t_jacobiIterationNS = 0.0;
            double t_updateNSvelocityPressure = 0.0;
        
       +    double t_findDarcyPorosities = 0.0;
       +    double t_setDarcyGhostNodes = 0.0;
       +    double t_findDarcyPressureForce = 0.0;
       +    double t_setDarcyTopPressure = 0.0;
       +    double t_findDarcyPermeabilities = 0.0;
       +    double t_findDarcyPermeabilityGradients = 0.0;
       +    //double t_findDarcyPressureChange = 0.0;
       +    double t_updateDarcySolution = 0.0;
       +    double t_copyValues = 0.0;
       +    double t_findDarcyVelocities = 0.0;
       +
            if (PROFILING == 1) {
                cudaEventCreate(&kernel_tic);
                cudaEventCreate(&kernel_toc);
       t@@ -844,7 +892,11 @@ __host__ void DEM::startTime()
            // Index of dynamic top wall (if it exists)
            unsigned int wall0_iz = 10000000;
            // weight of fluid between two cells in z direction
       -    const Float dp_dz = fabs(params.rho_f*params.g[2]*grid.L[2]/grid.num[2]);
       +    Float dp_dz;
       +    if (cfd_solver == 0)
       +        dp_dz = fabs(ns.rho_f*params.g[2]*grid.L[2]/grid.num[2]);
       +    else if (cfd_solver == 1)
       +        dp_dz = fabs(darcy.rho_f*params.g[2]*grid.L[2]/grid.num[2]);
            //std::cout << "dp_dz = " << dp_dz << std::endl;
        
            // Write a log file of the number of iterations it took before
       t@@ -1018,163 +1070,170 @@ __host__ void DEM::startTime()
                    }
                }
        
       -        // Solve Navier Stokes flow through the grid
       -        if (navierstokes == 1) {
       -            checkForCudaErrorsIter("Before findPorositiesDev", iter);
       -            // Find cell porosities, average particle velocities, and average
       -            // particle diameters. These are needed for predicting the fluid
       -            // velocities
       -            if (PROFILING == 1)
       -                startTimer(&kernel_tic);
       -            findPorositiesVelocitiesDiametersSpherical
       -            //findPorositiesVelocitiesDiametersSphericalGradient
       -                <<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_cellStart,
       -                        dev_cellEnd,
       -                        dev_x_sorted,
       -                        dev_vel_sorted,
       -                        dev_ns_phi,
       -                        dev_ns_dphi,
       -                        dev_ns_vp_avg,
       -                        dev_ns_d_avg,
       -                        iter,
       -                        np,
       -                        ns.c_phi);
       -            cudaThreadSynchronize();
       -            if (PROFILING == 1)
       -                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                        &t_findPorositiesDev);
       -            checkForCudaErrorsIter("Post findPorositiesDev", iter);
       +        // Solve fluid flow through the grid
       +        if (fluid == 1) {
        
       -#ifdef CFD_DEM_COUPLING
       -            /*if (params.nu <= 0.0) {
       -                std::cerr << "Error! The fluid needs a positive viscosity "
       -                    "value in order to simulate particle-fluid interaction."
       -                    << std::endl;
       -                exit(1);
       -            }*/
       -            if (iter == 0) {
       -                // set cell center ghost nodes
       -                setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       -                    dev_ns_v, ns.bc_bot, ns.bc_top);
       +            // Navier-Stokes solution
       +            if (cfd_solver == 0) {
        
       -                // find cell face velocities
       -                interpolateCenterToFace
       -                    <<<dimGridFluidFace, dimBlockFluidFace>>>(
       -                        dev_ns_v,
       -                        dev_ns_v_x,
       -                        dev_ns_v_y,
       -                        dev_ns_v_z);
       +                checkForCudaErrorsIter("Before findPorositiesDev", iter);
       +                // Find cell porosities, average particle velocities, and average
       +                // particle diameters. These are needed for predicting the fluid
       +                // velocities
       +                if (PROFILING == 1)
       +                    startTimer(&kernel_tic);
       +                findPorositiesVelocitiesDiametersSpherical
       +                //findPorositiesVelocitiesDiametersSphericalGradient
       +                    <<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_cellStart,
       +                            dev_cellEnd,
       +                            dev_x_sorted,
       +                            dev_vel_sorted,
       +                            dev_ns_phi,
       +                            dev_ns_dphi,
       +                            dev_ns_vp_avg,
       +                            dev_ns_d_avg,
       +                            iter,
       +                            np,
       +                            ns.c_phi);
                        cudaThreadSynchronize();
       -                checkForCudaErrors("Post interpolateCenterToFace");
       -            }
       -
       -            setNSghostNodesFace<Float>
       -                <<<dimGridFluidFace, dimBlockFluidFace>>>(
       -                    dev_ns_v_x,
       -                    dev_ns_v_y,
       -                    dev_ns_v_z,
       -                    ns.bc_bot,
       -                    ns.bc_top);
       -            cudaThreadSynchronize();
       -            checkForCudaErrorsIter("Post setNSghostNodesFace", iter);
       -
       -            findFaceDivTau<<<dimGridFluidFace, dimBlockFluidFace>>>(
       -                dev_ns_v_x,
       -                dev_ns_v_y,
       -                dev_ns_v_z,
       -                dev_ns_div_tau_x,
       -                dev_ns_div_tau_y,
       -                dev_ns_div_tau_z);
       -            cudaThreadSynchronize();
       -            checkForCudaErrorsIter("Post findFaceDivTau", iter);
       -
       -            setNSghostNodesFace<Float>
       -                <<<dimGridFluidFace, dimBlockFluid>>>(
       -                    dev_ns_div_tau_x,
       -                    dev_ns_div_tau_y,
       -                    dev_ns_div_tau_z,
       -                    ns.bc_bot,
       -                    ns.bc_top);
       -            cudaThreadSynchronize();
       -            checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_div_tau)",
       -                                   iter);
       -
       -            setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                dev_ns_p, ns.bc_bot, ns.bc_top);
       -            cudaThreadSynchronize();
       -            checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter);
       -
       -            setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                dev_ns_phi, ns.bc_bot, ns.bc_top);
       -            cudaThreadSynchronize();
       -            checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter);
       +                if (PROFILING == 1)
       +                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                            &t_findPorositiesDev);
       +                checkForCudaErrorsIter("Post findPorositiesDev", iter);
        
       +#ifdef CFD_DEM_COUPLING
       +                /*if (params.nu <= 0.0) {
       +                  std::cerr << "Error! The fluid needs a positive viscosity "
       +                  "value in order to simulate particle-fluid interaction."
       +                  << std::endl;
       +                  exit(1);
       +                  }*/
       +                if (iter == 0) {
       +                    // set cell center ghost nodes
       +                    setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_v, ns.bc_bot, ns.bc_top);
       +
       +                    // find cell face velocities
       +                    interpolateCenterToFace
       +                        <<<dimGridFluidFace, dimBlockFluidFace>>>(
       +                                dev_ns_v,
       +                                dev_ns_v_x,
       +                                dev_ns_v_y,
       +                                dev_ns_v_z);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrors("Post interpolateCenterToFace");
       +                }
        
       -            if (np > 0) {
       +                setNSghostNodesFace<Float>
       +                    <<<dimGridFluidFace, dimBlockFluidFace>>>(
       +                            dev_ns_v_x,
       +                            dev_ns_v_y,
       +                            dev_ns_v_z,
       +                            ns.bc_bot,
       +                            ns.bc_top);
       +                cudaThreadSynchronize();
       +                checkForCudaErrorsIter("Post setNSghostNodesFace", iter);
        
       -                // Per particle, find the fluid-particle interaction force f_pf
       -                // and apply it to the particle
       -                findInteractionForce<<<dimGrid, dimBlock>>>(
       -                        dev_x,
       -                        dev_vel,
       -                        dev_ns_phi,
       -                        dev_ns_p,
       +                findFaceDivTau<<<dimGridFluidFace, dimBlockFluidFace>>>(
                                dev_ns_v_x,
                                dev_ns_v_y,
                                dev_ns_v_z,
       +                        ns.mu,
                                dev_ns_div_tau_x,
                                dev_ns_div_tau_y,
       -                        dev_ns_div_tau_z,
       -                        //ns.c_v,
       -                        dev_ns_f_pf,
       -                        dev_force,
       -                        dev_ns_f_d,
       -                        dev_ns_f_p,
       -                        dev_ns_f_v,
       -                        dev_ns_f_sum);
       +                        dev_ns_div_tau_z);
                        cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post findInteractionForce", iter);
       +                checkForCudaErrorsIter("Post findFaceDivTau", iter);
       +
       +                setNSghostNodesFace<Float>
       +                    <<<dimGridFluidFace, dimBlockFluid>>>(
       +                            dev_ns_div_tau_x,
       +                            dev_ns_div_tau_y,
       +                            dev_ns_div_tau_z,
       +                            ns.bc_bot,
       +                            ns.bc_top);
       +                cudaThreadSynchronize();
       +                checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_div_tau)",
       +                        iter);
        
                        setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
                                dev_ns_p, ns.bc_bot, ns.bc_top);
                        cudaThreadSynchronize();
                        checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter);
        
       -                // Apply fluid-particle interaction force to the fluid
       -                applyInteractionForceToFluid<<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_gridParticleIndex,
       -                        dev_cellStart,
       -                        dev_cellEnd,
       -                        dev_ns_f_pf,
       -                        dev_ns_F_pf);
       -                        //dev_ns_F_pf_x,
       -                        //dev_ns_F_pf_y,
       -                        //dev_ns_F_pf_z);
       +                setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                        dev_ns_phi, ns.bc_bot, ns.bc_top);
                        cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post applyInteractionForceToFluid",
       -                        iter);
       +                checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter);
        
       -                setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_F_pf, ns.bc_bot, ns.bc_top);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post setNSghostNodes(F_pf)", iter);
       -            }
       +
       +                if (np > 0) {
       +
       +                    // Per particle, find the fluid-particle interaction force f_pf
       +                    // and apply it to the particle
       +                    findInteractionForce<<<dimGrid, dimBlock>>>(
       +                            dev_x,
       +                            dev_vel,
       +                            dev_ns_phi,
       +                            dev_ns_p,
       +                            dev_ns_v_x,
       +                            dev_ns_v_y,
       +                            dev_ns_v_z,
       +                            dev_ns_div_tau_x,
       +                            dev_ns_div_tau_y,
       +                            dev_ns_div_tau_z,
       +                            //ns.c_v,
       +                            ns.mu,
       +                            ns.rho_f,
       +                            dev_ns_f_pf,
       +                            dev_force,
       +                            dev_ns_f_d,
       +                            dev_ns_f_p,
       +                            dev_ns_f_v,
       +                            dev_ns_f_sum);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post findInteractionForce", iter);
       +
       +                    setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_p, ns.bc_bot, ns.bc_top);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter);
       +
       +                    // Apply fluid-particle interaction force to the fluid
       +                    applyInteractionForceToFluid<<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_gridParticleIndex,
       +                            dev_cellStart,
       +                            dev_cellEnd,
       +                            dev_ns_f_pf,
       +                            dev_ns_F_pf);
       +                    //dev_ns_F_pf_x,
       +                    //dev_ns_F_pf_y,
       +                    //dev_ns_F_pf_z);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post applyInteractionForceToFluid",
       +                            iter);
       +
       +                    setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_F_pf, ns.bc_bot, ns.bc_top);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post setNSghostNodes(F_pf)", iter);
       +                }
        #endif
        
       -            if ((iter % ns.ndem) == 0) {
       -                // Initial guess for the top epsilon values. These may be
       -                // changed in setUpperPressureNS
       -                // TODO: Check if this should only be set when top bc=Dirichlet
       -                Float pressure = ns.p[idx(0,0,ns.nz-1)];
       -                Float pressure_new = pressure; // Dirichlet
       -                Float epsilon_value = pressure_new - ns.beta*pressure;
       -                setNSepsilonTop<<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_epsilon,
       -                        dev_ns_epsilon_new,
       -                        epsilon_value);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post setNSepsilonTop", iter);
       +                if ((iter % ns.ndem) == 0) {
       +                    // Initial guess for the top epsilon values. These may be
       +                    // changed in setUpperPressureNS
       +                    // TODO: Check if this should only be set when top bc=Dirichlet
       +                    Float pressure = ns.p[idx(0,0,ns.nz-1)];
       +                    Float pressure_new = pressure; // Dirichlet
       +                    Float epsilon_value = pressure_new - ns.beta*pressure;
       +                    setNSepsilonTop<<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_epsilon,
       +                            dev_ns_epsilon_new,
       +                            epsilon_value);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post setNSepsilonTop", iter);
        
        #if defined(REPORT_EPSILON) || defined(REPORT_V_P_COMPONENTS) || defined(REPORT_V_C_COMPONENTS)
                            std::cout
       t@@ -1182,503 +1241,811 @@ __host__ void DEM::startTime()
                                << std::endl;
        #endif
        
       -                // find cell containing top wall
       -                if (walls.nw > 0 && walls.wmode[0] == 1) {
       -                    wall0_iz = walls.nx->w/(grid.L[2]/grid.num[2]);
       -                    setNSepsilonAtTopWall<<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_ns_epsilon,
       -                            dev_ns_epsilon_new,
       -                            wall0_iz,
       -                            epsilon_value,
       -                            dp_dz);
       -                    cudaThreadSynchronize();
       -                    checkForCudaErrorsIter("Post setNSepsilonAtTopWall", iter);
       +                    // find cell containing top wall
       +                    if (walls.nw > 0 && walls.wmode[0] == 1) {
       +                        wall0_iz = walls.nx->w/(grid.L[2]/grid.num[2]);
       +                        setNSepsilonAtTopWall<<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_ns_epsilon,
       +                                dev_ns_epsilon_new,
       +                                wall0_iz,
       +                                epsilon_value,
       +                                dp_dz);
       +                        cudaThreadSynchronize();
       +                        checkForCudaErrorsIter("Post setNSepsilonAtTopWall", iter);
        
        #ifdef REPORT_EPSILON
       -                    std::cout
       -                        << "\n###### EPSILON setNSepsilonAtTopWall "
       -                        << "######" << std::endl;
       -                    transferNSepsilonFromGlobalDeviceMemory();
       -                    printNSarray(stdout, ns.epsilon, "epsilon");
       +                        std::cout
       +                            << "\n###### EPSILON setNSepsilonAtTopWall "
       +                            << "######" << std::endl;
       +                        transferNSepsilonFromGlobalDeviceMemory();
       +                        printNSarray(stdout, ns.epsilon, "epsilon");
        #endif
       -                }
       +                    }
        
       -                // Modulate the pressures at the upper boundary cells
       -                if ((ns.p_mod_A > 1.0e-5 || ns.p_mod_A < -1.0e-5) &&
       -                        ns.p_mod_f > 1.0e-7) {
       -                                         // original pressure
       -                    Float new_pressure = ns.p[idx(0,0,ns.nz-1)]
       -                        + ns.p_mod_A*sin(2.0*M_PI*ns.p_mod_f*time.current
       -                                + ns.p_mod_phi);
       -                    setUpperPressureNS<<<dimGridFluid, dimBlockFluid>>>(
       +                    // Modulate the pressures at the upper boundary cells
       +                    if ((ns.p_mod_A > 1.0e-5 || ns.p_mod_A < -1.0e-5) &&
       +                            ns.p_mod_f > 1.0e-7) {
       +                        // original pressure
       +                        Float new_pressure = ns.p[idx(0,0,ns.nz-1)]
       +                            + ns.p_mod_A*sin(2.0*M_PI*ns.p_mod_f*time.current
       +                                    + ns.p_mod_phi);
       +                        setUpperPressureNS<<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_ns_p,
       +                                dev_ns_epsilon,
       +                                dev_ns_epsilon_new,
       +                                ns.beta,
       +                                new_pressure);
       +                        cudaThreadSynchronize();
       +                        checkForCudaErrorsIter("Post setUpperPressureNS", iter);
       +
       +#ifdef REPORT_MORE_EPSILON
       +                        std::cout
       +                            << "\n@@@@@@ TIME STEP " << iter << " @@@@@@"
       +                            << "\n###### EPSILON AFTER setUpperPressureNS "
       +                            << "######" << std::endl;
       +                        transferNSepsilonFromGlobalDeviceMemory();
       +                        printNSarray(stdout, ns.epsilon, "epsilon");
       +#endif
       +                    }
       +
       +                    // Set the values of the ghost nodes in the grid
       +                    if (PROFILING == 1)
       +                        startTimer(&kernel_tic);
       +
       +                    setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_p, ns.bc_bot, ns.bc_top);
       +
       +                    //setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       +                    //dev_ns_v, ns.bc_bot, ns.bc_top);
       +
       +                    setNSghostNodesFace<Float>
       +                        <<<dimGridFluidFace, dimBlockFluidFace>>>(
       +                                dev_ns_v_p_x,
       +                                dev_ns_v_p_y,
       +                                dev_ns_v_p_z,
       +                                ns.bc_bot, ns.bc_top);
       +
       +                    setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_phi, ns.bc_bot, ns.bc_top);
       +
       +                    setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_dphi, ns.bc_bot, ns.bc_top);
       +
       +                    cudaThreadSynchronize();
       +                    if (PROFILING == 1)
       +                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                &t_setNSghostNodesDev);
       +                    checkForCudaErrorsIter("Post setNSghostNodesDev", iter);
       +                    /*std::cout << "\n###### EPSILON AFTER setNSghostNodesDev #####"
       +                      << std::endl;
       +                      transferNSepsilonFromGlobalDeviceMemory();
       +                      printNSarray(stdout, ns.epsilon, "epsilon");*/
       +
       +                    // interpolate velocities to cell centers which makes velocity
       +                    // prediction easier
       +                    interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_v_x,
       +                            dev_ns_v_y,
       +                            dev_ns_v_z,
       +                            dev_ns_v);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
       +
       +                    // Set cell-center velocity ghost nodes
       +                    setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_v, ns.bc_bot, ns.bc_top);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post setNSghostNodes(v)", iter);
       +
       +                    // Find the divergence of phi*vi*v, needed for predicting the
       +                    // fluid velocities
       +                    if (PROFILING == 1)
       +                        startTimer(&kernel_tic);
       +                    findNSdivphiviv<<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_phi,
       +                            dev_ns_v,
       +                            dev_ns_div_phi_vi_v);
       +                    cudaThreadSynchronize();
       +                    if (PROFILING == 1)
       +                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                &t_findNSdivphiviv);
       +                    checkForCudaErrorsIter("Post findNSdivphiviv", iter);
       +
       +                    // Set cell-center ghost nodes
       +                    setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_div_phi_vi_v, ns.bc_bot, ns.bc_top);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post setNSghostNodes(div_phi_vi_v)",
       +                            iter);
       +
       +                    // Predict the fluid velocities on the base of the old pressure
       +                    // field and ignoring the incompressibility constraint
       +                    if (PROFILING == 1)
       +                        startTimer(&kernel_tic);
       +                    findPredNSvelocities<<<dimGridFluidFace, dimBlockFluidFace>>>(
                                    dev_ns_p,
       -                            dev_ns_epsilon,
       -                            dev_ns_epsilon_new,
       +                            dev_ns_v_x,
       +                            dev_ns_v_y,
       +                            dev_ns_v_z,
       +                            dev_ns_phi,
       +                            dev_ns_dphi,
       +                            dev_ns_div_tau_x,
       +                            dev_ns_div_tau_y,
       +                            dev_ns_div_tau_z,
       +                            dev_ns_div_phi_vi_v,
       +                            ns.bc_bot,
       +                            ns.bc_top,
                                    ns.beta,
       -                            new_pressure);
       +                            dev_ns_F_pf,
       +                            ns.ndem,
       +                            wall0_iz,
       +                            ns.c_v,
       +                            ns.mu,
       +                            ns.rho_f,
       +                            dev_ns_v_p_x,
       +                            dev_ns_v_p_y,
       +                            dev_ns_v_p_z);
                            cudaThreadSynchronize();
       -                    checkForCudaErrorsIter("Post setUpperPressureNS", iter);
       +                    if (PROFILING == 1)
       +                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                &t_findPredNSvelocities);
       +                    checkForCudaErrorsIter("Post findPredNSvelocities", iter);
       +
       +                    setNSghostNodesFace<Float>
       +                        <<<dimGridFluidFace, dimBlockFluidFace>>>(
       +                                dev_ns_v_p_x,
       +                                dev_ns_v_p_y,
       +                                dev_ns_v_p_z,
       +                                ns.bc_bot, ns.bc_top);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter(
       +                            "Post setNSghostNodesFace(dev_ns_v_p)", iter);
       +
       +                    interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_v_p_x,
       +                            dev_ns_v_p_y,
       +                            dev_ns_v_p_z,
       +                            dev_ns_v_p);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
       +
       +
       +                    // In the first iteration of the sphere program, we'll need to
       +                    // manually estimate the values of epsilon. In the subsequent
       +                    // iterations, the previous values are  used.
       +                    if (iter == 0) {
       +
       +                        // Define the first estimate of the values of epsilon.
       +                        // The initial guess depends on the value of ns.beta.
       +                        Float pressure = ns.p[idx(2,2,2)];
       +                        Float pressure_new = pressure; // Guess p_current = p_new
       +                        Float epsilon_value = pressure_new - ns.beta*pressure;
       +                        if (PROFILING == 1)
       +                            startTimer(&kernel_tic);
       +                        setNSepsilonInterior<<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_ns_epsilon, epsilon_value);
       +                        cudaThreadSynchronize();
       +
       +                        setNSnormZero<<<dimGridFluid, dimBlockFluid>>>(dev_ns_norm);
       +                        cudaThreadSynchronize();
       +
       +                        if (PROFILING == 1)
       +                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                    &t_setNSepsilon);
       +                        checkForCudaErrorsIter("Post setNSepsilonInterior", iter);
        
        #ifdef REPORT_MORE_EPSILON
       -                    std::cout
       +                        std::cout
       +                            << "\n###### EPSILON AFTER setNSepsilonInterior "
       +                            << "######" << std::endl;
       +                        transferNSepsilonFromGlobalDeviceMemory();
       +                        printNSarray(stdout, ns.epsilon, "epsilon");
       +#endif
       +
       +                        // Set the epsilon values at the lower boundary
       +                        pressure = ns.p[idx(0,0,0)];
       +                        pressure_new = pressure; // Guess p_current = p_new
       +                        epsilon_value = pressure_new - ns.beta*pressure;
       +                        if (PROFILING == 1)
       +                            startTimer(&kernel_tic);
       +                        setNSepsilonBottom<<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_ns_epsilon,
       +                                dev_ns_epsilon_new,
       +                                epsilon_value);
       +                        cudaThreadSynchronize();
       +                        if (PROFILING == 1)
       +                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                    &t_setNSdirichlet);
       +                        checkForCudaErrorsIter("Post setNSepsilonBottom", iter);
       +
       +#ifdef REPORT_MORE_EPSILON
       +                        std::cout
       +                            << "\n###### EPSILON AFTER setNSepsilonBottom "
       +                            << "######" << std::endl;
       +                        transferNSepsilonFromGlobalDeviceMemory();
       +                        printNSarray(stdout, ns.epsilon, "epsilon");
       +#endif
       +
       +                        /*setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                          dev_ns_epsilon);
       +                          cudaThreadSynchronize();
       +                          checkForCudaErrors("Post setNSghostNodesFloat(dev_ns_epsilon)",
       +                          iter);*/
       +                        setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_ns_epsilon,
       +                                ns.bc_bot, ns.bc_top);
       +                        cudaThreadSynchronize();
       +                        checkForCudaErrorsIter("Post setNSghostNodesEpsilon(1)",
       +                                iter);
       +
       +#ifdef REPORT_MORE_EPSILON
       +                        std::cout <<
       +                            "\n###### EPSILON AFTER setNSghostNodes(epsilon) "
       +                            << "######" << std::endl;
       +                        transferNSepsilonFromGlobalDeviceMemory();
       +                        printNSarray(stdout, ns.epsilon, "epsilon");
       +#endif
       +                    }
       +
       +                    // Solve the system of epsilon using a Jacobi iterative solver.
       +                    // The average normalized residual is initialized to a large
       +                    // value.
       +                    //double avg_norm_res;
       +                    double max_norm_res;
       +
       +                    // Write a log file of the normalized residuals during the Jacobi
       +                    // iterations
       +                    std::ofstream reslog;
       +                    if (write_res_log == 1)
       +                        reslog.open("max_res_norm.dat");
       +
       +                    // transfer normalized residuals from GPU to CPU
       +#ifdef REPORT_MORE_EPSILON
       +                    std::cout << "\n###### BEFORE FIRST JACOBI ITERATION ######"
                                << "\n@@@@@@ TIME STEP " << iter << " @@@@@@"
       -                        << "\n###### EPSILON AFTER setUpperPressureNS "
       -                        << "######" << std::endl;
       +                        << std::endl;
                            transferNSepsilonFromGlobalDeviceMemory();
                            printNSarray(stdout, ns.epsilon, "epsilon");
        #endif
       -                }
        
       -                // Set the values of the ghost nodes in the grid
       -                if (PROFILING == 1)
       -                    startTimer(&kernel_tic);
       +                    for (unsigned int nijac = 0; nijac<ns.maxiter; ++nijac) {
        
       -                setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_p, ns.bc_bot, ns.bc_top);
       +                        // Only grad(epsilon) changes during the Jacobi iterations.
       +                        // The remaining terms of the forcing function are only
       +                        // calculated during the first iteration.
       +                        if (PROFILING == 1)
       +                            startTimer(&kernel_tic);
       +                        findNSforcing<<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_ns_epsilon,
       +                                dev_ns_phi,
       +                                dev_ns_dphi,
       +                                dev_ns_v_p,
       +                                dev_ns_v_p_x,
       +                                dev_ns_v_p_y,
       +                                dev_ns_v_p_z,
       +                                nijac,
       +                                ns.ndem,
       +                                ns.c_v,
       +                                ns.rho_f,
       +                                dev_ns_f1,
       +                                dev_ns_f2,
       +                                dev_ns_f);
       +                        cudaThreadSynchronize();
       +                        if (PROFILING == 1)
       +                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                    &t_findNSforcing);
       +                        checkForCudaErrorsIter("Post findNSforcing", iter);
       +                        /*setNSghostNodesForcing<<<dimGridFluid, dimBlockFluid>>>(
       +                          dev_ns_f1,
       +                          dev_ns_f2,
       +                          dev_ns_f,
       +                          nijac);
       +                          cudaThreadSynchronize();
       +                          checkForCudaErrors("Post setNSghostNodesForcing", iter);*/
       +
       +                        setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_ns_epsilon,
       +                                ns.bc_bot, ns.bc_top);
       +                        cudaThreadSynchronize();
       +                        checkForCudaErrorsIter("Post setNSghostNodesEpsilon(2)",
       +                                iter);
        
       -                //setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       -                        //dev_ns_v, ns.bc_bot, ns.bc_top);
       +#ifdef REPORT_EPSILON
       +                        std::cout << "\n###### JACOBI ITERATION "
       +                            << nijac
       +                            << " after setNSghostNodes(epsilon,2) ######"
       +                            << std::endl;
       +                        transferNSepsilonFromGlobalDeviceMemory();
       +                        printNSarray(stdout, ns.epsilon, "epsilon");
       +#endif
        
       -                setNSghostNodesFace<Float>
       -                    <<<dimGridFluidFace, dimBlockFluidFace>>>(
       -                        dev_ns_v_p_x,
       -                        dev_ns_v_p_y,
       -                        dev_ns_v_p_z,
       -                        ns.bc_bot, ns.bc_top);
       +                        // Perform a single Jacobi iteration
       +                        if (PROFILING == 1)
       +                            startTimer(&kernel_tic);
       +                        jacobiIterationNS<<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_ns_epsilon,
       +                                dev_ns_epsilon_new,
       +                                dev_ns_norm,
       +                                dev_ns_f,
       +                                ns.bc_bot,
       +                                ns.bc_top,
       +                                ns.theta,
       +                                wall0_iz,
       +                                dp_dz);
       +                        cudaThreadSynchronize();
       +                        if (PROFILING == 1)
       +                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                    &t_jacobiIterationNS);
       +                        checkForCudaErrorsIter("Post jacobiIterationNS", iter);
       +
       +                        // set Dirichlet and Neumann BC at cells containing top wall
       +                        /*if (walls.nw > 0 && walls.wmode[0] == 1) {
       +                          setNSepsilonAtTopWall<<<dimGridFluid, dimBlockFluid>>>(
       +                          dev_ns_epsilon,
       +                          dev_ns_epsilon_new,
       +                          wall0_iz,
       +                          epsilon_value,
       +                          dp_dz);
       +                          cudaThreadSynchronize();
       +                          checkForCudaErrorsIter("Post setNSepsilonAtTopWall",
       +                          iter);
       +                          }*/
       +
       +                        // Copy new values to current values
       +                        copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_ns_epsilon_new,
       +                                dev_ns_epsilon);
       +                        cudaThreadSynchronize();
       +                        checkForCudaErrorsIter
       +                            ("Post copyValues (epsilon_new->epsilon)", iter);
        
       -                setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_phi, ns.bc_bot, ns.bc_top);
       +#ifdef REPORT_EPSILON
       +                        std::cout << "\n###### JACOBI ITERATION "
       +                            << nijac << " after jacobiIterationNS ######"
       +                            << std::endl;
       +                        transferNSepsilonFromGlobalDeviceMemory();
       +                        printNSarray(stdout, ns.epsilon, "epsilon");
       +#endif
        
       -                setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_dphi, ns.bc_bot, ns.bc_top);
       +                        if (nijac % nijacnorm == 0) {
        
       -                cudaThreadSynchronize();
       -                if (PROFILING == 1)
       -                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                            &t_setNSghostNodesDev);
       -                checkForCudaErrorsIter("Post setNSghostNodesDev", iter);
       -                /*std::cout << "\n###### EPSILON AFTER setNSghostNodesDev #####"
       -                  << std::endl;
       -                  transferNSepsilonFromGlobalDeviceMemory();
       -                  printNSarray(stdout, ns.epsilon, "epsilon");*/
       +                            // Read the normalized residuals from the device
       +                            transferNSnormFromGlobalDeviceMemory();
        
       -                // interpolate velocities to cell centers which makes velocity
       -                // prediction easier
       -                interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_v_x,
       -                        dev_ns_v_y,
       -                        dev_ns_v_z,
       -                        dev_ns_v);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
       +                            // Write the normalized residuals to the terminal
       +                            //printNSarray(stdout, ns.norm, "norm");
        
       -                // Set cell-center velocity ghost nodes
       -                setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       -                    dev_ns_v, ns.bc_bot, ns.bc_top);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post setNSghostNodes(v)", iter);
       -                
       -                // Find the divergence of phi*vi*v, needed for predicting the
       -                // fluid velocities
       -                if (PROFILING == 1)
       -                    startTimer(&kernel_tic);
       -                findNSdivphiviv<<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_phi,
       -                        dev_ns_v,
       -                        dev_ns_div_phi_vi_v);
       -                cudaThreadSynchronize();
       -                if (PROFILING == 1)
       -                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                            &t_findNSdivphiviv);
       -                checkForCudaErrorsIter("Post findNSdivphiviv", iter);
       +                            // Find the maximum value of the normalized residuals
       +                            max_norm_res = maxNormResNS();
        
       -                // Set cell-center ghost nodes
       -                setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       -                    dev_ns_div_phi_vi_v, ns.bc_bot, ns.bc_top);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post setNSghostNodes(div_phi_vi_v)",
       -                                       iter);
       +                            // Write the Jacobi iteration number and maximum value
       +                            // of the normalized residual to the log file
       +                            if (write_res_log == 1)
       +                                reslog << nijac << '\t' << max_norm_res
       +                                    << std::endl;
       +                        }
        
       -                // Predict the fluid velocities on the base of the old pressure
       -                // field and ignoring the incompressibility constraint
       -                if (PROFILING == 1)
       -                    startTimer(&kernel_tic);
       -                findPredNSvelocities<<<dimGridFluidFace, dimBlockFluidFace>>>(
       -                        dev_ns_p,
       -                        dev_ns_v_x,
       -                        dev_ns_v_y,
       -                        dev_ns_v_z,
       -                        dev_ns_phi,
       -                        dev_ns_dphi,
       -                        dev_ns_div_tau_x,
       -                        dev_ns_div_tau_y,
       -                        dev_ns_div_tau_z,
       -                        dev_ns_div_phi_vi_v,
       -                        ns.bc_bot,
       -                        ns.bc_top,
       -                        ns.beta,
       -                        dev_ns_F_pf,
       -                        ns.ndem,
       -                        wall0_iz,
       -                        ns.c_v,
       -                        dev_ns_v_p_x,
       -                        dev_ns_v_p_y,
       -                        dev_ns_v_p_z);
       -                cudaThreadSynchronize();
       -                if (PROFILING == 1)
       -                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                            &t_findPredNSvelocities);
       -                checkForCudaErrorsIter("Post findPredNSvelocities", iter);
       +                        if (max_norm_res < ns.tolerance) {
        
       -                setNSghostNodesFace<Float>
       -                    <<<dimGridFluidFace, dimBlockFluidFace>>>(
       -                        dev_ns_v_p_x,
       -                        dev_ns_v_p_y,
       -                        dev_ns_v_p_z,
       -                        ns.bc_bot, ns.bc_top);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter(
       -                        "Post setNSghostNodesFace(dev_ns_v_p)", iter);
       +                            if (write_conv_log == 1 && iter % conv_log_interval == 0)
       +                                convlog << iter+1 << '\t' << nijac << std::endl;
        
       -                interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_v_p_x,
       -                        dev_ns_v_p_y,
       -                        dev_ns_v_p_z,
       -                        dev_ns_v_p);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
       +                            setNSghostNodes<Float>
       +                                <<<dimGridFluid, dimBlockFluid>>>(
       +                                        dev_ns_epsilon,
       +                                        ns.bc_bot, ns.bc_top);
       +                            cudaThreadSynchronize();
       +                            checkForCudaErrorsIter
       +                                ("Post setNSghostNodesEpsilon(4)", iter);
        
       +                            // Apply smoothing if requested
       +                            if (ns.gamma > 0.0) {
       +
       +                                smoothing<<<dimGridFluid, dimBlockFluid>>>(
       +                                        dev_ns_epsilon,
       +                                        ns.gamma,
       +                                        ns.bc_bot, ns.bc_top);
       +                                cudaThreadSynchronize();
       +                                checkForCudaErrorsIter("Post smoothing", iter);
       +
       +                                setNSghostNodes<Float>
       +                                    <<<dimGridFluid, dimBlockFluid>>>(
       +                                            dev_ns_epsilon,
       +                                            ns.bc_bot, ns.bc_top);
       +                                cudaThreadSynchronize();
       +                                checkForCudaErrorsIter
       +                                    ("Post setNSghostNodesEpsilon(4)", iter);
       +                            }
        
       -                // In the first iteration of the sphere program, we'll need to
       -                // manually estimate the values of epsilon. In the subsequent
       -                // iterations, the previous values are  used.
       -                if (iter == 0) {
       +#ifdef REPORT_EPSILON
       +                            std::cout << "\n###### JACOBI ITERATION "
       +                                << nijac << " after smoothing ######"
       +                                << std::endl;
       +                            transferNSepsilonFromGlobalDeviceMemory();
       +                            printNSarray(stdout, ns.epsilon, "epsilon");
       +#endif
        
       -                    // Define the first estimate of the values of epsilon.
       -                    // The initial guess depends on the value of ns.beta.
       -                    Float pressure = ns.p[idx(2,2,2)];
       -                    Float pressure_new = pressure; // Guess p_current = p_new
       -                    Float epsilon_value = pressure_new - ns.beta*pressure;
       +                            break;  // solution has converged, exit Jacobi loop
       +                        }
       +
       +                        if (nijac >= ns.maxiter-1) {
       +
       +                            if (write_conv_log == 1)
       +                                convlog << iter+1 << '\t' << nijac << std::endl;
       +
       +                            std::cerr << "\nIteration " << iter << ", time " 
       +                                << iter*time.dt << " s: "
       +                                "Error, the epsilon solution in the fluid "
       +                                "calculations did not converge. Try increasing the "
       +                                "value of 'ns.maxiter' (" << ns.maxiter
       +                                << ") or increase 'ns.tolerance' ("
       +                                << ns.tolerance << ")." << std::endl;
       +                        }
       +                        //break; // end after Jacobi first iteration
       +                    } // end Jacobi iteration loop
       +
       +                    if (write_res_log == 1)
       +                        reslog.close();
       +
       +                    // Find the new pressures and velocities
                            if (PROFILING == 1)
                                startTimer(&kernel_tic);
       -                    setNSepsilonInterior<<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_ns_epsilon, epsilon_value);
       -                    cudaThreadSynchronize();
        
       -                    setNSnormZero<<<dimGridFluid, dimBlockFluid>>>(dev_ns_norm);
       +                    updateNSpressure<<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_epsilon,
       +                            ns.beta,
       +                            dev_ns_p);
                            cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post updateNSpressure", iter);
        
       +                    updateNSvelocity<<<dimGridFluidFace, dimBlockFluidFace>>>(
       +                            dev_ns_v_p_x,
       +                            dev_ns_v_p_y,
       +                            dev_ns_v_p_z,
       +                            dev_ns_phi,
       +                            dev_ns_epsilon,
       +                            ns.beta,
       +                            ns.bc_bot,
       +                            ns.bc_top,
       +                            ns.ndem,
       +                            ns.c_v,
       +                            ns.rho_f,
       +                            wall0_iz,
       +                            iter,
       +                            dev_ns_v_x,
       +                            dev_ns_v_y,
       +                            dev_ns_v_z);
       +                    cudaThreadSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                                &t_setNSepsilon);
       -                    checkForCudaErrorsIter("Post setNSepsilonInterior", iter);
       +                                &t_updateNSvelocityPressure);
       +                    checkForCudaErrorsIter("Post updateNSvelocity", iter);
       +
       +                    setNSghostNodesFace<Float>
       +                        <<<dimGridFluidFace, dimBlockFluidFace>>>(
       +                                dev_ns_v_p_x,
       +                                dev_ns_v_p_y,
       +                                dev_ns_v_p_z,
       +                                ns.bc_bot, ns.bc_top);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter(
       +                            "Post setNSghostNodesFace(dev_ns_v)", iter);
       +
       +                    interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_ns_v_x,
       +                            dev_ns_v_y,
       +                            dev_ns_v_z,
       +                            dev_ns_v);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
       +                } // end iter % ns.dem == 0
       +            } // end cfd_solver == 0
        
       -#ifdef REPORT_MORE_EPSILON
       +            // Darcy solution
       +            else if (cfd_solver == 1) { 
       +
       +#if defined(REPORT_EPSILON) || defined(REPORT_FORCING_TERMS)
                            std::cout
       -                        << "\n###### EPSILON AFTER setNSepsilonInterior "
       -                        << "######" << std::endl;
       -                    transferNSepsilonFromGlobalDeviceMemory();
       -                    printNSarray(stdout, ns.epsilon, "epsilon");
       +                        << "\n\n@@@@@@ TIME STEP " << iter << " @@@"
       +                        << std::endl;
        #endif
        
       -                    // Set the epsilon values at the lower boundary
       -                    pressure = ns.p[idx(0,0,0)];
       -                    pressure_new = pressure; // Guess p_current = p_new
       -                    epsilon_value = pressure_new - ns.beta*pressure;
       +                if (PROFILING == 1)
       +                    startTimer(&kernel_tic);
       +                findDarcyPorosities<<<dimGridFluid, dimBlockFluid>>>(
       +                        dev_cellStart,
       +                        dev_cellEnd,
       +                        dev_x_sorted,
       +                        iter,
       +                        np,
       +                        darcy.c_phi,
       +                        dev_darcy_phi,
       +                        dev_darcy_dphi);
       +                cudaThreadSynchronize();
       +                if (PROFILING == 1)
       +                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                            &t_findDarcyPorosities);
       +                checkForCudaErrorsIter("Post findDarcyPorosities", iter);
       +
       +                if (np > 0) {
       +
                            if (PROFILING == 1)
                                startTimer(&kernel_tic);
       -                    setNSepsilonBottom<<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_ns_epsilon,
       -                            dev_ns_epsilon_new,
       -                            epsilon_value);
       +                    setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_darcy_p, darcy.bc_bot, darcy.bc_top);
                            cudaThreadSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                                &t_setNSdirichlet);
       -                    checkForCudaErrorsIter("Post setNSepsilonBottom", iter);
       -
       -#ifdef REPORT_MORE_EPSILON
       -                    std::cout
       -                        << "\n###### EPSILON AFTER setNSepsilonBottom "
       -                        << "######" << std::endl;
       -                    transferNSepsilonFromGlobalDeviceMemory();
       -                    printNSarray(stdout, ns.epsilon, "epsilon");
       -#endif
       +                                &t_setDarcyGhostNodes);
       +                    checkForCudaErrorsIter("Post setDarcyGhostNodes("
       +                            "dev_darcy_p) before findDarcyPressureForce", iter);
        
       -                    /*setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                      dev_ns_epsilon);
       -                      cudaThreadSynchronize();
       -                      checkForCudaErrors("Post setNSghostNodesFloat(dev_ns_epsilon)",
       -                      iter);*/
       -                    setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_ns_epsilon,
       -                            ns.bc_bot, ns.bc_top);
       +                    if (PROFILING == 1)
       +                        startTimer(&kernel_tic);
       +                    findDarcyPressureForce<<<dimGrid, dimBlock>>>(
       +                            dev_x,
       +                            dev_darcy_p,
       +                            dev_darcy_phi,
       +                            dev_force,
       +                            dev_darcy_f_p);
                            cudaThreadSynchronize();
       -                    checkForCudaErrorsIter("Post setNSghostNodesEpsilon(1)",
       +                    if (PROFILING == 1)
       +                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                &t_findDarcyPressureForce);
       +                    checkForCudaErrorsIter("Post findDarcyPressureForce",
                                    iter);
       +                }
        
       -#ifdef REPORT_MORE_EPSILON
       -                    std::cout <<
       -                        "\n###### EPSILON AFTER setNSghostNodes(epsilon) "
       -                        << "######" << std::endl;
       -                    transferNSepsilonFromGlobalDeviceMemory();
       -                    printNSarray(stdout, ns.epsilon, "epsilon");
       -#endif
       +                // Modulate the pressures at the upper boundary cells
       +                if ((darcy.p_mod_A > 1.0e-5 || darcy.p_mod_A < -1.0e-5) &&
       +                        darcy.p_mod_f > 1.0e-7) {
       +                    // original pressure
       +                    Float new_pressure = darcy.p[d_idx(0,0,darcy.nz-1)] //orig p
       +                        + darcy.p_mod_A*sin(2.0*M_PI*darcy.p_mod_f*time.current
       +                                + darcy.p_mod_phi);
       +                    if (PROFILING == 1)
       +                        startTimer(&kernel_tic);
       +                    setDarcyTopPressure<<<dimGridFluid, dimBlockFluid>>>(
       +                            new_pressure,
       +                            dev_darcy_p);
       +                    if (PROFILING == 1)
       +                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                &t_setDarcyTopPressure);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post setUpperPressureNS", iter);
       +                }
       +
       +                if (walls.nw > 0 && walls.wmode[0] == 1) {
       +                    wall0_iz = walls.nx->w/(grid.L[2]/grid.num[2]);
       +                    /*setDarcyTopWallPressure<<<dimGridFluid, dimBlockFluid>>>(
       +                            new_pressure,
       +                            wall0_iz,
       +                            dev_darcy_p);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post setDarcyTopWallPressure",
       +                            iter);*/
       +                }
       +
       +                if (PROFILING == 1)
       +                    startTimer(&kernel_tic);
       +                findDarcyPermeabilities<<<dimGridFluid, dimBlockFluid>>>(
       +                        darcy.k_c, dev_darcy_phi, dev_darcy_k);
       +                cudaThreadSynchronize();
       +                if (PROFILING == 1)
       +                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                            &t_findDarcyPermeabilities);
       +                checkForCudaErrorsIter("Post findDarcyPermeabilities",
       +                        iter);
       +
       +                if (PROFILING == 1)
       +                    startTimer(&kernel_tic);
       +                setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                        dev_darcy_k, darcy.bc_bot, darcy.bc_top);
       +                cudaThreadSynchronize();
       +                if (PROFILING == 1)
       +                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                            &t_setDarcyGhostNodes);
       +                checkForCudaErrorsIter("Post setDarcyGhostNodes(dev_darcy_k)",
       +                        iter);
       +
       +                if (PROFILING == 1)
       +                    startTimer(&kernel_tic);
       +                findDarcyPermeabilityGradients<<<dimGridFluid, dimBlockFluid>>>(
       +                        dev_darcy_k, dev_darcy_grad_k);
       +                cudaThreadSynchronize();
       +                if (PROFILING == 1)
       +                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                            &t_findDarcyPermeabilityGradients);
       +                checkForCudaErrorsIter("Post findDarcyPermeabilityGradients",
       +                        iter);
       +
       +                if (iter == 0) {
       +                    setDarcyNormZero<<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_darcy_norm);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("Post setDarcyNormZero", iter);
       +
       +                    if (PROFILING == 1)
       +                        startTimer(&kernel_tic);
       +                    copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_darcy_p,
       +                            dev_darcy_p_old);
       +                    cudaThreadSynchronize();
       +                    if (PROFILING == 1)
       +                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                &t_copyValues);
       +                    checkForCudaErrorsIter("Post copyValues(p -> p_old)",
       +                            iter);
                        }
        
       +                /*if (PROFILING == 1)
       +                    startTimer(&kernel_tic);
       +                findDarcyPressureChange<<<dimGridFluid, dimBlockFluid>>>(
       +                        dev_darcy_p_old,
       +                        dev_darcy_p,
       +                        iter,
       +                        dev_darcy_dpdt);
       +                cudaThreadSynchronize();
       +                if (PROFILING == 1)
       +                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                            &t_findDarcyPressureChange);
       +                checkForCudaErrorsIter("Post findDarcyPressureChange", iter);*/
       +
                        // Solve the system of epsilon using a Jacobi iterative solver.
                        // The average normalized residual is initialized to a large
                        // value.
                        //double avg_norm_res;
                        double max_norm_res;
        
       -                // Write a log file of the normalized residuals during the Jacobi
       -                // iterations
       +                // Write a log file of the normalized residuals during the
       +                // Jacobi iterations
                        std::ofstream reslog;
                        if (write_res_log == 1)
                            reslog.open("max_res_norm.dat");
        
       -                // transfer normalized residuals from GPU to CPU
       -#ifdef REPORT_MORE_EPSILON
       -                std::cout << "\n###### BEFORE FIRST JACOBI ITERATION ######"
       -                    << "\n@@@@@@ TIME STEP " << iter << " @@@@@@"
       -                    << std::endl;
       -                transferNSepsilonFromGlobalDeviceMemory();
       -                printNSarray(stdout, ns.epsilon, "epsilon");
       -#endif
       +                for (unsigned int nijac = 0; nijac<darcy.maxiter; ++nijac) {
        
       -                for (unsigned int nijac = 0; nijac<ns.maxiter; ++nijac) {
       -
       -                    //printf("### Jacobi iteration %d\n", nijac);
       +                    if (nijac == 0) {
       +                        if (PROFILING == 1)
       +                            startTimer(&kernel_tic);
       +                        copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_darcy_p,
       +                                dev_darcy_p_old);
       +                        cudaThreadSynchronize();
       +                        if (PROFILING == 1)
       +                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                    &t_copyValues);
       +                        checkForCudaErrorsIter("Post copyValues(p -> p_old)",
       +                                iter);
       +                    }
        
       -                    // Only grad(epsilon) changes during the Jacobi iterations.
       -                    // The remaining terms of the forcing function are only
       -                    // calculated during the first iteration.
                            if (PROFILING == 1)
                                startTimer(&kernel_tic);
       -                    findNSforcing<<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_ns_epsilon,
       -                            dev_ns_phi,
       -                            dev_ns_dphi,
       -                            dev_ns_v_p,
       -                            dev_ns_v_p_x,
       -                            dev_ns_v_p_y,
       -                            dev_ns_v_p_z,
       -                            nijac,
       -                            ns.ndem,
       -                            ns.c_v,
       -                            dev_ns_f1,
       -                            dev_ns_f2,
       -                            dev_ns_f);
       +                    setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_darcy_p, darcy.bc_bot, darcy.bc_top);
                            cudaThreadSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                                &t_findNSforcing);
       -                    checkForCudaErrorsIter("Post findNSforcing", iter);
       -                    /*setNSghostNodesForcing<<<dimGridFluid, dimBlockFluid>>>(
       -                      dev_ns_f1,
       -                      dev_ns_f2,
       -                      dev_ns_f,
       -                      nijac);
       -                      cudaThreadSynchronize();
       -                      checkForCudaErrors("Post setNSghostNodesForcing", iter);*/
       -
       -                    setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_ns_epsilon,
       -                            ns.bc_bot, ns.bc_top);
       -                    cudaThreadSynchronize();
       -                    checkForCudaErrorsIter("Post setNSghostNodesEpsilon(2)",
       -                            iter);
       -
       -#ifdef REPORT_EPSILON
       -                    std::cout << "\n###### JACOBI ITERATION "
       -                        << nijac
       -                        << " after setNSghostNodes(epsilon,2) ######"
       -                        << std::endl;
       -                    transferNSepsilonFromGlobalDeviceMemory();
       -                    printNSarray(stdout, ns.epsilon, "epsilon");
       -#endif
       +                                &t_setDarcyGhostNodes);
       +                    checkForCudaErrorsIter("Post setDarcyGhostNodes("
       +                            "dev_darcy_p) in Jacobi loop", iter);
        
       -                    // Perform a single Jacobi iteration
                            if (PROFILING == 1)
                                startTimer(&kernel_tic);
       -                    jacobiIterationNS<<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_ns_epsilon,
       -                            dev_ns_epsilon_new,
       -                            dev_ns_norm,
       -                            dev_ns_f,
       -                            ns.bc_bot,
       -                            ns.bc_top,
       -                            ns.theta,
       +                    updateDarcySolution<<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_darcy_p_old,
       +                            //dev_darcy_dpdt,
       +                            dev_darcy_p,
       +                            dev_darcy_k,
       +                            dev_darcy_phi,
       +                            dev_darcy_dphi,
       +                            dev_darcy_grad_k,
       +                            darcy.beta_f,
       +                            darcy.mu,
       +                            darcy.bc_bot,
       +                            darcy.bc_top,
       +                            darcy.ndem,
                                    wall0_iz,
       -                            dp_dz);
       +                            dev_darcy_p_new,
       +                            dev_darcy_norm);
                            cudaThreadSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                                &t_jacobiIterationNS);
       -                    checkForCudaErrorsIter("Post jacobiIterationNS", iter);
       -
       -                    // set Dirichlet and Neumann BC at cells containing top wall
       -                    /*if (walls.nw > 0 && walls.wmode[0] == 1) {
       -                        setNSepsilonAtTopWall<<<dimGridFluid, dimBlockFluid>>>(
       -                                dev_ns_epsilon,
       -                                dev_ns_epsilon_new,
       -                                wall0_iz,
       -                                epsilon_value,
       -                                dp_dz);
       -                        cudaThreadSynchronize();
       -                        checkForCudaErrorsIter("Post setNSepsilonAtTopWall",
       -                                iter);
       -                    }*/
       +                                &t_updateDarcySolution);
       +                    checkForCudaErrorsIter("Post updateDarcySolution", iter);
        
                            // Copy new values to current values
       +                    if (PROFILING == 1)
       +                        startTimer(&kernel_tic);
                            copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_ns_epsilon_new,
       -                            dev_ns_epsilon);
       +                            dev_darcy_p_new,
       +                            dev_darcy_p);
                            cudaThreadSynchronize();
       -                    checkForCudaErrorsIter
       -                        ("Post copyValues (epsilon_new->epsilon)", iter);
       +                    if (PROFILING == 1)
       +                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                &t_copyValues);
       +                    checkForCudaErrorsIter("Post copyValues(p_new -> p)", iter);
        
        #ifdef REPORT_EPSILON
                            std::cout << "\n###### JACOBI ITERATION "
       -                        << nijac << " after jacobiIterationNS ######"
       -                        << std::endl;
       -                    transferNSepsilonFromGlobalDeviceMemory();
       -                    printNSarray(stdout, ns.epsilon, "epsilon");
       +                        << nijac << " after copyValues ######" << std::endl;
       +                    transferDarcyPressuresFromGlobalDeviceMemory();
       +                    printDarcyArray(stdout, darcy.p, "p");
        #endif
        
                            if (nijac % nijacnorm == 0) {
       -
                                // Read the normalized residuals from the device
       -                        transferNSnormFromGlobalDeviceMemory();
       +                        transferDarcyNormFromGlobalDeviceMemory();
        
                                // Write the normalized residuals to the terminal
       -                        //printNSarray(stdout, ns.norm, "norm");
       +                        //printDarcyArray(stdout, darcy.norm, "norm");
        
                                // Find the maximum value of the normalized residuals
       -                        max_norm_res = maxNormResNS();
       +                        max_norm_res = maxNormResDarcy();
        
                                // Write the Jacobi iteration number and maximum value
                                // of the normalized residual to the log file
                                if (write_res_log == 1)
                                    reslog << nijac << '\t' << max_norm_res
                                        << std::endl;
       -                    }
        
       -                    if (max_norm_res < ns.tolerance) {
       +                        if (max_norm_res <= darcy.tolerance) {
       +                            if (write_conv_log == 1
       +                                    && iter % conv_log_interval == 0)
       +                                convlog << iter+1 << '\t' << nijac << std::endl;
        
       -                        if (write_conv_log == 1 && iter % conv_log_interval == 0)
       -                            convlog << iter << '\t' << nijac << std::endl;
       -
       -                        setNSghostNodes<Float>
       -                            <<<dimGridFluid, dimBlockFluid>>>(
       -                                dev_ns_epsilon,
       -                                ns.bc_bot, ns.bc_top);
       -                        cudaThreadSynchronize();
       -                        checkForCudaErrorsIter
       -                            ("Post setNSghostNodesEpsilon(4)", iter);
       -
       -                        // Apply smoothing if requested
       -                        if (ns.gamma > 0.0) {
       -
       -                            smoothing<<<dimGridFluid, dimBlockFluid>>>(
       -                                    dev_ns_epsilon,
       -                                    ns.gamma,
       -                                    ns.bc_bot, ns.bc_top);
       -                            cudaThreadSynchronize();
       -                            checkForCudaErrorsIter("Post smoothing", iter);
       -
       -                            setNSghostNodes<Float>
       -                                <<<dimGridFluid, dimBlockFluid>>>(
       -                                    dev_ns_epsilon,
       -                                    ns.bc_bot, ns.bc_top);
       -                            cudaThreadSynchronize();
       -                            checkForCudaErrorsIter
       -                                ("Post setNSghostNodesEpsilon(4)", iter);
       +                            break;  // solution has converged, exit Jacobi loop
                                }
       -
       -#ifdef REPORT_EPSILON
       -                        std::cout << "\n###### JACOBI ITERATION "
       -                            << nijac << " after smoothing ######"
       -                            << std::endl;
       -                        transferNSepsilonFromGlobalDeviceMemory();
       -                        printNSarray(stdout, ns.epsilon, "epsilon");
       -#endif
       -
       -                        break;  // solution has converged, exit Jacobi loop
                            }
        
       -                    if (nijac >= ns.maxiter-1) {
       +                    if (nijac == darcy.maxiter-1) {
        
                                if (write_conv_log == 1)
       -                            convlog << iter << '\t' << nijac << std::endl;
       +                            convlog << iter+1 << '\t' << nijac << std::endl;
        
                                std::cerr << "\nIteration " << iter << ", time " 
                                    << iter*time.dt << " s: "
       -                            "Error, the epsilon solution in the fluid "
       -                            "calculations did not converge. Try increasing the "
       -                            "value of 'ns.maxiter' (" << ns.maxiter
       -                            << ") or increase 'ns.tolerance' ("
       -                            << ns.tolerance << ")." << std::endl;
       +                            "Error, the pressure solution in the fluid "
       +                            "calculations did not converge. Try increasing "
       +                            "the value of 'darcy.maxiter' ("
       +                            << darcy.maxiter
       +                            << ") or increase 'darcy.tolerance' ("
       +                            << darcy.tolerance << ")." << std::endl;
                            }
       -                    //break; // end after Jacobi first iteration
       -                } // end Jacobi iteration loop
        
       -                if (write_res_log == 1)
       -                    reslog.close();
       +                    if (write_res_log == 1)
       +                        reslog.close();
       +
       +                    //break; // end after first iteration
       +                }
        
       -                // Find the new pressures and velocities
                        if (PROFILING == 1)
                            startTimer(&kernel_tic);
       -
       -                updateNSpressure<<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_epsilon,
       -                        ns.beta,
       -                        dev_ns_p);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post updateNSpressure", iter);
       -
       -                updateNSvelocity<<<dimGridFluidFace, dimBlockFluidFace>>>(
       -                        dev_ns_v_p_x,
       -                        dev_ns_v_p_y,
       -                        dev_ns_v_p_z,
       -                        dev_ns_phi,
       -                        dev_ns_epsilon,
       -                        ns.beta,
       -                        ns.bc_bot,
       -                        ns.bc_top,
       -                        ns.ndem,
       -                        ns.c_v,
       -                        wall0_iz,
       -                        iter,
       -                        dev_ns_v_x,
       -                        dev_ns_v_y,
       -                        dev_ns_v_z);
       +                setDarcyGhostNodes<Float> <<<dimGridFluid, dimBlockFluid>>>
       +                    (dev_darcy_p, darcy.bc_bot, darcy.bc_top);
                        cudaThreadSynchronize();
                        if (PROFILING == 1)
                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                            &t_updateNSvelocityPressure);
       -                checkForCudaErrorsIter("Post updateNSvelocity", iter);
       -
       -                setNSghostNodesFace<Float>
       -                    <<<dimGridFluidFace, dimBlockFluidFace>>>(
       -                        dev_ns_v_p_x,
       -                        dev_ns_v_p_y,
       -                        dev_ns_v_p_z,
       -                        ns.bc_bot, ns.bc_top);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter(
       -                        "Post setNSghostNodesFace(dev_ns_v)", iter);
       +                            &t_setDarcyGhostNodes);
       +                checkForCudaErrorsIter("Post setDarcyGhostNodes("
       +                        "dev_darcy_p) after Jacobi loop", iter);
        
       -                interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_v_x,
       -                        dev_ns_v_y,
       -                        dev_ns_v_z,
       -                        dev_ns_v);
       +                if (PROFILING == 1)
       +                    startTimer(&kernel_tic);
       +                findDarcyVelocities<<<dimGridFluid, dimBlockFluid>>>(
       +                        dev_darcy_p,
       +                        dev_darcy_phi,
       +                        dev_darcy_k,
       +                        darcy.mu,
       +                        dev_darcy_v);
                        cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
       -            } // end iter % ns.dem == 0
       -        } // end navierstokes == 1
       +                if (PROFILING == 1)
       +                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                            &t_findDarcyVelocities);
       +                checkForCudaErrorsIter("Post findDarcyVelocities", iter);
       +            }
       +        }
       +        //break; // end after first iteration
        
                if (np > 0) {
                    // Update particle kinematics
       t@@ -1777,7 +2144,7 @@ __host__ void DEM::startTime()
                    checkForCudaErrorsIter("Beginning of file output section", iter);
        
                    // v_x, v_y, v_z -> v
       -            if (navierstokes == 1) {
       +            if (fluid == 1 && cfd_solver == 0) {
                        interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>(
                                dev_ns_v_x,
                                dev_ns_v_y,
       t@@ -1797,7 +2164,7 @@ __host__ void DEM::startTime()
                    cudaThreadSynchronize();
        
                    // Check the numerical stability of the NS solver
       -            if (navierstokes == 1)
       +            if (fluid == 1 && cfd_solver == 0)
                        checkNSstability();
        
                    // Write binary output file
       t@@ -1812,7 +2179,7 @@ __host__ void DEM::startTime()
                    printNSarray(stdout, ns.epsilon, "epsilon");*/
        
                    // Write fluid arrays
       -            /*if (navierstokes == 1) {
       +            /*if (fluid == 1 && cfd_solver == 0) {
                        sprintf(file,"output/%s.ns_phi.output%05d.bin", sid.c_str(),
                            time.step_count);
                        writeNSarray(ns.phi, file);
       t@@ -1904,7 +2271,12 @@ __host__ void DEM::startTime()
                    t_findNSstressTensor +
                    t_findNSdivphiviv + t_findNSdivtau + t_findPredNSvelocities +
                    t_setNSepsilon + t_setNSdirichlet + t_setNSghostNodesDev +
       -            t_findNSforcing + t_jacobiIterationNS + t_updateNSvelocityPressure;
       +            t_findNSforcing + t_jacobiIterationNS + t_updateNSvelocityPressure +
       +            t_findDarcyPorosities + t_setDarcyGhostNodes +
       +            t_findDarcyPressureForce + t_setDarcyTopPressure +
       +            t_findDarcyPermeabilities + t_findDarcyPermeabilityGradients +
       +            //t_findDarcyPressureChange +
       +            t_updateDarcySolution + t_copyValues + t_findDarcyVelocities;
        
                cout << "\nKernel profiling statistics:\n"
                    << "  - calcParticleCellID:\t\t" << t_calcParticleCellID/1000.0
       t@@ -1931,34 +2303,67 @@ __host__ void DEM::startTime()
                    << "\t(" << 100.0*t_summation/t_sum << " %)\n"
                    << "  - integrateWalls:\t\t" << t_integrateWalls/1000.0 << " s"
                    << "\t(" << 100.0*t_integrateWalls/t_sum << " %)\n";
       -        if (navierstokes == 1) {
       +        if (fluid == 1 && cfd_solver == 0) {
                    cout << "  - findPorositiesDev:\t\t" << t_findPorositiesDev/1000.0
       -            << " s" << "\t(" << 100.0*t_findPorositiesDev/t_sum << " %)\n"
       -            << "  - findNSstressTensor:\t\t" << t_findNSstressTensor/1000.0
       -            << " s" << "\t(" << 100.0*t_findNSstressTensor/t_sum << " %)\n"
       -            << "  - findNSdivphiviv:\t\t" << t_findNSdivphiviv/1000.0
       -            << " s" << "\t(" << 100.0*t_findNSdivphiviv/t_sum << " %)\n"
       -            << "  - findNSdivtau:\t\t" << t_findNSdivtau/1000.0
       -            << " s" << "\t(" << 100.0*t_findNSdivtau/t_sum << " %)\n"
       -            << "  - findPredNSvelocities:\t" << t_findPredNSvelocities/1000.0
       -            << " s" << "\t(" << 100.0*t_findPredNSvelocities/t_sum << " %)\n"
       -            << "  - setNSepsilon:\t\t" << t_setNSepsilon/1000.0
       -            << " s" << "\t(" << 100.0*t_setNSepsilon/t_sum << " %)\n"
       -            << "  - setNSdirichlet:\t\t" << t_setNSdirichlet/1000.0
       -            << " s" << "\t(" << 100.0*t_setNSdirichlet/t_sum << " %)\n"
       -            << "  - setNSghostNodesDev:\t\t" << t_setNSghostNodesDev/1000.0
       -            << " s" << "\t(" << 100.0*t_setNSghostNodesDev/t_sum << " %)\n"
       -            << "  - findNSforcing:\t\t" << t_findNSforcing/1000.0 << " s"
       -            << "\t(" << 100.0*t_findNSforcing/t_sum << " %)\n"
       -            << "  - jacobiIterationNS:\t\t" << t_jacobiIterationNS/1000.0 << " s"
       -            << "\t(" << 100.0*t_jacobiIterationNS/t_sum << " %)\n"
       -            << "  - updateNSvelocityPressure:\t"
       -            << t_updateNSvelocityPressure/1000.0 << " s"
       -            << "\t(" << 100.0*t_updateNSvelocityPressure/t_sum << " %)\n";
       +                << " s" << "\t(" << 100.0*t_findPorositiesDev/t_sum << " %)\n"
       +                << "  - findNSstressTensor:\t\t" << t_findNSstressTensor/1000.0
       +                << " s" << "\t(" << 100.0*t_findNSstressTensor/t_sum << " %)\n"
       +                << "  - findNSdivphiviv:\t\t" << t_findNSdivphiviv/1000.0
       +                << " s" << "\t(" << 100.0*t_findNSdivphiviv/t_sum << " %)\n"
       +                << "  - findNSdivtau:\t\t" << t_findNSdivtau/1000.0
       +                << " s" << "\t(" << 100.0*t_findNSdivtau/t_sum << " %)\n"
       +                << "  - findPredNSvelocities:\t" <<
       +                t_findPredNSvelocities/1000.0 << " s" << "\t(" <<
       +                100.0*t_findPredNSvelocities/t_sum << " %)\n"
       +                << "  - setNSepsilon:\t\t" << t_setNSepsilon/1000.0
       +                << " s" << "\t(" << 100.0*t_setNSepsilon/t_sum << " %)\n"
       +                << "  - setNSdirichlet:\t\t" << t_setNSdirichlet/1000.0
       +                << " s" << "\t(" << 100.0*t_setNSdirichlet/t_sum << " %)\n"
       +                << "  - setNSghostNodesDev:\t\t" << t_setNSghostNodesDev/1000.0
       +                << " s" << "\t(" << 100.0*t_setNSghostNodesDev/t_sum << " %)\n"
       +                << "  - findNSforcing:\t\t" << t_findNSforcing/1000.0 << " s"
       +                << "\t(" << 100.0*t_findNSforcing/t_sum << " %)\n"
       +                << "  - jacobiIterationNS:\t\t" << t_jacobiIterationNS/1000.0
       +                << " s"
       +                << "\t(" << 100.0*t_jacobiIterationNS/t_sum << " %)\n"
       +                << "  - updateNSvelocityPressure:\t"
       +                << t_updateNSvelocityPressure/1000.0 << " s"
       +                << "\t(" << 100.0*t_updateNSvelocityPressure/t_sum << " %)\n";
       +        } else if (fluid == 1 && cfd_solver == 1) {
       +            cout << "  - findDarcyPorosities:\t" <<
       +                t_findDarcyPorosities/1000.0 << " s" << "\t(" <<
       +                100.0*t_findDarcyPorosities/t_sum << " %)\n"
       +                << "  - setDarcyGhostNodes:\t\t" <<
       +                t_setDarcyGhostNodes/1000.0 << " s" << "\t(" <<
       +                100.0*t_setDarcyGhostNodes/t_sum << " %)\n"
       +                << "  - findDarcyPressureForce:\t" <<
       +                t_findDarcyPressureForce/1000.0 << " s" << "\t(" <<
       +                100.0*t_findDarcyPressureForce/t_sum << " %)\n"
       +                << "  - setDarcyTopPressure:\t" <<
       +                t_setDarcyTopPressure/1000.0 << " s" << "\t(" <<
       +                100.0*t_setDarcyTopPressure/t_sum << " %)\n"
       +                << "  - findDarcyPermeabilities:\t" <<
       +                t_findDarcyPermeabilities/1000.0 << " s" << "\t(" <<
       +                100.0*t_findDarcyPermeabilities/t_sum << " %)\n"
       +                << "  - findDarcyPermeabilityGrads:\t" <<
       +                t_findDarcyPermeabilityGradients/1000.0 << " s" << "\t(" <<
       +                100.0*t_findDarcyPermeabilityGradients/t_sum << " %)\n"
       +                //<< "  - findDarcyPressureChange:\t" <<
       +                //t_findDarcyPressureChange/1000.0 << " s" << "\t(" <<
       +                //100.0*t_findDarcyPressureChange/t_sum << " %)\n"
       +                << "  - updateDarcySolution:\t" <<
       +                t_updateDarcySolution/1000.0 << " s" << "\t(" <<
       +                100.0*t_updateDarcySolution/t_sum << " %)\n"
       +                << "  - copyValues:\t\t\t" <<
       +                t_copyValues/1000.0 << " s" << "\t(" <<
       +                100.0*t_copyValues/t_sum << " %)\n"
       +                << "  - findDarcyVelocities:\t" <<
       +                t_findDarcyVelocities/1000.0 << " s" << "\t(" <<
       +                100.0*t_findDarcyVelocities/t_sum << " %)" << std::endl;
                }
            }
        
       -    // Free GPU device memory  
       +    // Free GPU device memory
            freeGlobalDeviceMemory();
            checkForCudaErrorsIter("After freeGlobalDeviceMemory()", iter);
        
       t@@ -1967,8 +2372,11 @@ __host__ void DEM::startTime()
            delete[] k.distmod;
            delete[] k.delta_t;
        
       -    if (navierstokes == 1) {
       -        endNS();
       +    if (fluid == 1) {
       +        if (cfd_solver == 0)
       +            endNS();
       +        else if (cfd_solver == 1)
       +            endDarcy();
            }
        
            cudaDeviceReset();
 (DIR) diff --git a/src/file_io.cpp b/src/file_io.cpp
       t@@ -254,73 +254,135 @@ void DEM::readbin(const char *target)
            if (verbose == 1)
                cout << "Done\n";
        
       -    if (navierstokes == 1) {    // Navier Stokes flow
       +    // Simulate fluid
       +    if (fluid == 1) {
        
       -        initNSmem();
       +        ifs.read(as_bytes(cfd_solver), sizeof(int));
        
       -        ifs.read(as_bytes(params.mu), sizeof(params.mu));
       +        if (cfd_solver < 0 || cfd_solver > 1) {
       +            std::cerr << "Value of cfd_solver not understood ("
       +                << cfd_solver << ")" << std::endl;
       +            exit(1);
       +        }
       +
       +        if (cfd_solver == 0) {    // Navier Stokes flow
       +
       +            initNSmem();
       +
       +            ifs.read(as_bytes(ns.mu), sizeof(Float));
        
       -        if (verbose == 1)
       -            cout << "  - Reading fluid values:\t\t\t  ";
       +            if (verbose == 1)
       +                cout << "  - Reading fluid values:\t\t\t  ";
        
       -        for (z = 0; z<grid.num[2]; ++z) {
       -            for (y = 0; y<grid.num[1]; ++y) {
       -                for (x = 0; x<grid.num[0]; ++x) {
       -                    i = idx(x,y,z);
       -                    ifs.read(as_bytes(ns.v[i].x), sizeof(Float));
       -                    ifs.read(as_bytes(ns.v[i].y), sizeof(Float));
       -                    ifs.read(as_bytes(ns.v[i].z), sizeof(Float));
       -                    ifs.read(as_bytes(ns.p[i]), sizeof(Float));
       -                    ifs.read(as_bytes(ns.phi[i]), sizeof(Float));
       -                    ifs.read(as_bytes(ns.dphi[i]), sizeof(Float));
       +            for (z = 0; z<grid.num[2]; ++z) {
       +                for (y = 0; y<grid.num[1]; ++y) {
       +                    for (x = 0; x<grid.num[0]; ++x) {
       +                        i = idx(x,y,z);
       +                        ifs.read(as_bytes(ns.v[i].x), sizeof(Float));
       +                        ifs.read(as_bytes(ns.v[i].y), sizeof(Float));
       +                        ifs.read(as_bytes(ns.v[i].z), sizeof(Float));
       +                        ifs.read(as_bytes(ns.p[i]), sizeof(Float));
       +                        ifs.read(as_bytes(ns.phi[i]), sizeof(Float));
       +                        ifs.read(as_bytes(ns.dphi[i]), sizeof(Float));
       +                    }
                        }
                    }
       -        }
        
       -        ifs.read(as_bytes(params.rho_f), sizeof(Float));
       -        ifs.read(as_bytes(ns.p_mod_A), sizeof(Float));
       -        ifs.read(as_bytes(ns.p_mod_f), sizeof(Float));
       -        ifs.read(as_bytes(ns.p_mod_phi), sizeof(Float));
       +            ifs.read(as_bytes(ns.rho_f), sizeof(Float));
       +            ifs.read(as_bytes(ns.p_mod_A), sizeof(Float));
       +            ifs.read(as_bytes(ns.p_mod_f), sizeof(Float));
       +            ifs.read(as_bytes(ns.p_mod_phi), sizeof(Float));
        
       -        ifs.read(as_bytes(ns.bc_bot), sizeof(int));
       -        ifs.read(as_bytes(ns.bc_top), sizeof(int));
       -        ifs.read(as_bytes(ns.free_slip_bot), sizeof(int));
       -        ifs.read(as_bytes(ns.free_slip_top), sizeof(int));
       +            ifs.read(as_bytes(ns.bc_bot), sizeof(int));
       +            ifs.read(as_bytes(ns.bc_top), sizeof(int));
       +            ifs.read(as_bytes(ns.free_slip_bot), sizeof(int));
       +            ifs.read(as_bytes(ns.free_slip_top), sizeof(int));
        
       -        ifs.read(as_bytes(ns.gamma), sizeof(Float));
       -        ifs.read(as_bytes(ns.theta), sizeof(Float));
       -        ifs.read(as_bytes(ns.beta), sizeof(Float));
       -        ifs.read(as_bytes(ns.tolerance), sizeof(Float));
       -        ifs.read(as_bytes(ns.maxiter), sizeof(unsigned int));
       -        ifs.read(as_bytes(ns.ndem), sizeof(unsigned int));
       +            ifs.read(as_bytes(ns.gamma), sizeof(Float));
       +            ifs.read(as_bytes(ns.theta), sizeof(Float));
       +            ifs.read(as_bytes(ns.beta), sizeof(Float));
       +            ifs.read(as_bytes(ns.tolerance), sizeof(Float));
       +            ifs.read(as_bytes(ns.maxiter), sizeof(unsigned int));
       +            ifs.read(as_bytes(ns.ndem), sizeof(unsigned int));
        
       -        ifs.read(as_bytes(ns.c_phi), sizeof(Float));
       -        ifs.read(as_bytes(ns.c_v), sizeof(Float));
       -        ifs.read(as_bytes(ns.dt_dem_fac), sizeof(Float));
       +            ifs.read(as_bytes(ns.c_phi), sizeof(Float));
       +            ifs.read(as_bytes(ns.c_v), sizeof(Float));
       +            ifs.read(as_bytes(ns.dt_dem_fac), sizeof(Float));
        
       -        for (i = 0; i<np; ++i) {
       -            ifs.read(as_bytes(ns.f_d[i].x), sizeof(Float));
       -            ifs.read(as_bytes(ns.f_d[i].y), sizeof(Float));
       -            ifs.read(as_bytes(ns.f_d[i].z), sizeof(Float));
       -        }
       -        for (i = 0; i<np; ++i) {
       -            ifs.read(as_bytes(ns.f_p[i].x), sizeof(Float));
       -            ifs.read(as_bytes(ns.f_p[i].y), sizeof(Float));
       -            ifs.read(as_bytes(ns.f_p[i].z), sizeof(Float));
       -        }
       -        for (i = 0; i<np; ++i) {
       -            ifs.read(as_bytes(ns.f_v[i].x), sizeof(Float));
       -            ifs.read(as_bytes(ns.f_v[i].y), sizeof(Float));
       -            ifs.read(as_bytes(ns.f_v[i].z), sizeof(Float));
       -        }
       -        for (i = 0; i<np; ++i) {
       -            ifs.read(as_bytes(ns.f_sum[i].x), sizeof(Float));
       -            ifs.read(as_bytes(ns.f_sum[i].y), sizeof(Float));
       -            ifs.read(as_bytes(ns.f_sum[i].z), sizeof(Float));
       -        }
       +            for (i = 0; i<np; ++i) {
       +                ifs.read(as_bytes(ns.f_d[i].x), sizeof(Float));
       +                ifs.read(as_bytes(ns.f_d[i].y), sizeof(Float));
       +                ifs.read(as_bytes(ns.f_d[i].z), sizeof(Float));
       +            }
       +            for (i = 0; i<np; ++i) {
       +                ifs.read(as_bytes(ns.f_p[i].x), sizeof(Float));
       +                ifs.read(as_bytes(ns.f_p[i].y), sizeof(Float));
       +                ifs.read(as_bytes(ns.f_p[i].z), sizeof(Float));
       +            }
       +            for (i = 0; i<np; ++i) {
       +                ifs.read(as_bytes(ns.f_v[i].x), sizeof(Float));
       +                ifs.read(as_bytes(ns.f_v[i].y), sizeof(Float));
       +                ifs.read(as_bytes(ns.f_v[i].z), sizeof(Float));
       +            }
       +            for (i = 0; i<np; ++i) {
       +                ifs.read(as_bytes(ns.f_sum[i].x), sizeof(Float));
       +                ifs.read(as_bytes(ns.f_sum[i].y), sizeof(Float));
       +                ifs.read(as_bytes(ns.f_sum[i].z), sizeof(Float));
       +            }
       +
       +            if (verbose == 1)
       +                cout << "Done" << std::endl;
        
       -        if (verbose == 1)
       -            cout << "Done" << std::endl;
       +        } else if (cfd_solver == 1) { // Darcy flow
       +
       +            initDarcyMem();
       +
       +            ifs.read(as_bytes(darcy.mu), sizeof(Float));
       +
       +            if (verbose == 1)
       +                cout << "  - Reading fluid values:\t\t\t  ";
       +
       +            for (z = 0; z<darcy.nz; ++z) {
       +                for (y = 0; y<darcy.ny; ++y) {
       +                    for (x = 0; x<darcy.nx; ++x) {
       +                        i = d_idx(x,y,z);
       +                        ifs.read(as_bytes(darcy.v[i].x), sizeof(Float));
       +                        ifs.read(as_bytes(darcy.v[i].y), sizeof(Float));
       +                        ifs.read(as_bytes(darcy.v[i].z), sizeof(Float));
       +                        ifs.read(as_bytes(darcy.p[i]), sizeof(Float));
       +                        ifs.read(as_bytes(darcy.phi[i]), sizeof(Float));
       +                        ifs.read(as_bytes(darcy.dphi[i]), sizeof(Float));
       +                    }
       +                }
       +            }
       +
       +            ifs.read(as_bytes(darcy.rho_f), sizeof(Float));
       +            ifs.read(as_bytes(darcy.p_mod_A), sizeof(Float));
       +            ifs.read(as_bytes(darcy.p_mod_f), sizeof(Float));
       +            ifs.read(as_bytes(darcy.p_mod_phi), sizeof(Float));
       +
       +            ifs.read(as_bytes(darcy.bc_bot), sizeof(int));
       +            ifs.read(as_bytes(darcy.bc_top), sizeof(int));
       +            ifs.read(as_bytes(darcy.free_slip_bot), sizeof(int));
       +            ifs.read(as_bytes(darcy.free_slip_top), sizeof(int));
       +
       +            ifs.read(as_bytes(darcy.tolerance), sizeof(Float));
       +            ifs.read(as_bytes(darcy.maxiter), sizeof(unsigned int));
       +            ifs.read(as_bytes(darcy.ndem), sizeof(unsigned int));
       +            ifs.read(as_bytes(darcy.c_phi), sizeof(Float));
       +
       +            for (i = 0; i<np; ++i) {
       +                ifs.read(as_bytes(darcy.f_p[i].x), sizeof(Float));
       +                ifs.read(as_bytes(darcy.f_p[i].y), sizeof(Float));
       +                ifs.read(as_bytes(darcy.f_p[i].z), sizeof(Float));
       +            }
       +
       +            ifs.read(as_bytes(darcy.beta_f), sizeof(Float));
       +            ifs.read(as_bytes(darcy.k_c), sizeof(Float));
       +
       +            if (verbose == 1)
       +                cout << "Done" << std::endl;
       +        }
            }
        
            for (i = 0; i<np; ++i)
       t@@ -484,73 +546,128 @@ void DEM::writebin(const char *target)
                    ofs.write(as_bytes(k.bonds_omega[i].z), sizeof(Float));
                }
        
       -        if (navierstokes == 1) { // Navier Stokes flow
       +        if (fluid == 1) {
        
       -            ofs.write(as_bytes(params.mu), sizeof(params.mu));
       +            ofs.write(as_bytes(cfd_solver), sizeof(int));
        
       -            int x, y, z;
       -            for (z=0; z<ns.nz; z++) {
       -                for (y=0; y<ns.ny; y++) {
       -                    for (x=0; x<ns.nx; x++) {
       -                        i = idx(x,y,z);
       +            if (cfd_solver == 0) { // Navier Stokes flow
       +
       +                ofs.write(as_bytes(ns.mu), sizeof(Float));
       +
       +                int x, y, z;
       +                for (z=0; z<ns.nz; z++) {
       +                    for (y=0; y<ns.ny; y++) {
       +                        for (x=0; x<ns.nx; x++) {
       +                            i = idx(x,y,z);
        
       -                        // Interpolated cell-center velocities
       -                        ofs.write(as_bytes(ns.v[i].x), sizeof(Float));
       -                        ofs.write(as_bytes(ns.v[i].y), sizeof(Float));
       -                        ofs.write(as_bytes(ns.v[i].z), sizeof(Float));
       +                            // Interpolated cell-center velocities
       +                            ofs.write(as_bytes(ns.v[i].x), sizeof(Float));
       +                            ofs.write(as_bytes(ns.v[i].y), sizeof(Float));
       +                            ofs.write(as_bytes(ns.v[i].z), sizeof(Float));
        
       -                        // Cell-face velocities
       -                        //ofs.write(as_bytes(ns.v_x[vidx(x,y,z)]), sizeof(Float));
       -                        //ofs.write(as_bytes(ns.v_y[vidx(x,y,z)]), sizeof(Float));
       -                        //ofs.write(as_bytes(ns.v_z[vidx(x,y,z)]), sizeof(Float));
       +                            // Cell-face velocities
       +                            //ofs.write(as_bytes(ns.v_x[vidx(x,y,z)]), sizeof(Float));
       +                            //ofs.write(as_bytes(ns.v_y[vidx(x,y,z)]), sizeof(Float));
       +                            //ofs.write(as_bytes(ns.v_z[vidx(x,y,z)]), sizeof(Float));
        
       -                        ofs.write(as_bytes(ns.p[i]), sizeof(Float));
       -                        ofs.write(as_bytes(ns.phi[i]), sizeof(Float));
       -                        ofs.write(as_bytes(ns.dphi[i]), sizeof(Float));
       +                            ofs.write(as_bytes(ns.p[i]), sizeof(Float));
       +                            ofs.write(as_bytes(ns.phi[i]), sizeof(Float));
       +                            ofs.write(as_bytes(ns.dphi[i]), sizeof(Float));
       +                        }
                            }
                        }
       -            }
        
       -            ofs.write(as_bytes(params.rho_f), sizeof(Float));
       -            ofs.write(as_bytes(ns.p_mod_A), sizeof(Float));
       -            ofs.write(as_bytes(ns.p_mod_f), sizeof(Float));
       -            ofs.write(as_bytes(ns.p_mod_phi), sizeof(Float));
       +                ofs.write(as_bytes(ns.rho_f), sizeof(Float));
       +                ofs.write(as_bytes(ns.p_mod_A), sizeof(Float));
       +                ofs.write(as_bytes(ns.p_mod_f), sizeof(Float));
       +                ofs.write(as_bytes(ns.p_mod_phi), sizeof(Float));
       +
       +                ofs.write(as_bytes(ns.bc_bot), sizeof(int));
       +                ofs.write(as_bytes(ns.bc_top), sizeof(int));
       +                ofs.write(as_bytes(ns.free_slip_bot), sizeof(int));
       +                ofs.write(as_bytes(ns.free_slip_top), sizeof(int));
       +
       +                ofs.write(as_bytes(ns.gamma), sizeof(Float));
       +                ofs.write(as_bytes(ns.theta), sizeof(Float));
       +                ofs.write(as_bytes(ns.beta), sizeof(Float));
       +                ofs.write(as_bytes(ns.tolerance), sizeof(Float));
       +                ofs.write(as_bytes(ns.maxiter), sizeof(unsigned int));
       +                ofs.write(as_bytes(ns.ndem), sizeof(unsigned int));
       +
       +                ofs.write(as_bytes(ns.c_phi), sizeof(Float));
       +                ofs.write(as_bytes(ns.c_v), sizeof(Float));
       +                ofs.write(as_bytes(ns.dt_dem_fac), sizeof(Float));
       +
       +                for (i = 0; i<np; ++i) {
       +                    ofs.write(as_bytes(ns.f_d[i].x), sizeof(Float));
       +                    ofs.write(as_bytes(ns.f_d[i].y), sizeof(Float));
       +                    ofs.write(as_bytes(ns.f_d[i].z), sizeof(Float));
       +                }
       +                for (i = 0; i<np; ++i) {
       +                    ofs.write(as_bytes(ns.f_p[i].x), sizeof(Float));
       +                    ofs.write(as_bytes(ns.f_p[i].y), sizeof(Float));
       +                    ofs.write(as_bytes(ns.f_p[i].z), sizeof(Float));
       +                }
       +                for (i = 0; i<np; ++i) {
       +                    ofs.write(as_bytes(ns.f_v[i].x), sizeof(Float));
       +                    ofs.write(as_bytes(ns.f_v[i].y), sizeof(Float));
       +                    ofs.write(as_bytes(ns.f_v[i].z), sizeof(Float));
       +                }
       +                for (i = 0; i<np; ++i) {
       +                    ofs.write(as_bytes(ns.f_sum[i].x), sizeof(Float));
       +                    ofs.write(as_bytes(ns.f_sum[i].y), sizeof(Float));
       +                    ofs.write(as_bytes(ns.f_sum[i].z), sizeof(Float));
       +                }
       +
       +            } else if (cfd_solver == 1) {    // Darcy flow
        
       -            ofs.write(as_bytes(ns.bc_bot), sizeof(int));
       -            ofs.write(as_bytes(ns.bc_top), sizeof(int));
       -            ofs.write(as_bytes(ns.free_slip_bot), sizeof(int));
       -            ofs.write(as_bytes(ns.free_slip_top), sizeof(int));
       +                ofs.write(as_bytes(darcy.mu), sizeof(Float));
        
       -            ofs.write(as_bytes(ns.gamma), sizeof(Float));
       -            ofs.write(as_bytes(ns.theta), sizeof(Float));
       -            ofs.write(as_bytes(ns.beta), sizeof(Float));
       -            ofs.write(as_bytes(ns.tolerance), sizeof(Float));
       -            ofs.write(as_bytes(ns.maxiter), sizeof(unsigned int));
       -            ofs.write(as_bytes(ns.ndem), sizeof(unsigned int));
       +                int x, y, z;
       +                for (z=0; z<darcy.nz; z++) {
       +                    for (y=0; y<darcy.ny; y++) {
       +                        for (x=0; x<darcy.nx; x++) {
       +                            i = d_idx(x,y,z);
        
       -            ofs.write(as_bytes(ns.c_phi), sizeof(Float));
       -            ofs.write(as_bytes(ns.c_v), sizeof(Float));
       -            ofs.write(as_bytes(ns.dt_dem_fac), sizeof(Float));
       +                            // Interpolated cell-center velocities
       +                            ofs.write(as_bytes(darcy.v[i].x), sizeof(Float));
       +                            ofs.write(as_bytes(darcy.v[i].y), sizeof(Float));
       +                            ofs.write(as_bytes(darcy.v[i].z), sizeof(Float));
        
       -            for (i = 0; i<np; ++i) {
       -                ofs.write(as_bytes(ns.f_d[i].x), sizeof(Float));
       -                ofs.write(as_bytes(ns.f_d[i].y), sizeof(Float));
       -                ofs.write(as_bytes(ns.f_d[i].z), sizeof(Float));
       -            }
       -            for (i = 0; i<np; ++i) {
       -                ofs.write(as_bytes(ns.f_p[i].x), sizeof(Float));
       -                ofs.write(as_bytes(ns.f_p[i].y), sizeof(Float));
       -                ofs.write(as_bytes(ns.f_p[i].z), sizeof(Float));
       -            }
       -            for (i = 0; i<np; ++i) {
       -                ofs.write(as_bytes(ns.f_v[i].x), sizeof(Float));
       -                ofs.write(as_bytes(ns.f_v[i].y), sizeof(Float));
       -                ofs.write(as_bytes(ns.f_v[i].z), sizeof(Float));
       -            }
       -            for (i = 0; i<np; ++i) {
       -                ofs.write(as_bytes(ns.f_sum[i].x), sizeof(Float));
       -                ofs.write(as_bytes(ns.f_sum[i].y), sizeof(Float));
       -                ofs.write(as_bytes(ns.f_sum[i].z), sizeof(Float));
       +                            // Cell-face velocities
       +                            //ofs.write(as_bytes(darcy.v_x[vidx(x,y,z)]), sizeof(Float));
       +                            //ofs.write(as_bytes(darcy.v_y[vidx(x,y,z)]), sizeof(Float));
       +                            //ofs.write(as_bytes(darcy.v_z[vidx(x,y,z)]), sizeof(Float));
       +
       +                            ofs.write(as_bytes(darcy.p[i]), sizeof(Float));
       +                            ofs.write(as_bytes(darcy.phi[i]), sizeof(Float));
       +                            ofs.write(as_bytes(darcy.dphi[i]), sizeof(Float));
       +                        }
       +                    }
       +                }
       +
       +                ofs.write(as_bytes(darcy.rho_f), sizeof(Float));
       +                ofs.write(as_bytes(darcy.p_mod_A), sizeof(Float));
       +                ofs.write(as_bytes(darcy.p_mod_f), sizeof(Float));
       +                ofs.write(as_bytes(darcy.p_mod_phi), sizeof(Float));
       +
       +                ofs.write(as_bytes(darcy.bc_bot), sizeof(int));
       +                ofs.write(as_bytes(darcy.bc_top), sizeof(int));
       +                ofs.write(as_bytes(darcy.free_slip_bot), sizeof(int));
       +                ofs.write(as_bytes(darcy.free_slip_top), sizeof(int));
       +
       +                ofs.write(as_bytes(darcy.tolerance), sizeof(Float));
       +                ofs.write(as_bytes(darcy.maxiter), sizeof(unsigned int));
       +                ofs.write(as_bytes(darcy.ndem), sizeof(unsigned int));
       +                ofs.write(as_bytes(darcy.c_phi), sizeof(Float));
       +
       +                for (i = 0; i<np; ++i) {
       +                    ofs.write(as_bytes(darcy.f_p[i].x), sizeof(Float));
       +                    ofs.write(as_bytes(darcy.f_p[i].y), sizeof(Float));
       +                    ofs.write(as_bytes(darcy.f_p[i].z), sizeof(Float));
       +                }
       +                ofs.write(as_bytes(darcy.beta_f), sizeof(Float));
       +                ofs.write(as_bytes(darcy.k_c), sizeof(Float));
                    }
                }
        
 (DIR) diff --git a/src/navierstokes.cpp b/src/navierstokes.cpp
       t@@ -122,7 +122,7 @@ void DEM::checkNSstability()
            const Float dmin = fmin(dx, fmin(dy, dz));
        
            // Check the diffusion term using von Neumann stability analysis
       -    if (params.mu*time.dt/(dmin*dmin) > 0.5) {
       +    if (ns.mu*time.dt/(dmin*dmin) > 0.5) {
                std::cerr << "Error: The time step is too large to ensure stability in "
                    "the diffusive term of the fluid momentum equation.\n"
                    "Decrease the viscosity, decrease the time step, and/or increase "
       t@@ -279,7 +279,7 @@ double DEM::maxNormResNS()
            for (int z=0; z<grid.num[2]; ++z) {
                for (int y=0; y<grid.num[1]; ++y) {
                    for (int x=0; x<grid.num[0]; ++x) {
       -                norm_res = static_cast<double>(ns.norm[idx(x,y,z)]);
       +                norm_res = fabs(static_cast<double>(ns.norm[idx(x,y,z)]));
                        if (norm_res != norm_res) {
                            std::cerr << "\nError: normalized residual is NaN ("
                                << norm_res << ") in cell "
 (DIR) diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh
       t@@ -1309,7 +1309,7 @@ __global__ void findPorositiesVelocitiesDiametersSpherical(
                    dev_ns_vp_avg[cellidx] = v_avg;
                    dev_ns_d_avg[cellidx]  = d_avg;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                    (void)checkFiniteFloat("phi", x, y, z, phi);
                    (void)checkFiniteFloat("dphi", x, y, z, dphi);
                    (void)checkFiniteFloat3("v_avg", x, y, z, v_avg);
       t@@ -1526,7 +1526,7 @@ __global__ void findPorositiesVelocitiesDiametersSphericalGradient(
                    dev_ns_vp_avg[cellidx] = v_avg;
                    dev_ns_d_avg[cellidx]  = d_avg;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                    (void)checkFiniteFloat("phi", x, y, z, phi);
                    (void)checkFiniteFloat("dphi", x, y, z, dphi);
                    (void)checkFiniteFloat3("v_avg", x, y, z, v_avg);
       t@@ -1578,7 +1578,7 @@ __global__ void setUpperPressureNS(
                dev_ns_epsilon_new[cellidx] = epsilon;
                dev_ns_p[cellidx] = new_pressure;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                (void)checkFiniteFloat("epsilon", x, y, z, epsilon);
                (void)checkFiniteFloat("new_pressure", x, y, z, new_pressure);
        #endif
       t@@ -1715,7 +1715,7 @@ __device__ Float3 divergence_tensor(
                (t_yz_yp - t_yz_yn)/dy +
                (t_zz_zp - t_zz_zn)/dz);
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
            (void)checkFiniteFloat3("div_tensor", x, y, z, div_tensor);
        #endif
            return div_tensor;
       t@@ -1755,7 +1755,7 @@ __global__ void findNSgradientsDev(
                __syncthreads();
                dev_vectorfield[cellidx] = grad;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                (void)checkFiniteFloat3("grad", x, y, z, grad);
        #endif
            }
       t@@ -1799,7 +1799,7 @@ __global__ void findvvOuterProdNS(
                dev_ns_v_prod[cellidx6+4] = v.y*v.z;
                dev_ns_v_prod[cellidx6+5] = v.z*v.z;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                (void)checkFiniteFloat("v_prod[0]", x, y, z, v.x*v.x);
                (void)checkFiniteFloat("v_prod[1]", x, y, z, v.x*v.y);
                (void)checkFiniteFloat("v_prod[2]", x, y, z, v.x*v.z);
       t@@ -1815,6 +1815,7 @@ __global__ void findvvOuterProdNS(
        // values in 3D.
        __global__ void findNSstressTensor(
            const Float3* __restrict__ dev_ns_v,       // in
       +    const Float mu,                            // in
            Float* __restrict__ dev_ns_tau)     // out
        {
            // 3D thread index
       t@@ -1860,17 +1861,17 @@ __global__ void findNSstressTensor(
                const Float3 zn = dev_ns_v[idx(x,y,z-1)];
        
                // The diagonal stress tensor components
       -        const Float tau_xx = 2.0*devC_params.mu*(xp.x - xn.x)/(2.0*dx);
       -        const Float tau_yy = 2.0*devC_params.mu*(yp.y - yn.y)/(2.0*dy);
       -        const Float tau_zz = 2.0*devC_params.mu*(zp.z - zn.z)/(2.0*dz);
       +        const Float tau_xx = 2.0*mu*(xp.x - xn.x)/(2.0*dx);
       +        const Float tau_yy = 2.0*mu*(yp.y - yn.y)/(2.0*dy);
       +        const Float tau_zz = 2.0*mu*(zp.z - zn.z)/(2.0*dz);
        
                // The off-diagonal stress tensor components
                const Float tau_xy =
       -            devC_params.mu*((yp.x - yn.x)/(2.0*dy) + (xp.y - xn.y)/(2.0*dx));
       +            mu*((yp.x - yn.x)/(2.0*dy) + (xp.y - xn.y)/(2.0*dx));
                const Float tau_xz =
       -            devC_params.mu*((zp.x - zn.x)/(2.0*dz) + (xp.z - xn.z)/(2.0*dx));
       +            mu*((zp.x - zn.x)/(2.0*dz) + (xp.z - xn.z)/(2.0*dx));
                const Float tau_yz =
       -            devC_params.mu*((zp.y - zn.y)/(2.0*dz) + (yp.z - yn.z)/(2.0*dy));
       +            mu*((zp.y - zn.y)/(2.0*dz) + (yp.z - yn.z)/(2.0*dy));
        
                /*
                  if (x == 0 && y == 0 && z == 0)
       t@@ -1892,7 +1893,7 @@ __global__ void findNSstressTensor(
                dev_ns_tau[cellidx6+4] = tau_yz;
                dev_ns_tau[cellidx6+5] = tau_zz;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                (void)checkFiniteFloat("tau_xx", x, y, z, tau_xx);
                (void)checkFiniteFloat("tau_xy", x, y, z, tau_xy);
                (void)checkFiniteFloat("tau_xz", x, y, z, tau_xz);
       t@@ -2036,7 +2037,7 @@ __global__ void findNSdivphiviv(
                //printf("div(phi*v*v) [%d,%d,%d] = %f, %f, %f\n", x,y,z,
                //div_phi_vi_v.x, div_phi_vi_v.y, div_phi_vi_v.z);
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                (void)checkFiniteFloat3("div_phi_vi_v", x, y, z, div_phi_vi_v);
        #endif
            }
       t@@ -2175,7 +2176,7 @@ __global__ void findNSdivphitau(
                __syncthreads();
                dev_ns_div_phi_tau[cellidx] = div_phi_tau;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                (void)checkFiniteFloat3("div_phi_tau", x, y, z, div_phi_tau);
        #endif
            }
       t@@ -2263,7 +2264,7 @@ __global__ void findNSdivphivv(
                __syncthreads();
                dev_ns_div_phi_v_v[cellidx] = div;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                (void)checkFiniteFloat3("div_phi_v_v", x, y, z, div);
        #endif
            }
       t@@ -2290,6 +2291,8 @@ __global__ void findPredNSvelocities(
            const unsigned int ndem,                           // in
            const unsigned int wall0_iz,                       // in
            const Float   c_v,                                 // in
       +    const Float   mu,                                  // in
       +    const Float   rho_f,                               // in
            Float* __restrict__ dev_ns_v_p_x,           // out
            Float* __restrict__ dev_ns_v_p_y,           // out
            Float* __restrict__ dev_ns_v_p_z)           // out
       t@@ -2326,7 +2329,7 @@ __global__ void findPredNSvelocities(
                //printf("v in v* [%d,%d,%d] = %f, %f, %f\n", x,y,z, v.x, v.y, v.z);
        
                Float3 div_tau = MAKE_FLOAT3(0.0, 0.0, 0.0);
       -        if (devC_params.mu > 0.0) {
       +        if (mu > 0.0) {
                    div_tau = MAKE_FLOAT3(
                        dev_ns_div_tau_x[fidx],
                        dev_ns_div_tau_y[fidx],
       t@@ -2362,7 +2365,7 @@ __global__ void findPredNSvelocities(
                // The particle-fluid interaction force should only be incoorporated if
                // there is a fluid viscosity
                Float3 f_i_c, f_i_xn, f_i_yn, f_i_zn;
       -        if (devC_params.mu > 0.0 && devC_np > 0) {
       +        if (mu > 0.0 && devC_np > 0) {
                    f_i_c  = dev_ns_F_pf[cellidx];
                    f_i_xn = dev_ns_F_pf[idx(x-1,y,z)];
                    f_i_yn = dev_ns_F_pf[idx(x,y-1,z)];
       t@@ -2379,7 +2382,6 @@ __global__ void findPredNSvelocities(
                    amean(f_i_c.z, f_i_zn.z));
        
                const Float dt = ndem*devC_dt;
       -        const Float rho = devC_params.rho_f;
        
                // The pressure gradient is not needed in Chorin's projection method
                // (ns.beta=0), so only has to be looked up in pressure-dependant
       t@@ -2396,10 +2398,10 @@ __global__ void findPredNSvelocities(
                        (p - p_yn)/dy,
                        (p - p_zn)/dz);
        #ifdef SET_1
       -            pressure_term = -beta*dt/(rho*phi)*grad_p;
       +            pressure_term = -beta*dt/(rho_f*phi)*grad_p;
        #endif
        #ifdef SET_2
       -            pressure_term = -beta*dt/rho*grad_p;
       +            pressure_term = -beta*dt/rho_f*grad_p;
        #endif
                }
        
       t@@ -2409,16 +2411,16 @@ __global__ void findPredNSvelocities(
                    amean(div_phi_vi_v_zn.x, div_phi_vi_v_c.z));
        
                // Determine the terms of the predicted velocity change
       -        const Float3 interaction_term = -dt/(rho*phi)*f_i;
       +        const Float3 interaction_term = -dt/(rho_f*phi)*f_i;
                const Float3 gravity_term = MAKE_FLOAT3(
                    devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt;
                const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi;
                const Float3 porosity_term = -1.0*v*dphi/phi;
        #ifdef SET_1
       -        const Float3 diffusion_term = dt/(rho*phi)*div_tau;
       +        const Float3 diffusion_term = dt/(rho_f*phi)*div_tau;
        #endif
        #ifdef SET_2
       -        const Float3 diffusion_term = dt/rho*div_tau;
       +        const Float3 diffusion_term = dt/rho_f*div_tau;
        #endif
        
                // Predict new velocity and add scaling parameters
       t@@ -2481,7 +2483,7 @@ __global__ void findPredNSvelocities(
                dev_ns_v_p_y[fidx] = v_p.y;
                dev_ns_v_p_z[fidx] = v_p.z;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                (void)checkFiniteFloat3("v_p", x, y, z, v_p);
        #endif
            }
       t@@ -2503,6 +2505,7 @@ __global__ void findNSforcing(
            const unsigned int nijac,                      // in
            const unsigned int ndem,                       // in
            const Float c_v,                               // in
       +    const Float rho_f,                             // in
            Float*  __restrict__ dev_ns_f1,                // out
            Float3* __restrict__ dev_ns_f2,                // out
            Float*  __restrict__ dev_ns_f)                 // out
       t@@ -2554,14 +2557,14 @@ __global__ void findNSforcing(
        
                    // Find forcing function terms
        #ifdef SET_1
       -            const Float t1 = devC_params.rho_f*phi*div_v_p/(c_v*dt);
       -            const Float t2 = devC_params.rho_f*dot(grad_phi, v_p)/(c_v*dt);
       -            const Float t4 = devC_params.rho_f*dphi/(dt*dt*c_v);
       +            const Float t1 = rho_f*phi*div_v_p/(c_v*dt);
       +            const Float t2 = rho_f*dot(grad_phi, v_p)/(c_v*dt);
       +            const Float t4 = rho_f*dphi/(dt*dt*c_v);
        #endif
        #ifdef SET_2
       -            const Float t1 = devC_params.rho_f*div_v_p/(c_v*dt);
       -            const Float t2 = devC_params.rho_f*dot(grad_phi, v_p)/(c_v*dt*phi);
       -            const Float t4 = devC_params.rho_f*dphi/(dt*dt*phi*c_v);
       +            const Float t1 = rho_f*div_v_p/(c_v*dt);
       +            const Float t2 = rho_f*dot(grad_phi, v_p)/(c_v*dt*phi);
       +            const Float t4 = rho_f*dphi/(dt*dt*phi*c_v);
        #endif
                    f1 = t1 + t2 + t4;
                    f2 = grad_phi/phi; // t3/grad(epsilon)
       t@@ -2603,7 +2606,7 @@ __global__ void findNSforcing(
                __syncthreads();
                dev_ns_f[cellidx] = f;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                (void)checkFiniteFloat("f", x, y, z, f);
        #endif
            }
       t@@ -2666,7 +2669,7 @@ __global__ void smoothing(
                  " e_zn = %f, e_zp = %f\n", x,y,z, e_xn, e_xp,
                  e_yn, e_yp, e_zn, e_zp);*/
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                (void)checkFiniteFloat("e_smooth", x, y, z, e_smooth);
        #endif
            }
       t@@ -2775,7 +2778,7 @@ __global__ void jacobiIterationNS(
                dev_ns_epsilon_new[cellidx] = e_relax;
                dev_ns_norm[cellidx] = res_norm;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                (void)checkFiniteFloat("e_new", x, y, z, e_new);
                (void)checkFiniteFloat("e_relax", x, y, z, e_relax);
                //(void)checkFiniteFloat("res_norm", x, y, z, res_norm);
       t@@ -2861,12 +2864,13 @@ __global__ void findNormalizedResiduals(
        
                // Find the normalized residual value. A small value is added to the
                // denominator to avoid a divide by zero.
       -        const Float res_norm = (e_new - e)*(e_new - e)/(e_new*e_new + 1.0e-16);
       +        //const Float res_norm = (e_new - e)*(e_new - e)/(e_new*e_new + 1.0e-16);
       +        const Float res_norm = (e_new - e)/(e + 1.0e-16);
        
                __syncthreads();
                dev_ns_norm[cellidx] = res_norm;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                checkFiniteFloat("res_norm", x, y, z, res_norm);
        #endif
            }
       t@@ -2902,7 +2906,7 @@ __global__ void updateNSpressure(
                __syncthreads();
                dev_ns_p[cellidx] = p;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                checkFiniteFloat("p", x, y, z, p);
        #endif
            }
       t@@ -2919,6 +2923,7 @@ __global__ void updateNSvelocity(
            const int    bc_top,          // in
            const unsigned int ndem,      // in
            const Float  c_v,             // in
       +    const Float  rho_f,           // in
            const unsigned int wall0_iz,  // in
            const unsigned int iter,      // in
            Float* __restrict__ dev_ns_v_x,      // out
       t@@ -2978,10 +2983,10 @@ __global__ void updateNSvelocity(
                // Find new velocity
                Float3 v = v_p
        #ifdef SET_1
       -            - c_v*ndem*devC_dt/(phi*devC_params.rho_f)*grad_epsilon;
       +            - c_v*ndem*devC_dt/(phi*rho_f)*grad_epsilon;
        #endif
        #ifdef SET_2
       -            - c_v*ndem*devC_dt/devC_params.rho_f*grad_epsilon;
       +            - c_v*ndem*devC_dt/rho_f*grad_epsilon;
        #endif
        
                if ((z == 0 && bc_bot == 1) || (z > nz-1 && bc_top == 1))
       t@@ -3043,7 +3048,7 @@ __global__ void updateNSvelocity(
                dev_ns_v_y[cellidx] = v.y;
                dev_ns_v_z[cellidx] = v.z;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                checkFiniteFloat3("v", x, y, z, v);
        #endif
            }
       t@@ -3112,7 +3117,7 @@ __global__ void findAvgParticleVelocityDiameter(
                dev_ns_vp_avg[cellidx] = v_avg;
                dev_ns_d_avg[cellidx]  = d_avg;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                checkFiniteFloat3("v_avg", x, y, z, v_avg);
                checkFiniteFloat("d_avg", x, y, z, d_avg);
        #endif
       t@@ -3144,6 +3149,8 @@ __global__ void findInteractionForce(
            const Float*  __restrict__ dev_ns_div_tau_x,// in
            const Float*  __restrict__ dev_ns_div_tau_y,// in
            const Float*  __restrict__ dev_ns_div_tau_z,// in
       +    const Float mu,                             // in
       +    const Float rho_f,                          // in
            //const Float c_v,                       // in
            Float3* __restrict__ dev_ns_f_pf,     // out
            Float4* __restrict__ dev_force,       // out
       t@@ -3209,7 +3216,7 @@ __global__ void findInteractionForce(
                const Float  v_rel_length = length(v_rel);
        
                const Float V_p = dx*dy*dz - phi*dx*dy*dz;
       -        const Float Re  = devC_params.rho_f*d*phi*v_rel_length/devC_params.mu;
       +        const Float Re  = rho_f*d*phi*v_rel_length/mu;
                Float Cd  = pow(0.63 + 4.8/pow(Re, 0.5), 2.0);
                Float chi = 3.7 - 0.65*exp(-pow(1.5 - log10(Re), 2.0)/2.0);
        
       t@@ -3219,7 +3226,7 @@ __global__ void findInteractionForce(
                }
        
                // Drag force
       -        const Float3 f_d = 0.125*Cd*devC_params.rho_f*M_PI*d*d*phi*phi
       +        const Float3 f_d = 0.125*Cd*rho_f*M_PI*d*d*phi*phi
                    *v_rel_length*v_rel*pow(phi, -chi);
        
                // Pressure gradient force
       t@@ -3233,7 +3240,7 @@ __global__ void findInteractionForce(
                __syncthreads();
                const Float3 f_pf = f_d + f_p + f_v;
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                /*
                  printf("\nfindInteractionForce %d [%d,%d,%d]\n"
                  "\tV_p = %f Re=%f Cd=%f chi=%f\n"
       t@@ -3325,7 +3332,7 @@ __global__ void applyInteractionForceToFluid(
        
                const Float3 F_pf = fi/(dx*dy*dz);
        
       -#ifdef CHECK_NS_FINITE
       +#ifdef CHECK_FLUID_FINITE
                checkFiniteFloat3("F_pf", x, y, z, F_pf);
        #endif
                //printf("F_pf [%d,%d,%d] = %f,%f,%f\n", x,y,z, F_pf.x, F_pf.y, F_pf.z);
       t@@ -3418,12 +3425,13 @@ __global__ void interpolateFaceToCenter(
        // Warning: The grid-corner values will be invalid, along with the non-normal
        // components of the ghost nodes
        __global__ void findFaceDivTau(
       -    const Float* __restrict__ dev_ns_v_x,
       -    const Float* __restrict__ dev_ns_v_y,
       -    const Float* __restrict__ dev_ns_v_z,
       -    Float* __restrict__ dev_ns_div_tau_x,
       -    Float* __restrict__ dev_ns_div_tau_y,
       -    Float* __restrict__ dev_ns_div_tau_z)
       +    const Float* __restrict__ dev_ns_v_x,   // in
       +    const Float* __restrict__ dev_ns_v_y,   // in
       +    const Float* __restrict__ dev_ns_v_z,   // in
       +    const Float mu,                         // in
       +    Float* __restrict__ dev_ns_div_tau_x,   // out
       +    Float* __restrict__ dev_ns_div_tau_y,   // out
       +    Float* __restrict__ dev_ns_div_tau_z)   // out
        {
            // 3D thread index
            const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       t@@ -3472,17 +3480,17 @@ __global__ void findFaceDivTau(
                const Float v_z_zp = dev_ns_v_z[vidx(x,y,z+1)];
        
                const Float div_tau_x =
       -            devC_params.mu*(
       +            mu*(
                        (v_x_xp - 2.0*v_x + v_x_xn)/(dx*dx) +
                        (v_x_yp - 2.0*v_x + v_x_yn)/(dy*dy) +
                        (v_x_zp - 2.0*v_x + v_x_zn)/(dz*dz));
                const Float div_tau_y =
       -            devC_params.mu*(
       +            mu*(
                        (v_y_xp - 2.0*v_y + v_y_xn)/(dx*dx) +
                        (v_y_yp - 2.0*v_y + v_y_yn)/(dy*dy) +
                        (v_y_zp - 2.0*v_y + v_y_zn)/(dz*dz));
                const Float div_tau_z =
       -            devC_params.mu*(
       +            mu*(
                        (v_z_xp - 2.0*v_z + v_z_xn)/(dx*dx) +
                        (v_z_yp - 2.0*v_z + v_z_yn)/(dy*dy) +
                        (v_z_zp - 2.0*v_z + v_z_zn)/(dz*dz));
 (DIR) diff --git a/src/sphere.cpp b/src/sphere.cpp
       t@@ -21,7 +21,7 @@ DEM::DEM(const std::string inputbin,
                 const int transferConstMem,
                 const int fluidFlow,
                 const int device)
       -: verbose(verbosity), navierstokes(fluidFlow), device(device)
       +: verbose(verbosity), fluid(fluidFlow), device(device)
        {
            using std::cout;
            using std::cerr;
       t@@ -50,8 +50,16 @@ DEM::DEM(const std::string inputbin,
            if (dry == 1)
                exit(0);
        
       -    if (navierstokes == 1) {
       -        initNS();
       +    if (fluid == 1) {
       +        if (cfd_solver == 0)
       +            initNS();
       +        else if (cfd_solver == 1)
       +            initDarcy();
       +        else {
       +            std::cerr << "DEM::DEM Error: Value of cfd_solver not understood ("
       +                << cfd_solver << ")" << std::endl;
       +            exit(1);
       +        }
            }
        
            if (initCuda == 1) {
       t@@ -64,8 +72,17 @@ DEM::DEM(const std::string inputbin,
                    transferToConstantDeviceMemory();
                }
        
       -        if (navierstokes == 1) {
       -            initNSmemDev();
       +        if (fluid == 1) {
       +            if (cfd_solver == 0)
       +                initNSmemDev();
       +            else if (cfd_solver == 1)
       +                initDarcyMemDev();
       +            else {
       +                std::cerr
       +                    << "DEM::DEM Error: Value of cfd_solver not understood ("
       +                    << cfd_solver << ")" << std::endl;
       +                exit(1);
       +            }
                }
        
                // Allocate device memory for particle variables,
       t@@ -224,8 +241,8 @@ void DEM::checkValues(void)
        
                checkIfNaN(x, "position", i);
                checkIfNaN(vel, "velocity", i);
       -        checkIfNaN(angpos, "angular position", i);
       -        checkIfNaN(angvel, "angular velocity", i);
       +        //checkIfNaN(angpos, "angular position", i);
       +        //checkIfNaN(angvel, "angular velocity", i);
        
                // Check that all particles are inside of the grid
                if (x.x < grid.origo[0] ||
       t@@ -281,6 +298,44 @@ void DEM::checkValues(void)
                cerr << "Error: rho = " << params.rho << " kg/m3" << endl;
                exit(1);
            }
       +
       +    if (fluid == 1) {
       +
       +        // Navier-Stokes tests
       +        if (cfd_solver == 0) {
       +            if (ns.rho_f <= 0.0) {
       +                cerr << "Error: rho = " << params.rho << " kg/m3" << endl;
       +                exit(1);
       +            }
       +        }
       +
       +        // Darcy tests
       +        else if (cfd_solver == 1) {
       +            if (darcy.rho_f <= 0.0) {
       +                cerr << "Error: rho_f = " << darcy.rho_f << " kg/m3" << endl;
       +                exit(1);
       +            }
       +
       +            if (darcy.mu <= 0.0) {
       +                cerr << "Error: mu = " << darcy.mu << " Pa s" << endl;
       +                exit(1);
       +            }
       +
       +            if (darcy.beta_f <= 0.0) {
       +                cerr << "Error: beta_f = " << darcy.beta_f << " 1/Pa" << endl;
       +                exit(1);
       +            }
       +
       +            if (darcy.k_c <= 0.0) {
       +                cerr << "Error: k_c = " << darcy.k_c << " m*m" << endl;
       +                exit(1);
       +            }
       +        } else {
       +            cerr << "Solver type value not understood (cfd_solver = "
       +                << cfd_solver << endl;
       +            exit(1);
       +        }
       +    }
        }
        
        
 (DIR) diff --git a/src/sphere.h b/src/sphere.h
       t@@ -173,7 +173,8 @@ class DEM {
                Float4 *dev_v_rho;  // Device equivalent
        
                //// Porous flow 
       -        int navierstokes;  // 0: no, 1: yes
       +        int fluid;      // 0: no, 1: yes
       +        int cfd_solver; // 0: Navier Stokes, 1: Darcy
        
                // Navier Stokes values, host
                NavierStokes ns;
       t@@ -262,7 +263,7 @@ class DEM {
                unsigned int idx(const int x, const int y, const int z); // pres. nodes
                unsigned int vidx(const int x, const int y, const int z); // vel. nodes
        
       -        // Initialize Darcy values and arrays
       +        // Initialize Navier Stokes values and arrays
                void initNS();
        
                // Clean up Navier Stokes arrays
       t@@ -289,6 +290,50 @@ class DEM {
                void transferNSepsilonFromGlobalDeviceMemory();
                void transferNSepsilonNewFromGlobalDeviceMemory();
        
       +        // Darcy values, host
       +        Darcy darcy;
       +
       +        // Darcy values, device
       +        Float*  dev_darcy_p_old;     // Previous cell hydraulic pressure
       +        Float*  dev_darcy_dpdt;      // Previous cell hydraulic pressure
       +        Float*  dev_darcy_p;         // Cell hydraulic pressure
       +        Float*  dev_darcy_p_new;     // Updated cell hydraulic pressure
       +        Float3* dev_darcy_v;         // Cell fluid velocity
       +        Float*  dev_darcy_phi;       // Cell porosity
       +        Float*  dev_darcy_dphi;      // Cell porosity change
       +        Float*  dev_darcy_norm;      // Normalized residual of epsilon values
       +        Float4* dev_darcy_f_p;       // Pressure gradient force on particles
       +        Float*  dev_darcy_k;         // Cell hydraulic permeability
       +        Float3* dev_darcy_grad_k;    // Spatial gradient of permeability
       +
       +        // Darcy functions
       +        void initDarcyMem();
       +        Float largestDarcyPermeability();
       +        Float smallestDarcyPorosity();
       +        void initDarcyMemDev();
       +        unsigned int darcyCells();
       +        unsigned int darcyCellsVelocity();
       +        void transferDarcyToGlobalDeviceMemory(int statusmsg);
       +        void transferDarcyFromGlobalDeviceMemory(int statusmsg);
       +        void transferDarcyNormFromGlobalDeviceMemory();
       +        void transferDarcyPressuresFromGlobalDeviceMemory();
       +        void freeDarcyMem();
       +        void freeDarcyMemDev();
       +        unsigned int d_idx(const int x, const int y, const int z);
       +        unsigned int d_vidx(const int x, const int y, const int z);
       +        void checkDarcyStability();
       +        void printDarcyArray(FILE* stream, Float* arr);
       +        void printDarcyArray(FILE* stream, Float* arr, std::string desc);
       +        void printDarcyArray(FILE* stream, Float3* arr);
       +        void printDarcyArray(FILE* stream, Float3* arr, std::string desc);
       +        double avgNormResDarcy();
       +        double maxNormResDarcy();
       +        void initDarcy();
       +        void writeDarcyArray(Float* arr, const char* filename);
       +        void writeDarcyArray(Float3* arr, const char* filename);
       +        void endDarcy();
       +        void endDarcyDev();
       +
        
            public:
                // Values and functions accessible from the outside
 (DIR) diff --git a/src/utility.cu b/src/utility.cu
       t@@ -15,10 +15,14 @@ void DEM::diagnostics()
            checkValues();
        
            // Clean up memory before exiting
       -    if (navierstokes == 1) {
       +    if (fluid == 1 && cfd_solver == 0) {
                freeNSmemDev();
                freeNSmem();
            }
       +    if (fluid == 1 && cfd_solver == 1) {
       +        freeDarcyMemDev();
       +        freeDarcyMem();
       +    }
            freeGlobalDeviceMemory();
            // CPU memory freed upon object destruction
        }
 (DIR) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
       t@@ -32,3 +32,15 @@ add_test(cfd_tests_neumann-c_v=0.1 ${PYTHON_EXECUTABLE}
        
        add_test(fluid_particle_interaction ${PYTHON_EXECUTABLE} 
            ${CMAKE_CURRENT_BINARY_DIR}/fluid_particle_interaction.py)
       +
       +add_test(cfd_tests_darcy ${PYTHON_EXECUTABLE} 
       +    ${CMAKE_CURRENT_BINARY_DIR}/cfd_tests_darcy.py)
       +
       +add_test(cfd_tests_neumann_darcy ${PYTHON_EXECUTABLE} 
       +    ${CMAKE_CURRENT_BINARY_DIR}/cfd_tests_neumann_darcy.py)
       +
       +add_test(fluid_particle_interaction_darcy ${PYTHON_EXECUTABLE} 
       +    ${CMAKE_CURRENT_BINARY_DIR}/fluid_particle_interaction_darcy.py)
       +
       +add_test(cfd_tests_darcy_particles ${PYTHON_EXECUTABLE} 
       +    ${CMAKE_CURRENT_BINARY_DIR}/cfd_tests_darcy_particles.py)
 (DIR) diff --git a/tests/cfd_tests_darcy.py b/tests/cfd_tests_darcy.py
       t@@ -0,0 +1,151 @@
       +#!/usr/bin/env python
       +from pytestutils import *
       +
       +import sphere
       +import sys
       +import numpy
       +import matplotlib.pyplot as plt
       +
       +print("### CFD tests - Dirichlet BCs ###")
       +
       +# Iteration and conservation of mass test
       +# No gravity, no pressure gradients => no flow
       +orig = sphere.sim(np = 0, nd = 3, nw = 0, sid = "cfdtest", fluid = True)
       +cleanup(orig)
       +orig.defaultParams()
       +#orig.defineWorldBoundaries([1.0,1.0,1.0], dx=0.1)
       +orig.defineWorldBoundaries([0.4,0.3,0.4], dx=0.1)
       +orig.initFluid(cfd_solver = 1)
       +#orig.initFluid(mu = 8.9e-4)
       +orig.initTemporal(total = 0.2, file_dt = 0.01, dt = 1.0e-7)
       +#orig.g[2] = -10.0
       +orig.time_file_dt = orig.time_dt*0.99
       +orig.time_total = orig.time_dt*10
       +#orig.run(dry=True)
       +orig.run(verbose=False)
       +#orig.run(verbose=True)
       +py = sphere.sim(sid = orig.sid, fluid = True)
       +
       +ones = numpy.ones((orig.num))
       +py.readlast(verbose = False)
       +compareNumpyArrays(ones, py.p_f, "Conservation of pressure:")
       +
       +# Convergence rate (1/3)
       +it = numpy.loadtxt("../output/" + orig.sid + "-conv.log")
       +compare(it[:,1].sum(), 0.0, "Convergence rate (1/3):\t")
       +
       +# Fluid flow should be very small
       +if ((numpy.abs(py.v_f[:,:,:,:]) < 1.0e-6).all()):
       +    print("Flow field:\t\t" + passed())
       +else:
       +    print("Flow field:\t\t" + failed())
       +    print(numpy.min(py.v_f))
       +    print(numpy.mean(py.v_f))
       +    print(numpy.max(py.v_f))
       +    raise Exception("Failed")
       +
       +
       +# Add pressure gradient
       +# This test passes with BETA=0.0 and tolerance=1.0e-9
       +orig.p_f[:,:,-1] = 1.1
       +#orig.setTolerance(1.0e-8)
       +orig.cleanup()
       +#orig.time_file_dt = orig.time_dt*0.99
       +#orig.time_total = orig.time_dt*1
       +orig.run(verbose=False)
       +#orig.run(verbose=True)
       +py.readlast(verbose = False)
       +ideal_grad_p_z = numpy.linspace(orig.p_f[0,0,0], orig.p_f[0,0,-1], orig.num[2])
       +#orig.writeVTKall()
       +compareNumpyArraysClose(numpy.zeros((1,orig.num[2])),\
       +        ideal_grad_p_z - py.p_f[0,0,:],\
       +        "Pressure gradient:\t", tolerance=1.0e-1)
       +        #"Pressure gradient:\t", tolerance=1.0e-2)
       +
       +# Fluid flow direction, opposite of gradient (i.e. towards -z)
       +if ((py.v_f[:,:,:,2] < 0.0).all() and (py.v_f[:,:,:,0:1] < 1.0e-7).all()):
       +    print("Flow field:\t\t" + passed())
       +else:
       +    print("Flow field:\t\t" + failed())
       +    raise Exception("Failed")
       +
       +# Convergence rate (2/3)
       +it = numpy.loadtxt("../output/" + orig.sid + "-conv.log")
       +if ((it[0:6,1] < 1000).all() and (it[6:,1] < 20).all()):
       +    print("Convergence rate (2/3):\t" + passed())
       +else:
       +    print("Convergence rate (2/3):\t" + failed())
       +#'''
       +
       +# Long test
       +'''
       +#orig.p_f[:,:,-1] = 1.1
       +orig.time_total[0] = 0.1
       +orig.time_file_dt[0] = orig.time_total[0]/10.0
       +orig.run(verbose=False)
       +py.readlast(verbose = False)
       +ideal_grad_p_z = numpy.linspace(orig.p_f[0,0,0], orig.p_f[0,0,-1], orig.num[2])
       +#py.writeVTKall()
       +compareNumpyArraysClose(numpy.zeros((1,orig.num[2])),\
       +        ideal_grad_p_z - py.p_f[0,0,:],\
       +        "Pressure gradient (long test):", tolerance=1.0e-2)
       +
       +# Fluid flow direction, opposite of gradient (i.e. towards -z)
       +if ((py.v_f[:,:,:,2] < 0.0).all() and (py.v_f[:,:,:,0:1] < 1.0e-7).all()):
       +    print("Flow field:\t\t" + passed())
       +else:
       +    print("Flow field:\t\t" + failed())
       +
       +# Convergence rate (3/3)
       +# This test passes with BETA=0.0 and tolerance=1.0e-9
       +it = numpy.loadtxt("../output/" + orig.sid + "-conv.log")
       +if (it[0,1] < 700 and it[1,1] < 250 and (it[2:,1] < 20).all()):
       +    print("Convergence rate (3/3):\t" + passed())
       +else:
       +    print("Convergence rate (3/3):\t" + failed())
       +'''
       +
       +'''
       +# Slow pressure modulation test
       +orig.cleanup()
       +orig.time_total[0] = 1.0e-1
       +orig.time_file_dt[0] = 0.101*orig.time_total[0]
       +orig.setFluidPressureModulation(A=1.0, f=1.0/orig.time_total[0])
       +#orig.plotPrescribedFluidPressures()
       +orig.run(verbose=False)
       +#py.readlast()
       +#py.plotConvergence()
       +#py.plotFluidDiffAdvPresZ()
       +#py.writeVTKall()
       +for it in range(1,py.status()): # gradient should be smooth in all output files
       +    py.readstep(it, verbose=False)
       +    ideal_grad_p_z =\
       +            numpy.linspace(py.p_f[0,0,0], py.p_f[0,0,-1], py.num[2])
       +    compareNumpyArraysClose(numpy.zeros((1,py.num[2])),\
       +            ideal_grad_p_z - py.p_f[0,0,:],\
       +            'Slow pressure modulation (' + 
       +            str(it+1) + '/' + str(py.status()) + '):', tolerance=1.0e-1)
       +'''
       +
       +#'''
       +# Fast pressure modulation test
       +orig.time_total[0] = 1.0e-2
       +orig.time_file_dt[0] = 0.101*orig.time_total[0]
       +orig.setFluidPressureModulation(A=1.0, f=1.0/orig.time_total[0])
       +#orig.plotPrescribedFluidPressures()
       +orig.run(verbose=False)
       +#py.plotConvergence()
       +#py.plotFluidDiffAdvPresZ()
       +#py.writeVTKall()
       +for it in range(1,py.status()+1): # gradient should be smooth in all output files
       +    py.readstep(it, verbose=False)
       +    #py.plotFluidDiffAdvPresZ()
       +    ideal_grad_p_z =\
       +            numpy.linspace(py.p_f[0,0,0], py.p_f[0,0,-1], py.num[2])
       +    compareNumpyArraysClose(numpy.zeros((1,py.num[2])),\
       +            ideal_grad_p_z - py.p_f[0,0,:],\
       +            'Fast pressure modulation (' + 
       +            str(it) + '/' + str(py.status()) + '):', tolerance=5.0e-1)
       +#'''
       +
       +cleanup(orig)
 (DIR) diff --git a/tests/cfd_tests_darcy_particles.py b/tests/cfd_tests_darcy_particles.py
       t@@ -0,0 +1,165 @@
       +#!/usr/bin/env python
       +from pytestutils import *
       +import sphere
       +import numpy
       +
       +#'''
       +print("### Steady state, no gravity, no forcing, Dirichlet+Dirichlet BCs")
       +orig = sphere.sim('darcy_particles', np = 1000)
       +orig.cleanup()
       +#orig.generateRadii(histogram=False, psd='uni', radius_mean=5.0e-4, radius_variance=5.0e-5)
       +orig.defaultParams()
       +orig.generateRadii(psd='uni', mean=5.0e-2, variance=5.0e-5)
       +orig.initRandomGridPos([20, 20, 200])
       +orig.initTemporal(total=0.005, file_dt=0.001)
       +orig.initFluid(cfd_solver=1)
       +#orig.p_f[5,3,2] *= 1.5
       +#orig.k_c[0] = 4.6e-15
       +orig.k_c[0] = 4.6e-10
       +#orig.g[2] = -10.0
       +orig.setStiffnessNormal(36.4e9)
       +orig.setStiffnessTangential(36.4e9/3.0)
       +orig.run(verbose=False)
       +#orig.writeVTKall()
       +py = sphere.sim(sid = orig.sid, fluid = True)
       +py.readlast(verbose=False)
       +
       +ones = numpy.ones((orig.num))
       +py.readlast(verbose = False)
       +compareNumpyArrays(ones, py.p_f, "Conservation of pressure:")
       +
       +# Fluid flow should be very small
       +if ((numpy.abs(py.v_f[:,:,:,:]) < 1.0e-6).all()):
       +    print("Flow field:\t\t" + passed())
       +else:
       +    print("Flow field:\t\t" + failed())
       +    print(numpy.min(py.v_f))
       +    print(numpy.mean(py.v_f))
       +    print(numpy.max(py.v_f))
       +    raise Exception("Failed")
       +
       +
       +
       +print("### Steady state, no gravity, no forcing, Neumann+Dirichlet BCs")
       +orig = sphere.sim('darcy_particles', np = 1000)
       +orig.cleanup()
       +#orig.generateRadii(histogram=False, psd='uni', radius_mean=5.0e-4, radius_variance=5.0e-5)
       +orig.defaultParams()
       +orig.generateRadii(psd='uni', mean=5.0e-2, variance=5.0e-5)
       +orig.initRandomGridPos([20, 20, 200])
       +orig.initTemporal(total=0.005, file_dt=0.001)
       +orig.initFluid(cfd_solver=1)
       +#orig.p_f[5,3,2] *= 1.5
       +#orig.k_c[0] = 4.6e-15
       +orig.k_c[0] = 4.6e-10
       +orig.setFluidBottomNoFlow()
       +#orig.g[2] = -10.0
       +orig.setStiffnessNormal(36.4e9)
       +orig.setStiffnessTangential(36.4e9/3.0)
       +orig.run(verbose=False)
       +#orig.writeVTKall()
       +py = sphere.sim(sid = orig.sid, fluid = True)
       +py.readlast(verbose=False)
       +
       +ones = numpy.ones((orig.num))
       +py.readlast(verbose = False)
       +compareNumpyArrays(ones, py.p_f, "Conservation of pressure:")
       +
       +# Fluid flow should be very small
       +if ((numpy.abs(py.v_f[:,:,:,:]) < 1.0e-6).all()):
       +    print("Flow field:\t\t" + passed())
       +else:
       +    print("Flow field:\t\t" + failed())
       +    print(numpy.min(py.v_f))
       +    print(numpy.mean(py.v_f))
       +    print(numpy.max(py.v_f))
       +    raise Exception("Failed")
       +
       +
       +
       +print("### Steady state, no gravity, no forcing, Neumann+Neumann BCs")
       +orig = sphere.sim('darcy_particles', np = 1000)
       +orig.cleanup()
       +#orig.generateRadii(histogram=False, psd='uni', radius_mean=5.0e-4, radius_variance=5.0e-5)
       +orig.defaultParams()
       +orig.generateRadii(psd='uni', mean=5.0e-2, variance=5.0e-5)
       +orig.initRandomGridPos([20, 20, 200])
       +orig.initTemporal(total=0.005, file_dt=0.001)
       +orig.initFluid(cfd_solver=1)
       +#orig.p_f[5,3,2] *= 1.5
       +#orig.k_c[0] = 4.6e-15
       +orig.k_c[0] = 4.6e-10
       +orig.setFluidBottomNoFlow()
       +orig.setFluidTopNoFlow()
       +#orig.g[2] = -10.0
       +orig.setStiffnessNormal(36.4e9)
       +orig.setStiffnessTangential(36.4e9/3.0)
       +orig.run(verbose=False)
       +#orig.writeVTKall()
       +py = sphere.sim(sid = orig.sid, fluid = True)
       +py.readlast(verbose=False)
       +
       +ones = numpy.ones((orig.num))
       +py.readlast(verbose = False)
       +compareNumpyArrays(ones, py.p_f, "Conservation of pressure:")
       +
       +# Fluid flow should be very small
       +if ((numpy.abs(py.v_f[:,:,:,:]) < 1.0e-6).all()):
       +    print("Flow field:\t\t" + passed())
       +else:
       +    print("Flow field:\t\t" + failed())
       +    print(numpy.min(py.v_f))
       +    print(numpy.mean(py.v_f))
       +    print(numpy.max(py.v_f))
       +    raise Exception("Failed")
       +#'''
       +
       +
       +print("### Fluidization test: Transient, gravity, Dirichlet+Dirichlet BCs")
       +#orig = sphere.sim('diffusivity-relax', fluid=False)
       +orig = sphere.sim('cube-init', fluid=False)
       +orig.readlast(verbose=False)
       +orig.num[2] /= 2
       +orig.L[2] /= 2.0
       +orig.id('darcy_fluidization')
       +orig.cleanup()
       +orig.initTemporal(total=0.005, file_dt=0.001)
       +orig.initFluid(cfd_solver=1)
       +orig.g[2] = -10.0
       +#orig.k_c[0] = numpy.mean(orig.radius)**2/540.0
       +
       +mean_porosity = orig.bulkPorosity()
       +fluidize_pressure = numpy.abs((orig.rho - orig.rho_f) \
       +        *(1.0 - mean_porosity)*numpy.abs(orig.g[2]))
       +
       +fluid_pressure_gradient = numpy.array([0.9, 2.0])
       +
       +for i in numpy.arange(fluid_pressure_gradient.size):
       +
       +    orig.id('fluidization-' + str(fluid_pressure_gradient[i]))
       +    # set pressure gradient
       +    dpdz = fluid_pressure_gradient[i] * fluidize_pressure
       +    dp = dpdz * orig.L[2]
       +    base_p = 0.0
       +    orig.p_f[:,:,0] = base_p + dp  # high pressure at bottom
       +    orig.p_f[:,:,-1] = base_p      # low pressure at top
       +
       +    orig.run(verbose=False)
       +    orig.writeVTKall()
       +    py = sphere.sim(sid = orig.sid, fluid = True)
       +    py.readlast(verbose=False)
       +
       +    """print('Mean particle velocity: '
       +            + str(numpy.mean(py.vel[:,0])) + ', '
       +            + str(numpy.mean(py.vel[:,1])) + ', '
       +            + str(numpy.mean(py.vel[:,2])) + ' m/s')"""
       +
       +    z_vel_threshold = 0.001
       +    if fluid_pressure_gradient[i] < 1.0:
       +        test(numpy.mean(py.vel[:,2]) < z_vel_threshold, 
       +                'Fluidization (' + str(fluid_pressure_gradient[i]) + '):\t')
       +    elif fluid_pressure_gradient[i] > 1.0:
       +        test(numpy.mean(py.vel[:,2]) > z_vel_threshold, 
       +                'Fluidization (' + str(fluid_pressure_gradient[i]) + '):\t')
       +
       +
 (DIR) diff --git a/tests/cfd_tests_neumann_darcy.py b/tests/cfd_tests_neumann_darcy.py
       t@@ -0,0 +1,41 @@
       +#!/usr/bin/env python
       +from pytestutils import *
       +
       +import sphere
       +import sys
       +import numpy
       +import matplotlib.pyplot as plt
       +
       +print('### CFD tests - Dirichlet/Neumann BCs ###')
       +
       +print('''# Neumann bottom, Dirichlet top BC.
       +# No gravity, no pressure gradients => no flow''')
       +
       +orig = sphere.sim("neumann", fluid = True)
       +cleanup(orig)
       +orig.defaultParams(mu_s = 0.4, mu_d = 0.4)
       +orig.defineWorldBoundaries([0.4, 0.4, 1], dx = 0.1)
       +orig.initFluid(cfd_solver = 1)
       +orig.initTemporal(total = 0.05, file_dt = 0.005, dt = 1.0e-4)
       +py = sphere.sim(sid = orig.sid, fluid = True)
       +orig.bc_bot[0] = 1      # No-flow BC at bottom (Neumann)
       +#orig.run(dry=True)
       +orig.run(verbose=False)
       +#orig.writeVTKall()
       +py.readlast(verbose = False)
       +ones = numpy.ones((orig.num))
       +py.readlast(verbose = False)
       +compareNumpyArraysClose(ones, py.p_f, "Conservation of pressure:",
       +        tolerance = 1.0e-5)
       +
       +# Fluid flow along z should be very small
       +if ((numpy.abs(py.v_f[:,:,:,:]) < 1.0e-6).all()):
       +    print("Flow field:\t\t" + passed())
       +else:
       +    print("Flow field:\t\t" + failed())
       +    print(numpy.min(py.v_f))
       +    print(numpy.mean(py.v_f))
       +    print(numpy.max(py.v_f))
       +    raise Exception("Failed")
       +
       +orig.cleanup()
 (DIR) diff --git a/tests/fluid_particle_interaction_darcy.py b/tests/fluid_particle_interaction_darcy.py
       t@@ -0,0 +1,31 @@
       +#!/usr/bin/env python
       +import sphere
       +from pytestutils import *
       +
       +sim = sphere.sim('fluid_particle_interaction', fluid=True)
       +sim.cleanup()
       +
       +sim.defineWorldBoundaries([1.0, 1.0, 1.0], dx = 0.1)
       +sim.initFluid(cfd_solver = 1)
       +
       +
       +# No gravity, pressure gradient enforced by Dirichlet boundaries.
       +# The particle should be sucked towards the low pressure
       +print('# Test 1: Test pressure gradient force')
       +sim.p_f[:,:,0]  = 1.0
       +sim.p_f[:,:,-1] = 1.1
       +sim.addParticle([0.5, 0.5, 0.5], 0.01)
       +sim.initTemporal(total=0.001, file_dt=0.0001)
       +#sim.time_file_dt[0] = sim.time_dt[0]
       +#sim.time_total[0] = sim.time_dt[0]
       +
       +#sim.run(verbose=False)
       +sim.run()
       +#sim.run(dry=True)
       +#sim.run(cudamemcheck=True)
       +#sim.writeVTKall()
       +
       +sim.readlast()
       +test(sim.vel[0,2] < 0.0, 'Particle velocity:')
       +
       +sim.cleanup()
 (DIR) diff --git a/tests/highlighttext.py b/tests/highlighttext.py
       t@@ -0,0 +1,51 @@
       +#!/usr/bin/env python
       +# ANSI color coded text if the terminal supports it. No external requirements.
       +# Inspired by
       +# http://korbinin.blogspot.dk/2012/10/color-text-output-from-python.html
       +
       +import sys
       +
       +def highlight(string, fg, bold=False):
       +    '''
       +    Return `string` with ANSI color codes.
       +
       +    :param string: String to colorize.
       +    :type string: str
       +    :param fg: Foreground text color.  Possible values: 'g', 'green', 'r', 'red',
       +        'y', 'yellow', 'b', 'blue', 'm' or 'magenta'
       +    :type fg: str
       +    :param bold: Bold text
       +    :type bold: bool
       +
       +    :returns: ANSI formatted string, can be `print()`'ed directly.
       +    :return type: str
       +    '''
       +
       +    attr = []
       +    if sys.stdout.isatty():
       +
       +        if fg == 'g' or fg == 'green':
       +            attr.append('32')
       +
       +        elif fg == 'r' or fg == 'red':
       +            attr.append('31')
       +
       +        elif fg == 'y' or fg == 'yellow':
       +            attr.append('33')
       +
       +        elif fg == 'b' or fg == 'blue':
       +            attr.append('34')
       +
       +        elif fg == 'm' or fg == 'magenta':
       +            attr.append('35')
       +
       +        else:
       +            raise Exception('Error: Foreground color `fg` value not understood')
       +
       +        if bold:
       +            attr.append('1')
       +
       +        return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)
       +
       +    else:
       +        return string
 (DIR) diff --git a/tests/io_tests_fluid.py b/tests/io_tests_fluid.py
       t@@ -3,10 +3,11 @@ from pytestutils import *
        import sphere
        
        #### Input/output tests ####
       -print("### Fluid input/output tests ###")
       +print("### Fluid input/output tests - Navier Stokes CFD solver ###")
        
        # Generate data in python
        orig = sphere.sim(np=100, sid="test-initgrid-fluid", fluid=True)
       +orig.cleanup()
        orig.generateRadii(histogram=False, radius_mean=1.0)
        orig.defaultParams()
        orig.initRandomGridPos()
       t@@ -22,7 +23,56 @@ py.readbin("../input/" + orig.sid + ".bin", verbose=False)
        compare(orig, py, "Python IO:")
        
        # Test C++ IO routines
       -orig.run()
       +orig.run(verbose=False)
       +#orig.run(dry=True)
       +#orig.run(verbose=True, hideinputfile=False, cudamemcheck=True)
       +cpp = sphere.sim(fluid=True)
       +cpp.readbin("../output/" + orig.sid + ".output00000.bin", verbose=False)
       +compare(orig, cpp, "C++ IO:   ")
       +
       +# Test CUDA IO routines
       +cuda = sphere.sim(fluid=True)
       +cuda.readbin("../output/" + orig.sid + ".output00001.bin", verbose=False)
       +cuda.time_current = orig.time_current
       +cuda.time_step_count = orig.time_step_count
       +compareNumpyArraysClose(orig.v_f, cuda.v_f, "cuda.v_f:", tolerance=1e-5)
       +cuda.v_f = orig.v_f
       +#compareNumpyArraysClose(orig.p_f, cuda.p_f, "cuda.p_f:", tolerance=0.1)
       +cuda.p_f = orig.p_f
       +if numpy.allclose(orig.x, cuda.x, 0.01):
       +    cuda.x = orig.x  # ignore small changes
       +if numpy.max(numpy.abs(cuda.vel - orig.vel)) < 1.0e-5:
       +    cuda.vel = orig.vel  # ignore small changes
       +    cuda.xyzsum = orig.xyzsum
       +    cuda.force = orig.force
       +compare(orig, cuda, "CUDA IO:  ")
       +
       +
       +
       +#### Input/output tests ####
       +print("### Fluid input/output tests - Darcy CFD solver ###")
       +
       +# Generate data in python
       +orig = sphere.sim(np=100, sid="test-initgrid-fluid", fluid=True)
       +orig.cleanup()
       +orig.generateRadii(histogram=False, radius_mean=1.0)
       +orig.defaultParams()
       +orig.initRandomGridPos()
       +
       +orig.initFluid(cfd_solver = 1)
       +orig.setMaxIterations(10)
       +orig.initTemporal(current=0.0, total=0.0)
       +orig.time_total=2.0*orig.time_dt
       +orig.time_file_dt = orig.time_dt
       +orig.writebin(verbose=False)
       +
       +# Test Python IO routines
       +py = sphere.sim(fluid=True)
       +py.readbin("../input/" + orig.sid + ".bin", verbose=False)
       +compare(orig, py, "Python IO:")
       +
       +# Test C++ IO routines
       +orig.run(verbose=False)
        #orig.run(dry=True)
        #orig.run(verbose=True, hideinputfile=False, cudamemcheck=True)
        cpp = sphere.sim(fluid=True)
 (DIR) diff --git a/tests/pytestutils.py b/tests/pytestutils.py
       t@@ -1,14 +1,15 @@
        #!/usr/bin/env python
        
        from sphere import *
       +from highlighttext import highlight
        import subprocess
        import sys
        
        def passed():
       -    return "\tPassed"
       +    return "\t" + highlight("Passed", "green")
        
        def failed():
       -    return "\tFailed"
       +    return "\t" + highlight("Failed", "red", True)
        
        def test(statement, string):
            if (statement == True):