timplemented ndem functionality into Darcy solver, not tested when ndem!=1 - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit ea8067eeef0eaeb962695192e3b978fa6b413fc5
 (DIR) parent 629b0c945feb44b5d12abbd98e5e47f67cbab702
 (HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
       Date:   Thu,  6 Nov 2014 15:45:59 +0100
       
       implemented ndem functionality into Darcy solver, not tested when ndem!=1
       
       Diffstat:
         M python/halfshear-darcy-starter.py   |       6 +++---
         M src/darcy.cuh                       |      36 +++++++++++++++++++++++--------
         M src/device.cu                       |     414 ++++++++++++++++---------------
       
       3 files changed, 242 insertions(+), 214 deletions(-)
       ---
 (DIR) diff --git a/python/halfshear-darcy-starter.py b/python/halfshear-darcy-starter.py
       t@@ -34,8 +34,8 @@ sim.cleanup()
        sim.adjustUpperWall()
        sim.zeroKinematics()
        
       -#sim.shear(0.0/20.0)
       -sim.shear(1.0/20.0)
       +sim.shear(0.0/20.0)
       +#sim.shear(1.0/20.0)
        
        if fluid:
            #sim.num[2] *= 2
       t@@ -63,7 +63,7 @@ sim.setDampingTangential(0.0)
        sim.fixvel[:] = -1.0
        
        sim.initTemporal(total = 20.0, file_dt = 0.01, epsilon=0.07)
       -sim.time_dt[0] *= 1.0e-2
       +#sim.time_dt[0] *= 1.0e-2
        #sim.initTemporal(total = 1.0e-4, file_dt = 1.0e-5, epsilon=0.07)
        
        # Fix lowermost particles
 (DIR) diff --git a/src/darcy.cuh b/src/darcy.cuh
       t@@ -170,6 +170,23 @@ __global__ void setDarcyNormZero(
            }
        }
        
       +// Set an array of scalars to 0.0 inside devC_grid
       +    template<typename T>
       +__global__ void setDarcyZeros(T* __restrict__ dev_scalarfield)
       +{
       +    // 3D thread index
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       +
       +    // check that we are not outside the fluid grid
       +    if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
       +        __syncthreads();
       +        dev_scalarfield[d_idx(x,y,z)] = 0.0;
       +    }
       +}
       +
       +
        // Update a field in the ghost nodes from their parent cell values. The edge
        // (diagonal) cells are not written since they are not read. Launch this kernel
        // for all cells in the grid using
       t@@ -234,7 +251,7 @@ __global__ void findDarcyPorosities(
                const unsigned int np,                            // in
                const Float c_phi,                                // in
                Float*  __restrict__ dev_darcy_phi,               // in + out
       -        Float*  __restrict__ dev_darcy_dphi)              // out
       +        Float*  __restrict__ dev_darcy_dphi)              // in + out
        {
            // 3D thread index
            const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       t@@ -395,8 +412,8 @@ __global__ void findDarcyPorosities(
                    __syncthreads();
                    //phi = 0.5; dphi = 0.0; // disable porosity effects
                    const unsigned int cellidx = d_idx(x,y,z);
       -            dev_darcy_phi[cellidx]  = phi*c_phi;
       -            dev_darcy_dphi[cellidx] = dphi*c_phi;
       +            dev_darcy_phi[cellidx]   = phi*c_phi;
       +            dev_darcy_dphi[cellidx] += dphi*c_phi;
                    //dev_darcy_vp_avg[cellidx] = v_avg;
                    //dev_darcy_d_avg[cellidx]  = d_avg;
        
       t@@ -719,10 +736,11 @@ __global__ void findDarcyPermeabilityGradients(
        
        // Find the temporal gradient in pressure using the backwards Euler method
        __global__ void findDarcyPressureChange(
       -        const Float* __restrict__ dev_darcy_p_old,
       -        const Float* __restrict__ dev_darcy_p,
       -        const unsigned int iter,
       -        Float* __restrict__ dev_darcy_dpdt)
       +        const Float* __restrict__ dev_darcy_p_old,    // in
       +        const Float* __restrict__ dev_darcy_p,        // in
       +        const unsigned int iter,                      // in
       +        const unsigned int ndem,                      // in
       +        Float* __restrict__ dev_darcy_dpdt)           // out
        {
            // 3D thread index
            const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
       t@@ -739,7 +757,7 @@ __global__ void findDarcyPressureChange(
                const Float p_old = dev_darcy_p_old[cellidx];
                const Float p     = dev_darcy_p[cellidx];
        
       -        Float dpdt = (p - p_old)/devC_dt;
       +        Float dpdt = (p - p_old)/(ndem*devC_dt);
        
                // Ignore the large initial pressure gradients caused by solver "warm
                // up" towards hydrostatic pressure distribution
       t@@ -862,7 +880,7 @@ __global__ void updateDarcySolution(
                    /(2.0*(dxdx*dydy + dxdx*dzdz + dydy*dzdz));*/
        
                Float p_new = p_old
       -            + devC_dt/(beta_f*phi*mu)*(k*laplace_p + dot(grad_k, grad_p))
       +            + (ndem*devC_dt)/(beta_f*phi*mu)*(k*laplace_p + dot(grad_k, grad_p))
                    - dphi/(beta_f*phi*(1.0 - phi));
        
                // Dirichlet BC at dynamic top wall. wall0_iz will be larger than the
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -1745,8 +1745,7 @@ __host__ void DEM::startTime()
                    else if (cfd_solver == 1) { 
        
        #if defined(REPORT_EPSILON) || defined(REPORT_FORCING_TERMS)
       -                    std::cout
       -                        << "\n\n@@@@@@ TIME STEP " << iter << " @@@"
       +                std::cout << "\n\n@@@@@@ TIME STEP " << iter << " @@@"
                                << std::endl;
        #endif
        
       t@@ -1796,116 +1795,77 @@ __host__ void DEM::startTime()
                                    iter);
                        }
        
       -                // Modulate the pressures at the upper boundary cells
       -                if ((darcy.p_mod_A > 1.0e-5 || darcy.p_mod_A < -1.0e-5) &&
       -                        darcy.p_mod_f > 1.0e-7) {
       -                    // original pressure
       -                    Float new_pressure = darcy.p[d_idx(0,0,darcy.nz-1)] //orig p
       -                        + darcy.p_mod_A*sin(2.0*M_PI*darcy.p_mod_f*time.current
       -                                + darcy.p_mod_phi);
       +                if ((iter % ns.ndem) == 0) {
       +
       +                    // Modulate the pressures at the upper boundary cells
       +                    if ((darcy.p_mod_A > 1.0e-5 || darcy.p_mod_A < -1.0e-5) &&
       +                            darcy.p_mod_f > 1.0e-7) {
       +                        // original pressure
       +                        Float new_pressure = darcy.p[d_idx(0,0,darcy.nz-1)] //orig p
       +                            + darcy.p_mod_A*sin(2.0*M_PI*darcy.p_mod_f*time.current
       +                                    + darcy.p_mod_phi);
       +                        if (PROFILING == 1)
       +                            startTimer(&kernel_tic);
       +                        setDarcyTopPressure<<<dimGridFluid, dimBlockFluid>>>(
       +                                new_pressure,
       +                                dev_darcy_p);
       +                        if (PROFILING == 1)
       +                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                    &t_setDarcyTopPressure);
       +                        cudaThreadSynchronize();
       +                        checkForCudaErrorsIter("Post setUpperPressureNS", iter);
       +                    }
       +
       +                    if (walls.nw > 0 && walls.wmode[0] == 1) {
       +                        wall0_iz = walls.nx->w/(grid.L[2]/grid.num[2]);
       +                        /*setDarcyTopWallPressure<<<dimGridFluid, dimBlockFluid>>>(
       +                          new_pressure,
       +                          wall0_iz,
       +                          dev_darcy_p);
       +                          cudaThreadSynchronize();
       +                          checkForCudaErrorsIter("Post setDarcyTopWallPressure",
       +                          iter);*/
       +                    }
       +
                            if (PROFILING == 1)
                                startTimer(&kernel_tic);
       -                    setDarcyTopPressure<<<dimGridFluid, dimBlockFluid>>>(
       -                            new_pressure,
       -                            dev_darcy_p);
       +                    findDarcyPermeabilities<<<dimGridFluid, dimBlockFluid>>>(
       +                            darcy.k_c, dev_darcy_phi, dev_darcy_k);
       +                    cudaThreadSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                                &t_setDarcyTopPressure);
       -                    cudaThreadSynchronize();
       -                    checkForCudaErrorsIter("Post setUpperPressureNS", iter);
       -                }
       -
       -                if (walls.nw > 0 && walls.wmode[0] == 1) {
       -                    wall0_iz = walls.nx->w/(grid.L[2]/grid.num[2]);
       -                    /*setDarcyTopWallPressure<<<dimGridFluid, dimBlockFluid>>>(
       -                            new_pressure,
       -                            wall0_iz,
       -                            dev_darcy_p);
       -                    cudaThreadSynchronize();
       -                    checkForCudaErrorsIter("Post setDarcyTopWallPressure",
       -                            iter);*/
       -                }
       -
       -                if (PROFILING == 1)
       -                    startTimer(&kernel_tic);
       -                findDarcyPermeabilities<<<dimGridFluid, dimBlockFluid>>>(
       -                        darcy.k_c, dev_darcy_phi, dev_darcy_k);
       -                cudaThreadSynchronize();
       -                if (PROFILING == 1)
       -                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                            &t_findDarcyPermeabilities);
       -                checkForCudaErrorsIter("Post findDarcyPermeabilities",
       -                        iter);
       -
       -                if (PROFILING == 1)
       -                    startTimer(&kernel_tic);
       -                setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_darcy_k, darcy.bc_bot, darcy.bc_top);
       -                cudaThreadSynchronize();
       -                if (PROFILING == 1)
       -                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                            &t_setDarcyGhostNodes);
       -                checkForCudaErrorsIter("Post setDarcyGhostNodes(dev_darcy_k)",
       -                        iter);
       -
       -                if (PROFILING == 1)
       -                    startTimer(&kernel_tic);
       -                findDarcyPermeabilityGradients<<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_darcy_k, dev_darcy_grad_k);
       -                cudaThreadSynchronize();
       -                if (PROFILING == 1)
       -                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                            &t_findDarcyPermeabilityGradients);
       -                checkForCudaErrorsIter("Post findDarcyPermeabilityGradients",
       -                        iter);
       +                                &t_findDarcyPermeabilities);
       +                    checkForCudaErrorsIter("Post findDarcyPermeabilities",
       +                            iter);
        
       -                if (iter == 0) {
       -                    setDarcyNormZero<<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_darcy_norm);
       +                    if (PROFILING == 1)
       +                        startTimer(&kernel_tic);
       +                    setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_darcy_k, darcy.bc_bot, darcy.bc_top);
                            cudaThreadSynchronize();
       -                    checkForCudaErrorsIter("Post setDarcyNormZero", iter);
       +                    if (PROFILING == 1)
       +                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                &t_setDarcyGhostNodes);
       +                    checkForCudaErrorsIter("Post setDarcyGhostNodes(dev_darcy_k)",
       +                            iter);
        
                            if (PROFILING == 1)
                                startTimer(&kernel_tic);
       -                    copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_darcy_p,
       -                            dev_darcy_p_old);
       +                    findDarcyPermeabilityGradients<<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_darcy_k, dev_darcy_grad_k);
                            cudaThreadSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                                &t_copyValues);
       -                    checkForCudaErrorsIter("Post copyValues(p -> p_old)",
       +                                &t_findDarcyPermeabilityGradients);
       +                    checkForCudaErrorsIter("Post findDarcyPermeabilityGradients",
                                    iter);
       -                }
       -
       -                /*if (PROFILING == 1)
       -                    startTimer(&kernel_tic);
       -                findDarcyPressureChange<<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_darcy_p_old,
       -                        dev_darcy_p,
       -                        iter,
       -                        dev_darcy_dpdt);
       -                cudaThreadSynchronize();
       -                if (PROFILING == 1)
       -                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                            &t_findDarcyPressureChange);
       -                checkForCudaErrorsIter("Post findDarcyPressureChange", iter);*/
       -
       -                // Solve the system of epsilon using a Jacobi iterative solver.
       -                // The average normalized residual is initialized to a large
       -                // value.
       -                //double avg_norm_res;
       -                double max_norm_res;
       -
       -                // Write a log file of the normalized residuals during the
       -                // Jacobi iterations
       -                std::ofstream reslog;
       -                if (write_res_log == 1)
       -                    reslog.open("max_res_norm.dat");
        
       -                for (unsigned int nijac = 0; nijac<darcy.maxiter; ++nijac) {
       +                    if (iter == 0) {
       +                        setDarcyNormZero<<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_darcy_norm);
       +                        cudaThreadSynchronize();
       +                        checkForCudaErrorsIter("Post setDarcyNormZero", iter);
        
       -                    if (nijac == 0) {
                                if (PROFILING == 1)
                                    startTimer(&kernel_tic);
                                copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
       t@@ -1919,130 +1879,180 @@ __host__ void DEM::startTime()
                                        iter);
                            }
        
       -                    if (PROFILING == 1)
       -                        startTimer(&kernel_tic);
       -                    setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_darcy_p, darcy.bc_bot, darcy.bc_top);
       -                    cudaThreadSynchronize();
       -                    if (PROFILING == 1)
       -                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                                &t_setDarcyGhostNodes);
       -                    checkForCudaErrorsIter("Post setDarcyGhostNodes("
       -                            "dev_darcy_p) in Jacobi loop", iter);
       +                    /*if (PROFILING == 1)
       +                      startTimer(&kernel_tic);
       +                      findDarcyPressureChange<<<dimGridFluid, dimBlockFluid>>>(
       +                      dev_darcy_p_old,
       +                      dev_darcy_p,
       +                      iter,
       +                      dev_darcy_dpdt);
       +                      cudaThreadSynchronize();
       +                      if (PROFILING == 1)
       +                      stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                      &t_findDarcyPressureChange);
       +                      checkForCudaErrorsIter("Post findDarcyPressureChange", iter);*/
        
       -                    if (PROFILING == 1)
       -                        startTimer(&kernel_tic);
       -                    updateDarcySolution<<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_darcy_p_old,
       -                            //dev_darcy_dpdt,
       -                            dev_darcy_p,
       -                            dev_darcy_k,
       -                            dev_darcy_phi,
       -                            dev_darcy_dphi,
       -                            dev_darcy_grad_k,
       -                            darcy.beta_f,
       -                            darcy.mu,
       -                            darcy.bc_bot,
       -                            darcy.bc_top,
       -                            darcy.ndem,
       -                            wall0_iz,
       -                            dev_darcy_p_new,
       -                            dev_darcy_norm);
       -                    cudaThreadSynchronize();
       -                    if (PROFILING == 1)
       -                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                                &t_updateDarcySolution);
       -                    checkForCudaErrorsIter("Post updateDarcySolution", iter);
       +                    // Solve the system of epsilon using a Jacobi iterative solver.
       +                    // The average normalized residual is initialized to a large
       +                    // value.
       +                    //double avg_norm_res;
       +                    double max_norm_res;
        
       -                    // Copy new values to current values
       -                    if (PROFILING == 1)
       -                        startTimer(&kernel_tic);
       -                    copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_darcy_p_new,
       -                            dev_darcy_p);
       -                    cudaThreadSynchronize();
       -                    if (PROFILING == 1)
       -                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                                &t_copyValues);
       -                    checkForCudaErrorsIter("Post copyValues(p_new -> p)", iter);
       +                    // Write a log file of the normalized residuals during the
       +                    // Jacobi iterations
       +                    std::ofstream reslog;
       +                    if (write_res_log == 1)
       +                        reslog.open("max_res_norm.dat");
       +
       +                    for (unsigned int nijac = 0; nijac<darcy.maxiter; ++nijac) {
       +
       +                        if (nijac == 0) {
       +                            if (PROFILING == 1)
       +                                startTimer(&kernel_tic);
       +                            copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                                    dev_darcy_p,
       +                                    dev_darcy_p_old);
       +                            cudaThreadSynchronize();
       +                            if (PROFILING == 1)
       +                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                        &t_copyValues);
       +                            checkForCudaErrorsIter("Post copyValues(p -> p_old)",
       +                                    iter);
       +                        }
       +
       +                        if (PROFILING == 1)
       +                            startTimer(&kernel_tic);
       +                        setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_darcy_p, darcy.bc_bot, darcy.bc_top);
       +                        cudaThreadSynchronize();
       +                        if (PROFILING == 1)
       +                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                    &t_setDarcyGhostNodes);
       +                        checkForCudaErrorsIter("Post setDarcyGhostNodes("
       +                                "dev_darcy_p) in Jacobi loop", iter);
       +
       +                        if (PROFILING == 1)
       +                            startTimer(&kernel_tic);
       +                        updateDarcySolution<<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_darcy_p_old,
       +                                //dev_darcy_dpdt,
       +                                dev_darcy_p,
       +                                dev_darcy_k,
       +                                dev_darcy_phi,
       +                                dev_darcy_dphi,
       +                                dev_darcy_grad_k,
       +                                darcy.beta_f,
       +                                darcy.mu,
       +                                darcy.bc_bot,
       +                                darcy.bc_top,
       +                                darcy.ndem,
       +                                wall0_iz,
       +                                dev_darcy_p_new,
       +                                dev_darcy_norm);
       +                        cudaThreadSynchronize();
       +                        if (PROFILING == 1)
       +                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                    &t_updateDarcySolution);
       +                        checkForCudaErrorsIter("Post updateDarcySolution", iter);
       +
       +                        // Copy new values to current values
       +                        if (PROFILING == 1)
       +                            startTimer(&kernel_tic);
       +                        copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                                dev_darcy_p_new,
       +                                dev_darcy_p);
       +                        cudaThreadSynchronize();
       +                        if (PROFILING == 1)
       +                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                    &t_copyValues);
       +                        checkForCudaErrorsIter("Post copyValues(p_new -> p)", iter);
        
        #ifdef REPORT_EPSILON
       -                    std::cout << "\n###### JACOBI ITERATION "
       -                        << nijac << " after copyValues ######" << std::endl;
       -                    transferDarcyPressuresFromGlobalDeviceMemory();
       -                    printDarcyArray(stdout, darcy.p, "p");
       +                        std::cout << "\n###### JACOBI ITERATION "
       +                            << nijac << " after copyValues ######" << std::endl;
       +                        transferDarcyPressuresFromGlobalDeviceMemory();
       +                        printDarcyArray(stdout, darcy.p, "p");
        #endif
        
       -                    if (nijac % nijacnorm == 0) {
       -                        // Read the normalized residuals from the device
       -                        transferDarcyNormFromGlobalDeviceMemory();
       +                        if (nijac % nijacnorm == 0) {
       +                            // Read the normalized residuals from the device
       +                            transferDarcyNormFromGlobalDeviceMemory();
        
       -                        // Write the normalized residuals to the terminal
       -                        //printDarcyArray(stdout, darcy.norm, "norm");
       +                            // Write the normalized residuals to the terminal
       +                            //printDarcyArray(stdout, darcy.norm, "norm");
        
       -                        // Find the maximum value of the normalized residuals
       -                        max_norm_res = maxNormResDarcy();
       +                            // Find the maximum value of the normalized residuals
       +                            max_norm_res = maxNormResDarcy();
        
       -                        // Write the Jacobi iteration number and maximum value
       -                        // of the normalized residual to the log file
       -                        if (write_res_log == 1)
       -                            reslog << nijac << '\t' << max_norm_res
       -                                << std::endl;
       +                            // Write the Jacobi iteration number and maximum value
       +                            // of the normalized residual to the log file
       +                            if (write_res_log == 1)
       +                                reslog << nijac << '\t' << max_norm_res
       +                                    << std::endl;
        
       -                        if (max_norm_res <= darcy.tolerance) {
       -                            if (write_conv_log == 1
       -                                    && iter % conv_log_interval == 0)
       -                                convlog << iter+1 << '\t' << nijac << std::endl;
       +                            if (max_norm_res <= darcy.tolerance) {
       +                                if (write_conv_log == 1
       +                                        && iter % conv_log_interval == 0)
       +                                    convlog << iter+1 << '\t' << nijac << std::endl;
        
       -                            break;  // solution has converged, exit Jacobi loop
       +                                break;  // solution has converged, exit Jacobi loop
       +                            }
                                }
       -                    }
        
       -                    if (nijac == darcy.maxiter-1) {
       +                        if (nijac == darcy.maxiter-1) {
        
       -                        if (write_conv_log == 1)
       -                            convlog << iter+1 << '\t' << nijac << std::endl;
       +                            if (write_conv_log == 1)
       +                                convlog << iter+1 << '\t' << nijac << std::endl;
        
       -                        std::cerr << "\nIteration " << iter << ", time " 
       -                            << iter*time.dt << " s: "
       -                            "Error, the pressure solution in the fluid "
       -                            "calculations did not converge. Try increasing "
       -                            "the value of 'darcy.maxiter' ("
       -                            << darcy.maxiter
       -                            << ") or increase 'darcy.tolerance' ("
       -                            << darcy.tolerance << ")." << std::endl;
       -                    }
       +                            std::cerr << "\nIteration " << iter << ", time " 
       +                                << iter*time.dt << " s: "
       +                                "Error, the pressure solution in the fluid "
       +                                "calculations did not converge. Try increasing "
       +                                "the value of 'darcy.maxiter' ("
       +                                << darcy.maxiter
       +                                << ") or increase 'darcy.tolerance' ("
       +                                << darcy.tolerance << ")." << std::endl;
       +                        }
        
       -                    if (write_res_log == 1)
       -                        reslog.close();
       +                        if (write_res_log == 1)
       +                            reslog.close();
        
       -                    //break; // end after first iteration
       -                }
       +                        //break; // end after first iteration
       +                    }
        
       -                if (PROFILING == 1)
       -                    startTimer(&kernel_tic);
       -                setDarcyGhostNodes<Float> <<<dimGridFluid, dimBlockFluid>>>
       -                    (dev_darcy_p, darcy.bc_bot, darcy.bc_top);
       -                cudaThreadSynchronize();
       -                if (PROFILING == 1)
       -                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                            &t_setDarcyGhostNodes);
       -                checkForCudaErrorsIter("Post setDarcyGhostNodes("
       -                        "dev_darcy_p) after Jacobi loop", iter);
       +                    // Zero all dphi values right after they are used in fluid
       +                    // solution
       +                    setDarcyZeros<Float> <<<dimGridFluid, dimBlockFluid>>>
       +                        (dev_darcy_dphi);
       +                    cudaThreadSynchronize();
       +                    checkForCudaErrorsIter("After setDarcyZeros(dev_darcy_dphi)",
       +                            iter);
        
       -                if (PROFILING == 1)
       -                    startTimer(&kernel_tic);
       -                findDarcyVelocities<<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_darcy_p,
       -                        dev_darcy_phi,
       -                        dev_darcy_k,
       -                        darcy.mu,
       -                        dev_darcy_v);
       -                cudaThreadSynchronize();
       -                if (PROFILING == 1)
       -                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       -                            &t_findDarcyVelocities);
       -                checkForCudaErrorsIter("Post findDarcyVelocities", iter);
       +                    if (PROFILING == 1)
       +                        startTimer(&kernel_tic);
       +                    setDarcyGhostNodes<Float> <<<dimGridFluid, dimBlockFluid>>>
       +                        (dev_darcy_p, darcy.bc_bot, darcy.bc_top);
       +                    cudaThreadSynchronize();
       +                    if (PROFILING == 1)
       +                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                &t_setDarcyGhostNodes);
       +                    checkForCudaErrorsIter("Post setDarcyGhostNodes("
       +                            "dev_darcy_p) after Jacobi loop", iter);
       +
       +                    if (PROFILING == 1)
       +                        startTimer(&kernel_tic);
       +                    findDarcyVelocities<<<dimGridFluid, dimBlockFluid>>>(
       +                            dev_darcy_p,
       +                            dev_darcy_phi,
       +                            dev_darcy_k,
       +                            darcy.mu,
       +                            dev_darcy_v);
       +                    cudaThreadSynchronize();
       +                    if (PROFILING == 1)
       +                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                                &t_findDarcyVelocities);
       +                    checkForCudaErrorsIter("Post findDarcyVelocities", iter);
       +                }
                    }
                }
                //break; // end after first iteration