tseveral functions moved outside np>0 conditional statement - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit e772cad25f722b8f37f9372ece530db5dbbd0b57
 (DIR) parent 7111bba6f711055826d031e25d69b12ad8e5d4bd
 (HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
       Date:   Wed, 18 Jun 2014 09:42:24 +0200
       
       several functions moved outside np>0 conditional statement
       
       Diffstat:
         M src/debug.h                         |       8 ++++----
         M src/device.cu                       |     100 ++++++++++++++++----------------
         M src/navierstokes.cuh                |      45 ++++++++++++++++++-------------
         M tests/cfd_simple.py                 |       3 ++-
       
       4 files changed, 82 insertions(+), 74 deletions(-)
       ---
 (DIR) diff --git a/src/debug.h b/src/debug.h
       t@@ -21,8 +21,8 @@ const unsigned int nijacnorm = 10;
        const int write_res_log = 0;
        
        // Report epsilon values during Jacobi iterations to stdout
       -//#define REPORT_EPSILON
       -//#define REPORT_MORE_EPSILON
       +#define REPORT_EPSILON
       +#define REPORT_MORE_EPSILON
        
        // Report the number of iterations it took before convergence to logfile
        // 'output/<sid>-conv.dat'
       t@@ -40,10 +40,10 @@ const int conv_log_interval = 4;
        #define CHECK_NS_FINITE
        
        // Enable reporting of velocity prediction components to stdout
       -//#define REPORT_V_P_COMPONENTS
       +#define REPORT_V_P_COMPONENTS
        
        // Enable reporting of forcing function terms to stdout
       -//#define REPORT_FORCING_TERMS
       +#define REPORT_FORCING_TERMS
        
        // Choose solver model (see Zhou et al. 2010 "Discrete particle simulation of
        // particle-fluid flow: model formulations and their applicability", table. 1.
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -874,65 +874,65 @@ __host__ void DEM::startTime()
                            << std::endl;
                        exit(1);
                    }*/
       +            if (iter == 0) {
       +                // set cell center ghost nodes
       +                setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       +                    dev_ns_v, ns.bc_bot, ns.bc_top);
        
       -            if (np > 0) {
       -
       -                if (iter == 0) {
       -                    // set cell center ghost nodes
       -                    setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_ns_v, ns.bc_bot, ns.bc_top);
       -
       -                    // find cell face velocities
       -                    interpolateCenterToFace
       -                        <<<dimGridFluidFace, dimBlockFluidFace>>>(
       -                            dev_ns_v,
       -                            dev_ns_v_x,
       -                            dev_ns_v_y,
       -                            dev_ns_v_z);
       -                    cudaThreadSynchronize();
       -                    checkForCudaErrors("Post interpolateCenterToFace");
       -                }
       -
       -                setNSghostNodesFace<Float>
       +                // find cell face velocities
       +                interpolateCenterToFace
                            <<<dimGridFluidFace, dimBlockFluidFace>>>(
       +                        dev_ns_v,
                                dev_ns_v_x,
                                dev_ns_v_y,
       -                        dev_ns_v_z,
       -                        ns.bc_bot,
       -                        ns.bc_top);
       +                        dev_ns_v_z);
                        cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post setNSghostNodesFace", iter);
       +                checkForCudaErrors("Post interpolateCenterToFace");
       +            }
        
       -                findFaceDivTau<<<dimGridFluidFace, dimBlockFluidFace>>>(
       -                        dev_ns_v_x,
       -                        dev_ns_v_y,
       -                        dev_ns_v_z,
       -                        dev_ns_div_tau_x,
       -                        dev_ns_div_tau_y,
       -                        dev_ns_div_tau_z);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post findFaceDivTau", iter);
       +            setNSghostNodesFace<Float>
       +                <<<dimGridFluidFace, dimBlockFluidFace>>>(
       +                    dev_ns_v_x,
       +                    dev_ns_v_y,
       +                    dev_ns_v_z,
       +                    ns.bc_bot,
       +                    ns.bc_top);
       +            cudaThreadSynchronize();
       +            checkForCudaErrorsIter("Post setNSghostNodesFace", iter);
       +
       +            findFaceDivTau<<<dimGridFluidFace, dimBlockFluidFace>>>(
       +                dev_ns_v_x,
       +                dev_ns_v_y,
       +                dev_ns_v_z,
       +                dev_ns_div_tau_x,
       +                dev_ns_div_tau_y,
       +                dev_ns_div_tau_z);
       +            cudaThreadSynchronize();
       +            checkForCudaErrorsIter("Post findFaceDivTau", iter);
       +
       +            setNSghostNodesFace<Float>
       +                <<<dimGridFluidFace, dimBlockFluid>>>(
       +                    dev_ns_div_tau_x,
       +                    dev_ns_div_tau_y,
       +                    dev_ns_div_tau_z,
       +                    ns.bc_bot,
       +                    ns.bc_top);
       +            cudaThreadSynchronize();
       +            checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_div_tau)",
       +                                   iter);
        
       -                setNSghostNodesFace<Float>
       -                    <<<dimGridFluidFace, dimBlockFluid>>>(
       -                        dev_ns_div_tau_x,
       -                        dev_ns_div_tau_y,
       -                        dev_ns_div_tau_z,
       -                        ns.bc_bot,
       -                        ns.bc_top);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_div_tau)",
       -                        iter);
       +            setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                dev_ns_p, ns.bc_bot, ns.bc_top);
       +            cudaThreadSynchronize();
       +            checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter);
        
       -                setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_p, ns.bc_bot, ns.bc_top);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter);
       +            setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       +                dev_ns_phi, ns.bc_bot, ns.bc_top);
       +            cudaThreadSynchronize();
       +            checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter);
        
       -                setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                        dev_ns_phi, ns.bc_bot, ns.bc_top);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter);
       +
       +            if (np > 0) {
        
                        // Per particle, find the fluid-particle interaction force f_pf
                        // and apply it to the particle
 (DIR) diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh
       t@@ -2115,7 +2115,7 @@ __global__ void findPredNSvelocities(
                int     bc_bot,                 // in
                int     bc_top,                 // in
                Float   beta,                   // in
       -        Float3* dev_ns_F_pf,              // in
       +        Float3* dev_ns_F_pf,            // in
                unsigned int ndem,              // in
                Float*  dev_ns_v_p_x,           // out
                Float*  dev_ns_v_p_y,           // out
       t@@ -2150,10 +2150,15 @@ __global__ void findPredNSvelocities(
                        dev_ns_v_x[fidx],
                        dev_ns_v_y[fidx],
                        dev_ns_v_z[fidx]);
       -        const Float3 div_tau = MAKE_FLOAT3(
       +
       +        Float3 div_tau = MAKE_FLOAT3(0.0, 0.0, 0.0);
       +        // TODO: enable when div_tau values are fixed to avoid uneccesary reads
       +        //if (devC_params.mu > 0.0) {
       +            div_tau = MAKE_FLOAT3(
                        dev_ns_div_tau_x[fidx],
                        dev_ns_div_tau_y[fidx],
                        dev_ns_div_tau_z[fidx]);
       +            //}
        
                // cell center values
                const Float phi_xn    = dev_ns_phi[idx(x-1,y,z)];
       t@@ -2259,21 +2264,23 @@ __global__ void findPredNSvelocities(
        #ifdef REPORT_V_P_COMPONENTS
                // Report velocity components to stdout for debugging
                printf("\n[%d,%d,%d]"
       -                "\tv_p      = %+e %+e %+e\n"
       -                "\tpres     = %+e %+e %+e\n"
       -                "\tinteract = %+e %+e %+e\n"
       -                "\tdiff     = %+e %+e %+e\n"
       -                "\tgrav     = %+e %+e %+e\n"
       -                "\tporos    = %+e %+e %+e\n"
       -                "\tadv      = %+e %+e %+e\n",
       -                x, y, z,
       -                v_p.x, v_p.y, v_p.z,
       -                pressure_term.x, pressure_term.y, pressure_term.z, 
       -                interaction_term.x, interaction_term.y, interaction_term.z, 
       -                diffusion_term.x, diffusion_term.y, diffusion_term.z, 
       -                gravity_term.x, gravity_term.y, gravity_term.z, 
       -                porosity_term.x, porosity_term.y, porosity_term.z, 
       -                advection_term.x, advection_term.y, advection_term.z);
       +               "\tv_p      = %+e %+e %+e\n"
       +               "\tpres     = %+e %+e %+e\n"
       +               "\tinteract = %+e %+e %+e\n"
       +               "\tdiff     = %+e %+e %+e\n"
       +               "\tgrav     = %+e %+e %+e\n"
       +               "\tporos    = %+e %+e %+e\n"
       +               "\tadv      = %+e %+e %+e\n"
       +               "\tdiv_tau  = %+e %+e %+e\n",
       +               x, y, z,
       +               v_p.x, v_p.y, v_p.z,
       +               pressure_term.x, pressure_term.y, pressure_term.z, 
       +               interaction_term.x, interaction_term.y, interaction_term.z, 
       +               diffusion_term.x, diffusion_term.y, diffusion_term.z, 
       +               gravity_term.x, gravity_term.y, gravity_term.z, 
       +               porosity_term.x, porosity_term.y, porosity_term.z, 
       +               advection_term.x, advection_term.y, advection_term.z,
       +               div_tau.x, div_tau.y, div_tau.z);
        #endif
        
                // Enforce Neumann BC if specified
       t@@ -3250,8 +3257,8 @@ __global__ void findFaceDivTau(
                            (v_z_zp - 2.0*v_z + v_z_zn)/(dz*dz));
        
                __syncthreads();
       -        //printf("div_tau [%d,%d,%d] = %f, %f, %f\n", x,y,z,
       -                //div_tau_x, div_tau_y, div_tau_z);
       +        printf("div_tau [%d,%d,%d] = %f, %f, %f\n", x,y,z,
       +                div_tau_x, div_tau_y, div_tau_z);
                dev_ns_div_tau_x[faceidx] = div_tau_x;
                dev_ns_div_tau_y[faceidx] = div_tau_y;
                dev_ns_div_tau_z[faceidx] = div_tau_z;
 (DIR) diff --git a/tests/cfd_simple.py b/tests/cfd_simple.py
       t@@ -6,7 +6,8 @@ orig = sphere.sim('cfd_simple', fluid=True)
        orig.cleanup()
        orig.defineWorldBoundaries([0.3, 0.3, 0.3], dx = 0.1)
        orig.initFluid(mu=0.0)
       -orig.initTemporal(total = 0.5, file_dt = 0.05, dt = 1.0e-4)
       +#orig.initTemporal(total = 0.5, file_dt = 0.05, dt = 1.0e-4)
       +orig.initTemporal(total = 1.0e-3, file_dt = 1.0e-3, dt = 1.0e-3)
        #orig.bc_bot[0] = 1      # No-flow BC at bottom (Neumann)
        
        # Homogeneous pressure, no gravity