tdebugging, last commit before moving F_pf to cell faces - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit fec2c4c8bcb848ae61bd536475f44a96dfa55659
 (DIR) parent cef188661b6df4c706bda6a9c4c881e6c16f9ef1
 (HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
       Date:   Fri,  6 Jun 2014 13:28:13 +0200
       
       debugging, last commit before moving F_pf to cell faces
       
       Diffstat:
         M src/debug.h                         |       4 ++--
         M src/device.cu                       |      24 +++++++++++-------------
         M src/navierstokes.cuh                |      30 +++++++++++++++++-------------
         M tests/io_tests_fluid.py             |       6 +++---
       
       4 files changed, 33 insertions(+), 31 deletions(-)
       ---
 (DIR) diff --git a/src/debug.h b/src/debug.h
       t@@ -40,10 +40,10 @@ const int conv_log_interval = 4;
        #define CHECK_NS_FINITE
        
        // Enable reporting of velocity prediction components to stdout
       -#define REPORT_V_P_COMPONENTS
       +//#define REPORT_V_P_COMPONENTS
        
        // Enable reporting of forcing function terms to stdout
       -#define REPORT_FORCING_TERMS
       +//#define REPORT_FORCING_TERMS
        
        // Choose solver model (see Zhou et al. 2010 "Discrete particle simulation of
        // particle-fluid flow: model formulations and their applicability", table. 1.
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -36,25 +36,25 @@ __host__ void DEM::initializeGPU(void)
        
            // Variables containing device properties
            cudaDeviceProp prop;
       -    int devicecount;
       +    int deviceCount;
            int cudaDriverVersion;
            int cudaRuntimeVersion;
        
            checkForCudaErrors("Before initializing CUDA device");
        
            // Register number of devices
       -    cudaGetDeviceCount(&devicecount);
       +    cudaGetDeviceCount(&deviceCount);
        
       -    if (devicecount == 0) {
       +    if (deviceCount == 0) {
                std::cerr << "\nERROR: No CUDA-enabled devices availible. Bye."
                    << std::endl;
                exit(EXIT_FAILURE);
       -    } else if (devicecount == 1) {
       +    } else if (deviceCount == 1) {
                if (verbose == 1)
                    cout << "  System contains 1 CUDA compatible device.\n";
            } else {
                if (verbose == 1)
       -            cout << "  System contains " << devicecount
       +            cout << "  System contains " << deviceCount
                        << " CUDA compatible devices.\n";
            }
        
       t@@ -143,10 +143,8 @@ __global__ void checkConstantValues(int* dev_equal,
                    dev_params->mu != devC_params.mu ||
                    dev_params->rho_f != devC_params.rho_f)
                *dev_equal = 2; // Not ok
       -
        }
        
       -
        // Copy the constant data components to device memory,
        // and check whether the values correspond to the 
        // values in constant memory.
       t@@ -915,12 +913,6 @@ __host__ void DEM::startTime()
                        cudaThreadSynchronize();
                        checkForCudaErrorsIter("Post findFaceDivTau", iter);
        
       -                /*setUpperDivTau<<<dimGridFluidFace, dimBlockFluid>>>(
       -                        dev_ns_p, ns.bc_bot, ns.bc_top);
       -                cudaThreadSynchronize();
       -                checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter)
       -                */
       -
                        setNSghostNodesFace<Float>
                            <<<dimGridFluidFace, dimBlockFluid>>>(
                                dev_ns_div_tau_x,
       t@@ -1050,6 +1042,12 @@ __host__ void DEM::startTime()
                        cudaThreadSynchronize();
                        checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
        
       +                // Set cell-center velocity ghost nodes
       +                setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       +                    dev_ns_v, ns.bc_bot, ns.bc_top);
       +                cudaThreadSynchronize();
       +                checkForCudaErrorsIter("Post setNSghostNodes(v)", iter);
       +
                        // Find the divergence of phi*vi*v, needed for predicting the
                        // fluid velocities
                        if (PROFILING == 1)
 (DIR) diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh
       t@@ -1862,6 +1862,7 @@ __global__ void findNSdivphiviv(
        #endif
            }
        }
       +
        __global__ void findNSdivtau(
                Float*  dev_ns_tau,      // in
                Float3* dev_ns_div_tau)  // out
       t@@ -2249,14 +2250,14 @@ __global__ void findPredNSvelocities(
        
        #ifdef REPORT_V_P_COMPONENTS
                // Report velocity components to stdout for debugging
       -        printf("[%d,%d,%d]\t"
       -                "v_p = %e\t%e\t%e\t"
       -                "pres = %e\t%e\t%e\t"
       -                "interact = %e\t%e\t%e\t"
       -                "diff = %e\t%e\t%e\t"
       -                "grav = %e\t%e\t%e\t"
       -                "poros = %e\t%e\t%e\t"
       -                "adv = %e\t%e\t%e\n",
       +        printf("\n[%d,%d,%d]"
       +                "\tv_p      = %+e %+e %+e\n"
       +                "\tpres     = %+e %+e %+e\n"
       +                "\tinteract = %+e %+e %+e\n"
       +                "\tdiff     = %+e %+e %+e\n"
       +                "\tgrav     = %+e %+e %+e\n"
       +                "\tporos    = %+e %+e %+e\n"
       +                "\tadv      = %+e %+e %+e\n",
                        x, y, z,
                        v_p.x, v_p.y, v_p.z,
                        pressure_term.x, pressure_term.y, pressure_term.z, 
       t@@ -2997,9 +2998,12 @@ __global__ void findInteractionForce(
                const Float3 f_pf = f_d + f_p + f_v;
        
        #ifdef CHECK_NS_FINITE
       -        /*
       -        printf("%d [%d,%d,%d]\tV_p=%f Re=%f Cd=%f chi=%f\n"
       -        "  f_d=%f,%f,%f f_p=%f,%f,%f f_v=%f,%f,%f\n",
       +        //*
       +        printf("\n%d [%d,%d,%d]\n"
       +               "\tV_p = %f Re=%f Cd=%f chi=%f\n"
       +               "\tf_d = %+e %+e %+e\n"
       +               "\tf_p = %+e %+e %+e\n"
       +               "\tf_v = %+e %+e %+e\n",
                        i, i_x, i_y, i_z, V_p, Re, Cd, chi,
                        f_d.x, f_d.y, f_d.z,
                        f_p.x, f_p.y, f_p.z,
       t@@ -3191,8 +3195,8 @@ __global__ void findFaceDivTau(
            const Float dz = devC_grid.L[2]/nz;
        
            // Check that we are not outside the fluid grid
       -    if (x <= nx && y <= ny && z <= nz) {
       -    //if (x < nx && y < ny && z < nz) {
       +    //if (x <= nx && y <= ny && z <= nz) {
       +    if (x < nx && y < ny && z < nz) {
        
                const unsigned int faceidx = vidx(x,y,z);
        
 (DIR) diff --git a/tests/io_tests_fluid.py b/tests/io_tests_fluid.py
       t@@ -22,9 +22,9 @@ py.readbin("../input/" + orig.sid + ".bin", verbose=False)
        compare(orig, py, "Python IO:")
        
        # Test C++ IO routines
       -orig.run(verbose=True, hideinputfile=True)
       -#orig.run(dry=True)
       -#orig.run(verbose=True, hideinputfile=False, cudamemcheck=True)
       +#orig.run(verbose=True, hideinputfile=True)
       +orig.run(dry=True)
       +orig.run(verbose=True, hideinputfile=False, cudamemcheck=True)
        cpp = sphere.sim(fluid=True)
        cpp.readbin("../output/" + orig.sid + ".output00000.bin", verbose=False)
        compare(orig, cpp, "C++ IO:   ")