tinvestigating segfault, small fixes - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 0285d52a7249a194ceed526710d22e6a2c473c9a
 (DIR) parent 3c22af0f8ea4b02887e75dbb9e2c05796cbd14a5
 (HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
       Date:   Thu,  5 Jun 2014 07:31:38 +0200
       
       investigating segfault, small fixes
       
       Diffstat:
         M src/device.cu                       |      10 +++++-----
         M src/navierstokes.cuh                |      18 ++++++++++--------
         M tests/io_tests_fluid.py             |       1 +
       
       3 files changed, 16 insertions(+), 13 deletions(-)
       ---
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -571,11 +571,11 @@ __host__ void DEM::startTime()
            // Use 3D block and grid layout for cell-face fluid calculations
            dim3 dimBlockFluidFace(8, 8, 8);    // 512 threads per block
            dim3 dimGridFluidFace(
       -            iDivUp(grid.num[0]+1, dimBlockFluid.x),
       -            iDivUp(grid.num[1]+1, dimBlockFluid.y),
       -            iDivUp(grid.num[2]+1, dimBlockFluid.z));
       -    if (dimGridFluid.z > 64 && navierstokes == 1) {
       -        cerr << "Error: dimGridFluid.z > 64" << endl;
       +            iDivUp(grid.num[0]+1, dimBlockFluidFace.x),
       +            iDivUp(grid.num[1]+1, dimBlockFluidFace.y),
       +            iDivUp(grid.num[2]+1, dimBlockFluidFace.z));
       +    if (dimGridFluidFace.z > 64 && navierstokes == 1) {
       +        cerr << "Error: dimGridFluidFace.z > 64" << endl;
                exit(1);
            }
        
 (DIR) diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh
       t@@ -59,7 +59,7 @@ __device__ int checkFiniteFloat3(
        void DEM::initNSmemDev(void)
        {
            // size of scalar field
       -    unsigned int memSizeF  = sizeof(Float)*NScells();
       +    unsigned int memSizeF = sizeof(Float)*NScells();
        
            // size of velocity arrays in staggered grid discretization
            unsigned int memSizeFvel = sizeof(Float)*NScellsVelocity();
       t@@ -3204,6 +3204,7 @@ __global__ void interpolateCenterToFace(
                const Float z_val = (center.z - zn.z)/2.0;
        
                __syncthreads();
       +        //printf("c2f [%d,%d,%d] = %f,%f,%f\n", x,y,z, x_val, y_val, z_val);
                dev_out_x[faceidx] = x_val;
                dev_out_y[faceidx] = y_val;
                dev_out_z[faceidx] = z_val;
       t@@ -3225,15 +3226,13 @@ __global__ void interpolateFaceToCenter(
            // Check that we are not outside the fluid grid
            if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
        
       -        const unsigned int cellidx = idx(x,y,z);
       -
                __syncthreads();
                const Float x_n = dev_in_x[vidx(x,y,z)];
                const Float x_p = dev_in_x[vidx(x+1,y,z)];
       -        const Float y_n = dev_in_x[vidx(x,y,z)];
       -        const Float y_p = dev_in_x[vidx(x,y+1,z)];
       -        const Float z_n = dev_in_x[vidx(x,y,z)];
       -        const Float z_p = dev_in_x[vidx(x,y,z+1)];
       +        const Float y_n = dev_in_y[vidx(x,y,z)];
       +        const Float y_p = dev_in_y[vidx(x,y+1,z)];
       +        const Float z_n = dev_in_z[vidx(x,y,z)];
       +        const Float z_p = dev_in_z[vidx(x,y,z+1)];
        
                const Float3 val = MAKE_FLOAT3(
                        (x_n + x_p)/2.0,
       t@@ -3241,7 +3240,8 @@ __global__ void interpolateFaceToCenter(
                        (z_n + z_p)/2.0);
        
                __syncthreads();
       -        dev_out[cellidx] = val;
       +        //printf("[%d,%d,%d] = %f\n", x,y,z, val);
       +        dev_out[idx(x,y,z)] = val;
            }
        }
        
       t@@ -3317,6 +3317,8 @@ __global__ void findFaceDivTau(
                            (v_z_zp - 2.0*v_z + v_z_zn)/(dz*dz));
        
                __syncthreads();
       +        printf("div_tau [%d,%d,%d] = %f, %f, %f\n", x,y,z,
       +                div_tau_x, div_tau_y, div_tau_z);
                dev_ns_div_tau_x[faceidx] = div_tau_x;
                dev_ns_div_tau_y[faceidx] = div_tau_y;
                dev_ns_div_tau_z[faceidx] = div_tau_z;
 (DIR) diff --git a/tests/io_tests_fluid.py b/tests/io_tests_fluid.py
       t@@ -23,6 +23,7 @@ compare(orig, py, "Python IO:")
        
        # Test C++ IO routines
        #orig.run(verbose=True, hideinputfile=True)
       +orig.run(dry=True)
        orig.run(verbose=True, hideinputfile=False, cudamemcheck=True)
        cpp = sphere.sim(fluid=True)
        cpp.readbin("../output/" + orig.sid + ".output00000.bin", verbose=False)