trenamed tau_eff to tau_eff_x to reflect scalar value - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 5d835dd04a17763eb395529531f7c6b5c05245db
 (DIR) parent cbdda5df5cf0c5d7ce15213aa4591a78737b6b4e
 (HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
       Date:   Wed, 14 Jan 2015 13:21:13 +0100
       
       renamed tau_eff to tau_eff_x to reflect scalar value
       
       Diffstat:
         M src/device.cu                       |       8 ++++++++
         M src/integration.cuh                 |      32 ++++++++++++++++++++++++++++++-
         M src/sphere.h                        |       2 ++
       
       3 files changed, 41 insertions(+), 1 deletion(-)
       ---
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -382,9 +382,11 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
            cudaMalloc((void**)&dev_walls_nx, sizeof(Float4)*walls.nw);
            cudaMalloc((void**)&dev_walls_mvfd, sizeof(Float4)*walls.nw);
            cudaMalloc((void**)&dev_walls_tau_x, sizeof(Float)*walls.nw);
       +    cudaMalloc((void**)&dev_walls_tau_eff_x_pp, sizeof(Float)*walls.nw*np);
            cudaMalloc((void**)&dev_walls_force_pp, sizeof(Float)*walls.nw*np);
            cudaMalloc((void**)&dev_walls_acc, sizeof(Float)*walls.nw);
            // dev_walls_force_partial allocated later
       +    // dev_walls_tau_eff_x_partial allocated later
        
            checkForCudaErrors("End of allocateGlobalDeviceMemory");
            if (verbose == 1)
       t@@ -551,6 +553,8 @@ __host__ void DEM::freeGlobalDeviceMemory()
            cudaFree(dev_walls_force_partial);
            cudaFree(dev_walls_force_pp);
            cudaFree(dev_walls_acc);
       +    cudaFree(dev_walls_tau_eff_x_pp);
       +    cudaFree(dev_walls_tau_eff_x_partial);
        
            // Fluid arrays
            if (fluid == 1 && cfd_solver == 0) {
       t@@ -801,6 +805,10 @@ __host__ void DEM::startTime()
            cudaMalloc((void**)&dev_walls_force_partial,
                    sizeof(Float)*dimGrid.x*walls.nw);
        
       +    // Pre-sum of shear stress per wall
       +    cudaMalloc((void**)&dev_walls_tau_eff_x_partial,
       +            sizeof(Float)*dimGrid.x*walls.nw);
       +
            // Report to stdout
            if (verbose == 1) {
                cout << "\n  Device memory allocation and transfer complete.\n"
 (DIR) diff --git a/src/integration.cuh b/src/integration.cuh
       t@@ -399,11 +399,41 @@ __global__ void integrateWalls(
                    __syncthreads();
                    dev_walls_nx[idx]   = w_nx;
                    dev_walls_mvfd[idx] = w_mvfd;
       -            dev_walls_acc[idx] = acc;
       +            dev_walls_acc[idx]  = acc;
                }
            }
        } // End of integrateWalls(...)
        
        
       +// Finds the x-direction shear stress contribution of every particle: the
       +// value is non-zero only for fixed-velocity particles in the upper half of
       +// the grid. The fixvel value is read from vel.w. All arrays are indexed by
       +// the thread id, i.e. assumed to share one particle order -- TODO confirm.
       +__global__ void findShearStressOnFixedMovingParticles(
       +    const Float4* __restrict__ dev_x,
       +    const Float4* __restrict__ dev_vel,
       +    const Float4* __restrict__ dev_force,
       +    Float* __restrict__ dev_walls_tau_eff_x_pp)
       +{
       +    unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
       +
       +    if (idx < devC_np) { // Condition prevents block size error
       +        // Read only the scalar components needed. No shared memory is used
       +        // and each thread touches only its own element, so no block barrier
       +        // is needed (a __syncthreads() in this branch could be divergent).
       +        const Float z       = dev_x[idx].z;
       +        const Float fixvel  = dev_vel[idx].w;
       +        const Float force_x = dev_force[idx].x;
       +
       +        // Only select fixed velocity (fixvel > 0.0) particles in the upper
       +        // half of the grid (z > L[2]/2); all other particles contribute zero
       +        Float f_x = 0.0;
       +        if (fixvel > 0.0 && z > devC_grid.L[2]*0.5)
       +            f_x = force_x;
       +
       +        // Convert force to shear stress (force over the L[0]*L[1] area)
       +        dev_walls_tau_eff_x_pp[idx] = f_x/(devC_grid.L[0]*devC_grid.L[1]);
       +    }
       +}
        #endif
        // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
 (DIR) diff --git a/src/sphere.h b/src/sphere.h
       t@@ -101,6 +101,8 @@ class DEM {
                Float         *dev_walls_force_partial; // Pre-sum per wall
                Float         *dev_walls_force_pp;  // Force per particle per wall
                Float         *dev_walls_acc;       // Wall acceleration
       +        Float         *dev_walls_tau_eff_x_pp;      // Shear stress per particle
       +        Float         *dev_walls_tau_eff_x_partial; // Pre-sum of shear stress
        
                // Bond arrays
                uint2         *dev_bonds;           // Particle bond pairs