timplement shear stress BC - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit d299ad9ed8edbb6652677fe9568ce4f9a78b6299
 (DIR) parent cbd624d82308f27331fa8781ba282eff5661645f
 (HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
       Date:   Wed, 14 Jan 2015 13:54:53 +0100
       
       implement shear stress BC
       
       Diffstat:
         M src/device.cu                       |      38 ++++++++++++++++++++++++++++++-
         M src/file_io.cpp                     |       4 ++--
         M src/integration.cuh                 |      24 +++++++++++++++---------
       
       3 files changed, 54 insertions(+), 12 deletions(-)
       ---
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -2159,6 +2159,36 @@ __host__ void DEM::startTime()
                //break; // end after first iteration
        
                if (np > 0) {
       +
       +            // Find shear stresses on upper fixed particles if a shear stress BC
       +            // is specified (wmode[0] == 3)
       +            if (walls.nw > 0 && walls.wmode[0] == 3) {
       +
       +                if (PROFILING == 1)
       +                    startTimer(&kernel_tic);
       +                findShearStressOnFixedMovingParticles<<<dimGrid, dimBlock>>>
       +                    (dev_x,
       +                     dev_vel,
       +                     dev_force,
       +                     dev_walls_tau_eff_x_pp);
       +                cudaThreadSynchronize();
       +                if (PROFILING == 1)
       +                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                            &t_summation);
       +                checkForCudaErrorsIter(
       +                        "Post findShearStressOnFixedMovingParticles", iter);
       +
       +                if (PROFILING == 1)
       +                    startTimer(&kernel_tic);
       +                summation<<<dimGrid, dimBlock>>>(dev_walls_tau_eff_x_pp,
       +                        dev_walls_tau_eff_x_partial);
       +                cudaThreadSynchronize();
       +                if (PROFILING == 1)
       +                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       +                            &t_summation);
       +                checkForCudaErrorsIter("Post shear stress summation", iter);
       +            }
       +
                    // Update particle kinematics
                    if (PROFILING == 1)
                        startTimer(&kernel_tic);
       t@@ -2177,7 +2207,13 @@ __host__ void DEM::startTime()
                            dev_angvel0,
                            dev_xyzsum,
                            dev_gridParticleIndex,
       -                    iter);
       +                    iter,
       +                    dev_walls_wmode,
       +                    dev_walls_mvfd,
       +                    dev_walls_tau_eff_x_partial,
       +                    dev_walls_tau_x,
       +                    walls.tau_x[0],
       +                    blocksPerGrid);
                    cudaThreadSynchronize();
                    checkForCudaErrorsIter("Post integrate", iter);
                    if (PROFILING == 1)
 (DIR) diff --git a/src/file_io.cpp b/src/file_io.cpp
       t@@ -220,7 +220,7 @@ void DEM::readbin(const char *target)
            }
            ifs.read(as_bytes(params.sigma0_A), sizeof(params.sigma0_A));
            ifs.read(as_bytes(params.sigma0_f), sizeof(params.sigma0_f));
       -    ifs.read(as_bytes(walls.tau_x), sizeof(walls.tau_x));
       +    ifs.read(as_bytes(walls.tau_x[0]), sizeof(walls.tau_x[0]));
        
            // Read bond parameters
            ifs.read(as_bytes(params.lambda_bar), sizeof(params.lambda_bar));
       t@@ -522,7 +522,7 @@ void DEM::writebin(const char *target)
                }
                ofs.write(as_bytes(params.sigma0_A), sizeof(params.sigma0_A));
                ofs.write(as_bytes(params.sigma0_f), sizeof(params.sigma0_f));
       -        ofs.write(as_bytes(walls.tau_x), sizeof(walls.tau_x));
       +        ofs.write(as_bytes(walls.tau_x[0]), sizeof(walls.tau_x[0]));
        
                // Write bond parameters
                ofs.write(as_bytes(params.lambda_bar), sizeof(params.lambda_bar));
 (DIR) diff --git a/src/integration.cuh b/src/integration.cuh
       t@@ -29,8 +29,10 @@ __global__ void integrate(
            const unsigned int* __restrict__ dev_gridParticleIndex,
            const unsigned int iter,
            const int* __restrict__ dev_walls_wmode,
       +    const Float4* __restrict__ dev_walls_mvfd,
            const Float* __restrict__ dev_walls_tau_eff_x_partial,
            const Float* __restrict__ dev_walls_tau_x,
       +    const Float tau_x,
            const unsigned int blocksPerGrid)
        {
            unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
       t@@ -50,12 +52,16 @@ __global__ void integrate(
                Float4 xyzsum = dev_xyzsum[orig_idx];
        
                // Read the mode of the top wall
       -        const int wall0mode = dev_walls_wmode[0];
       -        const Float wall0mass = dev_walls_mvfd[0].x;
       +        int wall0mode;
       +        Float wall0mass;
       +        if (devC_nw > 0) {
       +            wall0mode = dev_walls_wmode[0];
       +            wall0mass = dev_walls_mvfd[0].x;
       +        }
        
                // Find the final sum of shear stresses on the top particles
                Float tau_eff_x = 0.0;
       -        if (wall0mode == 3)
       +        if (devC_nw > 0 && wall0mode == 3)
                    for (int i=0; i<blocksPerGrid; ++i)
                        tau_eff_x += dev_walls_tau_eff_x_partial[i];
        
       t@@ -108,9 +114,9 @@ __global__ void integrate(
                // Fixed shear stress BC
                if (wall0mode == 3 && vel.w > 0.0001 && x.z > devC_grid.L[2]*0.5) {
        
       -            // the force should be positive when abs(tau) > abs(tau_eff_x)
       +            // the force should be positive when abs(tau_x) > abs(tau_eff_x)
                    const Float f_tau_x =
       -                (tau + tau_eff_x)*devC_grid.L[0]*devC_grid.L[1];
       +                (tau_x + tau_eff_x)*devC_grid.L[0]*devC_grid.L[1];
        
                    acc.x = f_tau_x/wall0mass;  // acceleration = force/mass
                    acc.y = 0.0;
       t@@ -449,15 +455,15 @@ __global__ void findShearStressOnFixedMovingParticles(
                // Copy data to temporary arrays to avoid any potential
                // read-after-write, write-after-read, or write-after-write hazards. 
                __syncthreads();
       -        const Float4 x     = dev_x[idx];
       -        const Float4 force = dev_force[orig_idx];
       +        const Float4 x       = dev_x[idx];
       +        const Float  force_x = dev_force[idx].x;
        
       -        Float4 f_x = 0.0;
       +        Float f_x = 0.0;
        
                // Only select fixed velocity (fixvel > 0.0, fixvel = vel.w) particles
                // at the top boundary (z > L[0]/2)
                if (vel.w > 0.0 && x.z > devC_grid.L[2]*0.5)
       -            f_x = force.x;
       +            f_x = force_x;
        
                __syncthreads();
                // Convert force to shear stress and save