tdarcy.cuh: remove synchronization calls for coalesced darcy calls - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 7da84647e15a6ce230d9c2f0fcbbe1111fea6bf8
 (DIR) parent e6637c4d721e810b55d400e45a2ad74a0d7cf239
 (HTM) Author: Anders Damsgaard <anders@adamsgaard.dk>
       Date:   Tue, 21 Feb 2023 15:01:35 +0100
       
       darcy.cuh: remove synchronization calls for coalesced darcy calls
       
       Diffstat:
         M src/darcy.cuh                       |      48 ++-----------------------------
       
       1 file changed, 2 insertions(+), 46 deletions(-)
       ---
 (DIR) diff --git a/src/darcy.cuh b/src/darcy.cuh
       t@@ -164,10 +164,8 @@ __global__ void setDarcyNormZero(
            const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
        
            // check that we are not outside the fluid grid
       -    if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
       -        __syncthreads();
       +    if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2])
                dev_darcy_norm[d_idx(x,y,z)] = 0.0;
       -    }
        }
        
        // Set an array of scalars to 0.0 inside devC_grid
       t@@ -180,10 +178,8 @@ __global__ void setDarcyZeros(T* __restrict__ dev_scalarfield)
            const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
        
            // check that we are not outside the fluid grid
       -    if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
       -        __syncthreads();
       +    if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2])
                dev_scalarfield[d_idx(x,y,z)] = 0.0;
       -    }
        }
        
        
       t@@ -455,21 +451,18 @@ __global__ void findDarcyPorositiesLinear(
                                        * targetCell.z;
        
                                    // Lowest particle index in cell
       -                            __syncthreads();
                                    startIdx = dev_cellStart[cellID];
        
                                    // Make sure cell is not empty
                                    if (startIdx != 0xffffffff) {
        
                                        // Highest particle index in cell
       -                                __syncthreads();
                                        endIdx = dev_cellEnd[cellID];
        
                                        // Iterate over cell particles
                                        for (i=startIdx; i<endIdx; ++i) {
        
                                            // Read particle position and radius
       -                                    __syncthreads();
                                            xr = dev_x_sorted[i];
                                            v  = dev_vel_sorted[i];
                                            //x3 = MAKE_FLOAT3(xr.x, xr.y, xr.z);
       t@@ -582,7 +575,6 @@ __global__ void copyDarcyPorositiesToEdges(
                    const unsigned int readidx = d_idx(x, y_read, z);
                    const unsigned int writeidx = d_idx(x, y, z);
        
       -            __syncthreads();
                    dev_darcy_phi[writeidx] = dev_darcy_phi[readidx];
                    dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx];
                    dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx];
       t@@ -616,7 +608,6 @@ __global__ void copyDarcyPorositiesToBottom(
                    const unsigned int readidx = d_idx(x, y, 1);
                    const unsigned int writeidx = d_idx(x, y, z);
        
       -            __syncthreads();
                    dev_darcy_phi[writeidx] = dev_darcy_phi[readidx];
                    dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx];
                    dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx];
       t@@ -679,7 +670,6 @@ __global__ void findDarcyPorosities(
                    //unsigned int n = 0;
        
                    // Read old porosity
       -            __syncthreads();
                    Float phi_0 = dev_darcy_phi[d_idx(x,y,z)];
        
                    // The cell 3d index
       t@@ -713,21 +703,18 @@ __global__ void findDarcyPorosities(
                                        * targetCell.z;
        
                                    // Lowest particle index in cell
       -                            __syncthreads();
                                    startIdx = dev_cellStart[cellID];
        
                                    // Make sure cell is not empty
                                    if (startIdx != 0xffffffff) {
        
                                        // Highest particle index in cell
       -                                __syncthreads();
                                        endIdx = dev_cellEnd[cellID];
        
                                        // Iterate over cell particles
                                        for (i=startIdx; i<endIdx; ++i) {
        
                                            // Read particle position and radius
       -                                    __syncthreads();
                                            xr = dev_x_sorted[i];
                                            v  = dev_vel_sorted[i];
                                            r = xr.w;
       t@@ -796,7 +783,6 @@ __global__ void findDarcyPorosities(
                        dphi = 0.5*(phi_new - phi_0);
        
                    // Save porosity and porosity change
       -            __syncthreads();
                    //phi = 0.5; dphi = 0.0; // disable porosity effects
                    const unsigned int cellidx = d_idx(x,y,z);
                    dev_darcy_phi[cellidx]  = phi*c_phi;
       t@@ -823,10 +809,7 @@ __global__ void findDarcyPorosities(
                    (void)checkFiniteFloat("dphi", x, y, z, dphi);
        #endif
                } else {
       -
       -            __syncthreads();
                    const unsigned int cellidx = d_idx(x,y,z);
       -
                    dev_darcy_phi[cellidx]  = 0.999;
                    dev_darcy_dphi[cellidx] = 0.0;
                }
       t@@ -854,7 +837,6 @@ __global__ void findDarcyParticleVelocityDivergence(
            if (x < nx && y < ny && z < nz) {
        
                // read values
       -        __syncthreads();
                Float v_p_xn = dev_darcy_v_p_x[d_vidx(x,y,z)];
                Float v_p_xp = dev_darcy_v_p_x[d_vidx(x+1,y,z)];
                Float v_p_yn = dev_darcy_v_p_y[d_vidx(x,y,z)];
       t@@ -873,7 +855,6 @@ __global__ void findDarcyParticleVelocityDivergence(
                    (v_p_yp - v_p_yn)/dy +
                    (v_p_zp - v_p_zn)/dz;
        
       -        __syncthreads();
                dev_darcy_div_v_p[d_idx(x,y,z)] = div_v_p;
        
        #ifdef CHECK_FLUID_FINITE
       t@@ -900,7 +881,6 @@ __global__ void findDarcyPressureGradient(
            if (x < nx && y < ny && z < nz) {
        
                // read values
       -        __syncthreads();
                Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
                Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
                Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
       t@@ -919,7 +899,6 @@ __global__ void findDarcyPressureGradient(
                        (p_yp - p_yn)/(dy + dy),
                        (p_zp - p_zn)/(dz + dz));
        
       -        __syncthreads();
                dev_darcy_grad_p[d_idx(x,y,z)] = grad_p;
        
                //if (grad_p.x != 0.0 || grad_p.y != 0.0 || grad_p.z != 0.0)
       t@@ -957,7 +936,6 @@ __global__ void findDarcyPressureForceLinear(
            if (i < devC_np) {
        
                // read particle information
       -        __syncthreads();
                const Float4 x = dev_x[i];
                const Float3 x3 = MAKE_FLOAT3(x.x, x.y, x.z);
        
       t@@ -979,7 +957,6 @@ __global__ void findDarcyPressureForceLinear(
                const Float dz = devC_grid.L[2]/nz;
        
                // read fluid information
       -        __syncthreads();
                //const Float phi = dev_darcy_phi[cellidx];
        
                // Cell center position
       t@@ -1002,7 +979,6 @@ __global__ void findDarcyPressureForceLinear(
                            iy_n = i_y + d_iy;
                            iz_n = i_z + d_iz;
        
       -                    __syncthreads();
                            grad_p_iter = dev_darcy_grad_p[d_idx(ix_n, iy_n, iz_n)];
        
                            // make sure edge and corner ghost nodes aren't read
       t@@ -1092,7 +1068,6 @@ __global__ void findDarcyPressureForceLinear(
                checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
        #endif
                // save force
       -        __syncthreads();
                dev_force[i]    += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
                dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
            }
       t@@ -1115,7 +1090,6 @@ __global__ void findDarcyPressureForce(
            if (i < devC_np) {
        
                // read particle information
       -        __syncthreads();
                const Float4 x = dev_x[i];
        
                // determine fluid cell containing the particle
       t@@ -1133,7 +1107,6 @@ __global__ void findDarcyPressureForce(
                const Float dz = devC_grid.L[2]/devC_grid.num[2];
        
                // read fluid information
       -        __syncthreads();
                //const Float phi = dev_darcy_phi[cellidx];
                const Float p_xn = dev_darcy_p[d_idx(i_x-1,i_y,i_z)];
                const Float p    = dev_darcy_p[cellidx];
       t@@ -1182,7 +1155,6 @@ __global__ void findDarcyPressureForce(
                checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
        #endif
                // save force
       -        __syncthreads();
                dev_force[i]    += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
                dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
            }
       t@@ -1207,7 +1179,6 @@ __global__ void setDarcyTopPressure(
                const unsigned int cellidx = idx(x,y,z);
        
                // Write the new pressure the top boundary cells
       -        __syncthreads();
                dev_darcy_p[cellidx] = new_pressure;
            }
        }
       t@@ -1231,7 +1202,6 @@ __global__ void setDarcyTopWallPressure(
                const unsigned int cellidx = idx(x,y,z);
        
                // Write the new pressure the top boundary cells
       -        __syncthreads();
                dev_darcy_p[cellidx] = new_pressure;
            }
        }
       t@@ -1252,7 +1222,6 @@ __global__ void setDarcyTopWallFixedFlow(
                z == wall0_iz+1) {
        
                // Write the new pressure the top boundary cells
       -        __syncthreads();
                const Float new_pressure = dev_darcy_p[idx(x,y,z-2)];
                dev_darcy_p[idx(x,y,z)] = new_pressure;
            }
       t@@ -1280,7 +1249,6 @@ __global__ void findDarcyPermeabilities(
                // 1D thread index
                const unsigned int cellidx = d_idx(x,y,z);
        
       -        __syncthreads();
                Float phi = dev_darcy_phi[cellidx];
        
                // avoid division by zero
       t@@ -1300,7 +1268,6 @@ __global__ void findDarcyPermeabilities(
                //k = fmin(2.7e-9, k);
                k = fmin(2.7e-10, k);
        
       -        __syncthreads();
                dev_darcy_k[cellidx] = k;
        
        #ifdef CHECK_FLUID_FINITE
       t@@ -1335,7 +1302,6 @@ __global__ void findDarcyPermeabilityGradients(
                const unsigned int cellidx = d_idx(x,y,z);
        
                // read values
       -        __syncthreads();
                const Float k_xn = dev_darcy_k[d_idx(x-1,y,z)];
                const Float k_xp = dev_darcy_k[d_idx(x+1,y,z)];
                const Float k_yn = dev_darcy_k[d_idx(x,y-1,z)];
       t@@ -1350,7 +1316,6 @@ __global__ void findDarcyPermeabilityGradients(
                        (k_zp - k_zn)/(dz+dz));
        
                // write result
       -        __syncthreads();
                dev_darcy_grad_k[cellidx] = grad_k;
        
                /*printf("%d,%d,%d findK:\n"
       t@@ -1389,7 +1354,6 @@ __global__ void findDarcyPressureChange(
                const unsigned int cellidx = d_idx(x,y,z);
        
                // read values
       -        __syncthreads();
                const Float p_old = dev_darcy_p_old[cellidx];
                const Float p     = dev_darcy_p[cellidx];
        
       t@@ -1401,7 +1365,6 @@ __global__ void findDarcyPressureChange(
                    dpdt = 0.0;
        
                // write result
       -        __syncthreads();
                dev_darcy_dpdt[cellidx] = dpdt;
        
                /*printf("%d,%d,%d\n"
       t@@ -1461,7 +1424,6 @@ __global__ void firstDarcySolution(
                const unsigned int cellidx = d_idx(x,y,z);
        
                // read values
       -        __syncthreads();
                const Float  phi_xn = dev_darcy_phi[d_idx(x-1,y,z)];
                const Float  phi    = dev_darcy_phi[cellidx];
                const Float  phi_xp = dev_darcy_phi[d_idx(x+1,y,z)];
       t@@ -1585,7 +1547,6 @@ __global__ void firstDarcySolution(
        #endif
        
                // save explicit integrated pressure change
       -        __syncthreads();
                dev_darcy_dp_expl[cellidx] = dp_expl;
        
        #ifdef CHECK_FLUID_FINITE
       t@@ -1641,7 +1602,6 @@ __global__ void updateDarcySolution(
                const unsigned int cellidx = d_idx(x,y,z);
        
                // read values
       -        __syncthreads();
                const Float  phi_xn = dev_darcy_phi[d_idx(x-1,y,z)];
                const Float  phi    = dev_darcy_phi[cellidx];
                const Float  phi_xp = dev_darcy_phi[d_idx(x+1,y,z)];
       t@@ -1778,7 +1738,6 @@ __global__ void updateDarcySolution(
        #endif
        
                // save new pressure and the residual
       -        __syncthreads();
                dev_darcy_p_new[cellidx] = p_new;
                dev_darcy_norm[cellidx]  = res_norm;
        
       t@@ -1817,7 +1776,6 @@ __global__ void findNewPressure(
                const Float dp = dev_darcy_dp[cellidx];
        
                // save new pressure
       -        __syncthreads();
                dev_darcy_p[cellidx] += dp;
        
                /*printf("%d,%d,%d\tp = % f\tp_new = % f\tres_norm = % f\n",
       t@@ -1860,7 +1818,6 @@ __global__ void findDarcyVelocities(
        
                const unsigned int cellidx = d_idx(x,y,z);
        
       -        __syncthreads();
                const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
                const Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
                const Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
       t@@ -1888,7 +1845,6 @@ __global__ void findDarcyVelocities(
                const Float3 v = (-k/mu * grad_p)/phi;
        
                // Save velocity
       -        __syncthreads();
                dev_darcy_v[cellidx] = v;
            }
        }