tdarcy.cuh: remove coalesced write synchronization for porosity calculation - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit e6637c4d721e810b55d400e45a2ad74a0d7cf239
 (DIR) parent b2a1cf15f12e85f9c396cb65dd3fbf997c27b738
 (HTM) Author: Anders Damsgaard <anders@adamsgaard.dk>
       Date:   Tue, 21 Feb 2023 11:42:45 +0100
       
       darcy.cuh: remove coalesced write synchronization for porosity calculation
       
       Diffstat:
         M src/darcy.cuh                       |      55 +++++++++++++++----------------
       
       1 file changed, 26 insertions(+), 29 deletions(-)
       ---
 (DIR) diff --git a/src/darcy.cuh b/src/darcy.cuh
       t@@ -321,7 +321,7 @@ __device__ Float3 abs(const Float3 v)
        {
            return MAKE_FLOAT3(abs(v.x), abs(v.y), abs(v.z));
        }
       -            
       +
        
        // Returns a weighting factor based on particle position and fluid center
        // position
       t@@ -354,8 +354,8 @@ __device__ Float weightDist(
                return 0.0;
        }
        
       -// Find the porosity in each cell on the base of a bilinear weighing scheme, 
       -// centered at the cell center. 
       +// Find the porosity in each cell on the base of a bilinear weighing scheme,
       +// centered at the cell center.
        __global__ void findDarcyPorositiesLinear(
                const unsigned int* __restrict__ dev_cellStart,   // in
                const unsigned int* __restrict__ dev_cellEnd,     // in
       t@@ -408,7 +408,6 @@ __global__ void findDarcyPorositiesLinear(
                    Float  vp_avg_denum = 0.0;
        
                    // Read old porosity
       -            __syncthreads();
                    Float phi_0 = dev_darcy_phi[d_idx(x,y,z)];
        
                    // The cell 3d index
       t@@ -478,7 +477,7 @@ __global__ void findDarcyPorositiesLinear(
        
                                            // Find center distance
                                            dist = MAKE_FLOAT3(
       -                                            X.x - xr.x, 
       +                                            X.x - xr.x,
                                                    X.y - xr.y,
                                                    X.z - xr.z);
                                            dist += distmod;
       t@@ -496,7 +495,7 @@ __global__ void findDarcyPorositiesLinear(
                                            // Eulerian update of positions
                                            xr += v*devC_dt;
                                            dist = MAKE_FLOAT3(
       -                                            X.x - xr.x, 
       +                                            X.x - xr.x,
                                                    X.y - xr.y,
                                                    X.z - xr.z) + distmod;
                                            solid_volume_new +=
       t@@ -528,7 +527,6 @@ __global__ void findDarcyPorositiesLinear(
                    }
        
                    // Save porosity and porosity change
       -            __syncthreads();
                    const unsigned int cellidx = d_idx(x,y,z);
                    dev_darcy_phi[cellidx]     = phi*c_phi;
                    dev_darcy_dphi[cellidx]    = dphi*c_phi;
       t@@ -540,7 +538,6 @@ __global__ void findDarcyPorositiesLinear(
        #endif
                } else {
        
       -            __syncthreads();
                    const unsigned int cellidx = d_idx(x,y,z);
        
                    dev_darcy_phi[cellidx]  = 0.9;
       t@@ -550,8 +547,8 @@ __global__ void findDarcyPorositiesLinear(
        }
        
        
       -// Copy the porosity, porosity change, div_v_p and vp_avg values to the grid 
       -// edges from the grid interior at the frictionless Y boundaries (grid.periodic 
       +// Copy the porosity, porosity change, div_v_p and vp_avg values to the grid
       +// edges from the grid interior at the frictionless Y boundaries (grid.periodic
        // == 2).
        __global__ void copyDarcyPorositiesToEdges(
                Float*  __restrict__ dev_darcy_phi,               // in + out
       t@@ -594,8 +591,8 @@ __global__ void copyDarcyPorositiesToEdges(
        }
        
        
       -// Copy the porosity, porosity change, div_v_p and vp_avg values to the grid 
       -// bottom from the grid interior at the frictionless Y boundaries (grid.periodic 
       +// Copy the porosity, porosity change, div_v_p and vp_avg values to the grid
       +// bottom from the grid interior at the frictionless Y boundaries (grid.periodic
        // == 2).
        __global__ void copyDarcyPorositiesToBottom(
                Float*  __restrict__ dev_darcy_phi,               // in + out
       t@@ -629,7 +626,7 @@ __global__ void copyDarcyPorositiesToBottom(
        
        
        // Find the porosity in each cell on the base of a sphere, centered at the cell
       -// center. 
       +// center.
        __global__ void findDarcyPorosities(
                const unsigned int* __restrict__ dev_cellStart,   // in
                const unsigned int* __restrict__ dev_cellEnd,     // in
       t@@ -737,7 +734,7 @@ __global__ void findDarcyPorosities(
        
                                            // Find center distance
                                            dist = MAKE_FLOAT3(
       -                                            X.x - xr.x, 
       +                                            X.x - xr.x,
                                                    X.y - xr.y,
                                                    X.z - xr.z);
                                            dist += distmod;
       t@@ -759,10 +756,10 @@ __global__ void findDarcyPorosities(
                                            //// Find projected new void volume
                                            // Eulerian update of positions
                                            xr += v*devC_dt;
       -                                    
       +
                                            // Find center distance
                                            dist = MAKE_FLOAT3(
       -                                            X.x - xr.x, 
       +                                            X.x - xr.x,
                                                    X.y - xr.y,
                                                    X.z - xr.z);
                                            dist += distmod;
       t@@ -935,7 +932,7 @@ __global__ void findDarcyPressureGradient(
                        p_xn, p_xp,
                        p_yn, p_yp,
                        p_zn, p_zp,
       -                grad_p.x, grad_p.y, grad_p.z); // */ 
       +                grad_p.x, grad_p.y, grad_p.z); // */
        #ifdef CHECK_FLUID_FINITE
                checkFiniteFloat3("grad_p", x, y, z, grad_p);
        #endif
       t@@ -1049,7 +1046,7 @@ __global__ void findDarcyPressureForceLinear(
                                    i_y, d_iy,
                                    i_z, d_iz,
                                    n.x, n.y, n.z,
       -                            grad_p_iter.x, grad_p_iter.y, grad_p_iter.z, 
       +                            grad_p_iter.x, grad_p_iter.y, grad_p_iter.z,
                                    s,
                                    s*grad_p_iter.x,
                                    s*grad_p_iter.y,
       t@@ -1201,7 +1198,7 @@ __global__ void setDarcyTopPressure(
            const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
            const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
            const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       -    
       +
            // check that the thread is located at the top boundary or at the top wall
            if (x < devC_grid.num[0] &&
                y < devC_grid.num[1] &&
       t@@ -1225,7 +1222,7 @@ __global__ void setDarcyTopWallPressure(
            const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
            const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
            const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       -    
       +
            // check that the thread is located at the top boundary
            if (x < devC_grid.num[0] &&
                y < devC_grid.num[1] &&
       t@@ -1248,7 +1245,7 @@ __global__ void setDarcyTopWallFixedFlow(
            const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
            const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
            const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
       -    
       +
            // check that the thread is located at the top boundary
            if (x < devC_grid.num[0] &&
                y < devC_grid.num[1] &&
       t@@ -1511,7 +1508,7 @@ __global__ void firstDarcySolution(
                        (phi_yp - phi_yn)/(dy + dy),
                        (phi_zp - phi_zn)/(dz + dz));
        
       -        // Solve div(k*grad(p)) as a single term, using harmonic mean for 
       +        // Solve div(k*grad(p)) as a single term, using harmonic mean for
                // permeability. k_HM*grad(p) is found between the pressure nodes.
                const Float div_k_grad_p =
                        (2.*k_xp*k/(k_xp + k) *
       t@@ -1537,8 +1534,8 @@ __global__ void firstDarcySolution(
                    -(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
                    *(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi_central));
        
       -        // Dirichlet BC at fixed-pressure boundaries and at the dynamic top 
       -        // wall.  wall0_iz will be larger than the grid if the wall isn't 
       +        // Dirichlet BC at fixed-pressure boundaries and at the dynamic top
       +        // wall.  wall0_iz will be larger than the grid if the wall isn't
                // dynamic
                if ((bc_bot == 0 && z == 0) || (bc_top == 0 && z == nz-1)
                        || (z >= wall0_iz && bc_top == 0)
       t@@ -1548,13 +1545,13 @@ __global__ void firstDarcySolution(
                    dp_expl = 0.0;
        
        #ifdef REPORT_FORCING_TERMS
       -            const Float dp_diff = 
       +            const Float dp_diff =
                        ndem*devC_dt/(beta_f*phi*mu)
                        *div_k_grad_p;
                    const Float dp_forc =
                        -(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
                        *(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi));
       -                
       +
                printf("\n%d,%d,%d firstDarcySolution\n"
                        "p           = %e\n"
                        "p_x         = %e, %e\n"
       t@@ -1694,7 +1691,7 @@ __global__ void updateDarcySolution(
                        (phi_yp - phi_yn)/(dy + dy),
                        (phi_zp - phi_zn)/(dz + dz));
        
       -        // Solve div(k*grad(p)) as a single term, using harmonic mean for 
       +        // Solve div(k*grad(p)) as a single term, using harmonic mean for
                // permeability. k_HM*grad(p) is found between the pressure nodes.
                const Float div_k_grad_p =
                        (2.*k_xp*k/(k_xp + k) *
       t@@ -1721,8 +1718,8 @@ __global__ void updateDarcySolution(
                    -(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
                    *(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi_central));
        
       -        // Dirichlet BC at fixed-pressure boundaries and at the dynamic top 
       -        // wall.  wall0_iz will be larger than the grid if the wall isn't 
       +        // Dirichlet BC at fixed-pressure boundaries and at the dynamic top
       +        // wall.  wall0_iz will be larger than the grid if the wall isn't
                // dynamic
                if ((bc_bot == 0 && z == 0) || (bc_top == 0 && z == nz-1)
                        || (z >= wall0_iz && bc_top == 0)