tdarcy.cuh: remove coalesced write synchronization for porosity calculation - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit e6637c4d721e810b55d400e45a2ad74a0d7cf239
(DIR) parent b2a1cf15f12e85f9c396cb65dd3fbf997c27b738
(HTM) Author: Anders Damsgaard <anders@adamsgaard.dk>
Date: Tue, 21 Feb 2023 11:42:45 +0100
darcy.cuh: remove coalesced write synchronization for porosity calculation
Diffstat:
M src/darcy.cuh | 55 +++++++++++++++----------------
1 file changed, 26 insertions(+), 29 deletions(-)
---
(DIR) diff --git a/src/darcy.cuh b/src/darcy.cuh
t@@ -321,7 +321,7 @@ __device__ Float3 abs(const Float3 v)
{
return MAKE_FLOAT3(abs(v.x), abs(v.y), abs(v.z));
}
-
+
// Returns a weighting factor based on particle position and fluid center
// position
t@@ -354,8 +354,8 @@ __device__ Float weightDist(
return 0.0;
}
-// Find the porosity in each cell on the base of a bilinear weighing scheme,
-// centered at the cell center.
+// Find the porosity in each cell on the base of a bilinear weighing scheme,
+// centered at the cell center.
__global__ void findDarcyPorositiesLinear(
const unsigned int* __restrict__ dev_cellStart, // in
const unsigned int* __restrict__ dev_cellEnd, // in
t@@ -408,7 +408,6 @@ __global__ void findDarcyPorositiesLinear(
Float vp_avg_denum = 0.0;
// Read old porosity
- __syncthreads();
Float phi_0 = dev_darcy_phi[d_idx(x,y,z)];
// The cell 3d index
t@@ -478,7 +477,7 @@ __global__ void findDarcyPorositiesLinear(
// Find center distance
dist = MAKE_FLOAT3(
- X.x - xr.x,
+ X.x - xr.x,
X.y - xr.y,
X.z - xr.z);
dist += distmod;
t@@ -496,7 +495,7 @@ __global__ void findDarcyPorositiesLinear(
// Eulerian update of positions
xr += v*devC_dt;
dist = MAKE_FLOAT3(
- X.x - xr.x,
+ X.x - xr.x,
X.y - xr.y,
X.z - xr.z) + distmod;
solid_volume_new +=
t@@ -528,7 +527,6 @@ __global__ void findDarcyPorositiesLinear(
}
// Save porosity and porosity change
- __syncthreads();
const unsigned int cellidx = d_idx(x,y,z);
dev_darcy_phi[cellidx] = phi*c_phi;
dev_darcy_dphi[cellidx] = dphi*c_phi;
t@@ -540,7 +538,6 @@ __global__ void findDarcyPorositiesLinear(
#endif
} else {
- __syncthreads();
const unsigned int cellidx = d_idx(x,y,z);
dev_darcy_phi[cellidx] = 0.9;
t@@ -550,8 +547,8 @@ __global__ void findDarcyPorositiesLinear(
}
-// Copy the porosity, porosity change, div_v_p and vp_avg values to the grid
-// edges from the grid interior at the frictionless Y boundaries (grid.periodic
+// Copy the porosity, porosity change, div_v_p and vp_avg values to the grid
+// edges from the grid interior at the frictionless Y boundaries (grid.periodic
// == 2).
__global__ void copyDarcyPorositiesToEdges(
Float* __restrict__ dev_darcy_phi, // in + out
t@@ -594,8 +591,8 @@ __global__ void copyDarcyPorositiesToEdges(
}
-// Copy the porosity, porosity change, div_v_p and vp_avg values to the grid
-// bottom from the grid interior at the frictionless Y boundaries (grid.periodic
+// Copy the porosity, porosity change, div_v_p and vp_avg values to the grid
+// bottom from the grid interior at the frictionless Y boundaries (grid.periodic
// == 2).
__global__ void copyDarcyPorositiesToBottom(
Float* __restrict__ dev_darcy_phi, // in + out
t@@ -629,7 +626,7 @@ __global__ void copyDarcyPorositiesToBottom(
// Find the porosity in each cell on the base of a sphere, centered at the cell
-// center.
+// center.
__global__ void findDarcyPorosities(
const unsigned int* __restrict__ dev_cellStart, // in
const unsigned int* __restrict__ dev_cellEnd, // in
t@@ -737,7 +734,7 @@ __global__ void findDarcyPorosities(
// Find center distance
dist = MAKE_FLOAT3(
- X.x - xr.x,
+ X.x - xr.x,
X.y - xr.y,
X.z - xr.z);
dist += distmod;
t@@ -759,10 +756,10 @@ __global__ void findDarcyPorosities(
//// Find projected new void volume
// Eulerian update of positions
xr += v*devC_dt;
-
+
// Find center distance
dist = MAKE_FLOAT3(
- X.x - xr.x,
+ X.x - xr.x,
X.y - xr.y,
X.z - xr.z);
dist += distmod;
t@@ -935,7 +932,7 @@ __global__ void findDarcyPressureGradient(
p_xn, p_xp,
p_yn, p_yp,
p_zn, p_zp,
- grad_p.x, grad_p.y, grad_p.z); // */
+ grad_p.x, grad_p.y, grad_p.z); // */
#ifdef CHECK_FLUID_FINITE
checkFiniteFloat3("grad_p", x, y, z, grad_p);
#endif
t@@ -1049,7 +1046,7 @@ __global__ void findDarcyPressureForceLinear(
i_y, d_iy,
i_z, d_iz,
n.x, n.y, n.z,
- grad_p_iter.x, grad_p_iter.y, grad_p_iter.z,
+ grad_p_iter.x, grad_p_iter.y, grad_p_iter.z,
s,
s*grad_p_iter.x,
s*grad_p_iter.y,
t@@ -1201,7 +1198,7 @@ __global__ void setDarcyTopPressure(
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
-
+
// check that the thread is located at the top boundary or at the top wall
if (x < devC_grid.num[0] &&
y < devC_grid.num[1] &&
t@@ -1225,7 +1222,7 @@ __global__ void setDarcyTopWallPressure(
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
-
+
// check that the thread is located at the top boundary
if (x < devC_grid.num[0] &&
y < devC_grid.num[1] &&
t@@ -1248,7 +1245,7 @@ __global__ void setDarcyTopWallFixedFlow(
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
-
+
// check that the thread is located at the top boundary
if (x < devC_grid.num[0] &&
y < devC_grid.num[1] &&
t@@ -1511,7 +1508,7 @@ __global__ void firstDarcySolution(
(phi_yp - phi_yn)/(dy + dy),
(phi_zp - phi_zn)/(dz + dz));
- // Solve div(k*grad(p)) as a single term, using harmonic mean for
+ // Solve div(k*grad(p)) as a single term, using harmonic mean for
// permeability. k_HM*grad(p) is found between the pressure nodes.
const Float div_k_grad_p =
(2.*k_xp*k/(k_xp + k) *
t@@ -1537,8 +1534,8 @@ __global__ void firstDarcySolution(
-(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
*(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi_central));
- // Dirichlet BC at fixed-pressure boundaries and at the dynamic top
- // wall. wall0_iz will be larger than the grid if the wall isn't
+ // Dirichlet BC at fixed-pressure boundaries and at the dynamic top
+ // wall. wall0_iz will be larger than the grid if the wall isn't
// dynamic
if ((bc_bot == 0 && z == 0) || (bc_top == 0 && z == nz-1)
|| (z >= wall0_iz && bc_top == 0)
t@@ -1548,13 +1545,13 @@ __global__ void firstDarcySolution(
dp_expl = 0.0;
#ifdef REPORT_FORCING_TERMS
- const Float dp_diff =
+ const Float dp_diff =
ndem*devC_dt/(beta_f*phi*mu)
*div_k_grad_p;
const Float dp_forc =
-(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
*(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi));
-
+
printf("\n%d,%d,%d firstDarcySolution\n"
"p = %e\n"
"p_x = %e, %e\n"
t@@ -1694,7 +1691,7 @@ __global__ void updateDarcySolution(
(phi_yp - phi_yn)/(dy + dy),
(phi_zp - phi_zn)/(dz + dz));
- // Solve div(k*grad(p)) as a single term, using harmonic mean for
+ // Solve div(k*grad(p)) as a single term, using harmonic mean for
// permeability. k_HM*grad(p) is found between the pressure nodes.
const Float div_k_grad_p =
(2.*k_xp*k/(k_xp + k) *
t@@ -1721,8 +1718,8 @@ __global__ void updateDarcySolution(
-(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
*(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi_central));
- // Dirichlet BC at fixed-pressure boundaries and at the dynamic top
- // wall. wall0_iz will be larger than the grid if the wall isn't
+ // Dirichlet BC at fixed-pressure boundaries and at the dynamic top
+ // wall. wall0_iz will be larger than the grid if the wall isn't
// dynamic
if ((bc_bot == 0 && z == 0) || (bc_top == 0 && z == nz-1)
|| (z >= wall0_iz && bc_top == 0)