tdarcy.cuh: remove synchronization calls for coalesced darcy calls - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit 7da84647e15a6ce230d9c2f0fcbbe1111fea6bf8
(DIR) parent e6637c4d721e810b55d400e45a2ad74a0d7cf239
(HTM) Author: Anders Damsgaard <anders@adamsgaard.dk>
Date: Tue, 21 Feb 2023 15:01:35 +0100
darcy.cuh: remove synchronization calls for coalesced darcy calls
Diffstat:
M src/darcy.cuh | 48 ++-----------------------------
1 file changed, 2 insertions(+), 46 deletions(-)
---
(DIR) diff --git a/src/darcy.cuh b/src/darcy.cuh
t@@ -164,10 +164,8 @@ __global__ void setDarcyNormZero(
const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
// check that we are not outside the fluid grid
- if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
- __syncthreads();
+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2])
dev_darcy_norm[d_idx(x,y,z)] = 0.0;
- }
}
// Set an array of scalars to 0.0 inside devC_grid
t@@ -180,10 +178,8 @@ __global__ void setDarcyZeros(T* __restrict__ dev_scalarfield)
const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
// check that we are not outside the fluid grid
- if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
- __syncthreads();
+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2])
dev_scalarfield[d_idx(x,y,z)] = 0.0;
- }
}
t@@ -455,21 +451,18 @@ __global__ void findDarcyPorositiesLinear(
* targetCell.z;
// Lowest particle index in cell
- __syncthreads();
startIdx = dev_cellStart[cellID];
// Make sure cell is not empty
if (startIdx != 0xffffffff) {
// Highest particle index in cell
- __syncthreads();
endIdx = dev_cellEnd[cellID];
// Iterate over cell particles
for (i=startIdx; i<endIdx; ++i) {
// Read particle position and radius
- __syncthreads();
xr = dev_x_sorted[i];
v = dev_vel_sorted[i];
//x3 = MAKE_FLOAT3(xr.x, xr.y, xr.z);
t@@ -582,7 +575,6 @@ __global__ void copyDarcyPorositiesToEdges(
const unsigned int readidx = d_idx(x, y_read, z);
const unsigned int writeidx = d_idx(x, y, z);
- __syncthreads();
dev_darcy_phi[writeidx] = dev_darcy_phi[readidx];
dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx];
dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx];
t@@ -616,7 +608,6 @@ __global__ void copyDarcyPorositiesToBottom(
const unsigned int readidx = d_idx(x, y, 1);
const unsigned int writeidx = d_idx(x, y, z);
- __syncthreads();
dev_darcy_phi[writeidx] = dev_darcy_phi[readidx];
dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx];
dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx];
t@@ -679,7 +670,6 @@ __global__ void findDarcyPorosities(
//unsigned int n = 0;
// Read old porosity
- __syncthreads();
Float phi_0 = dev_darcy_phi[d_idx(x,y,z)];
// The cell 3d index
t@@ -713,21 +703,18 @@ __global__ void findDarcyPorosities(
* targetCell.z;
// Lowest particle index in cell
- __syncthreads();
startIdx = dev_cellStart[cellID];
// Make sure cell is not empty
if (startIdx != 0xffffffff) {
// Highest particle index in cell
- __syncthreads();
endIdx = dev_cellEnd[cellID];
// Iterate over cell particles
for (i=startIdx; i<endIdx; ++i) {
// Read particle position and radius
- __syncthreads();
xr = dev_x_sorted[i];
v = dev_vel_sorted[i];
r = xr.w;
t@@ -796,7 +783,6 @@ __global__ void findDarcyPorosities(
dphi = 0.5*(phi_new - phi_0);
// Save porosity and porosity change
- __syncthreads();
//phi = 0.5; dphi = 0.0; // disable porosity effects
const unsigned int cellidx = d_idx(x,y,z);
dev_darcy_phi[cellidx] = phi*c_phi;
t@@ -823,10 +809,7 @@ __global__ void findDarcyPorosities(
(void)checkFiniteFloat("dphi", x, y, z, dphi);
#endif
} else {
-
- __syncthreads();
const unsigned int cellidx = d_idx(x,y,z);
-
dev_darcy_phi[cellidx] = 0.999;
dev_darcy_dphi[cellidx] = 0.0;
}
t@@ -854,7 +837,6 @@ __global__ void findDarcyParticleVelocityDivergence(
if (x < nx && y < ny && z < nz) {
// read values
- __syncthreads();
Float v_p_xn = dev_darcy_v_p_x[d_vidx(x,y,z)];
Float v_p_xp = dev_darcy_v_p_x[d_vidx(x+1,y,z)];
Float v_p_yn = dev_darcy_v_p_y[d_vidx(x,y,z)];
t@@ -873,7 +855,6 @@ __global__ void findDarcyParticleVelocityDivergence(
(v_p_yp - v_p_yn)/dy +
(v_p_zp - v_p_zn)/dz;
- __syncthreads();
dev_darcy_div_v_p[d_idx(x,y,z)] = div_v_p;
#ifdef CHECK_FLUID_FINITE
t@@ -900,7 +881,6 @@ __global__ void findDarcyPressureGradient(
if (x < nx && y < ny && z < nz) {
// read values
- __syncthreads();
Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
t@@ -919,7 +899,6 @@ __global__ void findDarcyPressureGradient(
(p_yp - p_yn)/(dy + dy),
(p_zp - p_zn)/(dz + dz));
- __syncthreads();
dev_darcy_grad_p[d_idx(x,y,z)] = grad_p;
//if (grad_p.x != 0.0 || grad_p.y != 0.0 || grad_p.z != 0.0)
t@@ -957,7 +936,6 @@ __global__ void findDarcyPressureForceLinear(
if (i < devC_np) {
// read particle information
- __syncthreads();
const Float4 x = dev_x[i];
const Float3 x3 = MAKE_FLOAT3(x.x, x.y, x.z);
t@@ -979,7 +957,6 @@ __global__ void findDarcyPressureForceLinear(
const Float dz = devC_grid.L[2]/nz;
// read fluid information
- __syncthreads();
//const Float phi = dev_darcy_phi[cellidx];
// Cell center position
t@@ -1002,7 +979,6 @@ __global__ void findDarcyPressureForceLinear(
iy_n = i_y + d_iy;
iz_n = i_z + d_iz;
- __syncthreads();
grad_p_iter = dev_darcy_grad_p[d_idx(ix_n, iy_n, iz_n)];
// make sure edge and corner ghost nodes aren't read
t@@ -1092,7 +1068,6 @@ __global__ void findDarcyPressureForceLinear(
checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
#endif
// save force
- __syncthreads();
dev_force[i] += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
}
t@@ -1115,7 +1090,6 @@ __global__ void findDarcyPressureForce(
if (i < devC_np) {
// read particle information
- __syncthreads();
const Float4 x = dev_x[i];
// determine fluid cell containing the particle
t@@ -1133,7 +1107,6 @@ __global__ void findDarcyPressureForce(
const Float dz = devC_grid.L[2]/devC_grid.num[2];
// read fluid information
- __syncthreads();
//const Float phi = dev_darcy_phi[cellidx];
const Float p_xn = dev_darcy_p[d_idx(i_x-1,i_y,i_z)];
const Float p = dev_darcy_p[cellidx];
t@@ -1182,7 +1155,6 @@ __global__ void findDarcyPressureForce(
checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
#endif
// save force
- __syncthreads();
dev_force[i] += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
}
t@@ -1207,7 +1179,6 @@ __global__ void setDarcyTopPressure(
const unsigned int cellidx = idx(x,y,z);
// Write the new pressure the top boundary cells
- __syncthreads();
dev_darcy_p[cellidx] = new_pressure;
}
}
t@@ -1231,7 +1202,6 @@ __global__ void setDarcyTopWallPressure(
const unsigned int cellidx = idx(x,y,z);
// Write the new pressure the top boundary cells
- __syncthreads();
dev_darcy_p[cellidx] = new_pressure;
}
}
t@@ -1252,7 +1222,6 @@ __global__ void setDarcyTopWallFixedFlow(
z == wall0_iz+1) {
// Write the new pressure the top boundary cells
- __syncthreads();
const Float new_pressure = dev_darcy_p[idx(x,y,z-2)];
dev_darcy_p[idx(x,y,z)] = new_pressure;
}
t@@ -1280,7 +1249,6 @@ __global__ void findDarcyPermeabilities(
// 1D thread index
const unsigned int cellidx = d_idx(x,y,z);
- __syncthreads();
Float phi = dev_darcy_phi[cellidx];
// avoid division by zero
t@@ -1300,7 +1268,6 @@ __global__ void findDarcyPermeabilities(
//k = fmin(2.7e-9, k);
k = fmin(2.7e-10, k);
- __syncthreads();
dev_darcy_k[cellidx] = k;
#ifdef CHECK_FLUID_FINITE
t@@ -1335,7 +1302,6 @@ __global__ void findDarcyPermeabilityGradients(
const unsigned int cellidx = d_idx(x,y,z);
// read values
- __syncthreads();
const Float k_xn = dev_darcy_k[d_idx(x-1,y,z)];
const Float k_xp = dev_darcy_k[d_idx(x+1,y,z)];
const Float k_yn = dev_darcy_k[d_idx(x,y-1,z)];
t@@ -1350,7 +1316,6 @@ __global__ void findDarcyPermeabilityGradients(
(k_zp - k_zn)/(dz+dz));
// write result
- __syncthreads();
dev_darcy_grad_k[cellidx] = grad_k;
/*printf("%d,%d,%d findK:\n"
t@@ -1389,7 +1354,6 @@ __global__ void findDarcyPressureChange(
const unsigned int cellidx = d_idx(x,y,z);
// read values
- __syncthreads();
const Float p_old = dev_darcy_p_old[cellidx];
const Float p = dev_darcy_p[cellidx];
t@@ -1401,7 +1365,6 @@ __global__ void findDarcyPressureChange(
dpdt = 0.0;
// write result
- __syncthreads();
dev_darcy_dpdt[cellidx] = dpdt;
/*printf("%d,%d,%d\n"
t@@ -1461,7 +1424,6 @@ __global__ void firstDarcySolution(
const unsigned int cellidx = d_idx(x,y,z);
// read values
- __syncthreads();
const Float phi_xn = dev_darcy_phi[d_idx(x-1,y,z)];
const Float phi = dev_darcy_phi[cellidx];
const Float phi_xp = dev_darcy_phi[d_idx(x+1,y,z)];
t@@ -1585,7 +1547,6 @@ __global__ void firstDarcySolution(
#endif
// save explicit integrated pressure change
- __syncthreads();
dev_darcy_dp_expl[cellidx] = dp_expl;
#ifdef CHECK_FLUID_FINITE
t@@ -1641,7 +1602,6 @@ __global__ void updateDarcySolution(
const unsigned int cellidx = d_idx(x,y,z);
// read values
- __syncthreads();
const Float phi_xn = dev_darcy_phi[d_idx(x-1,y,z)];
const Float phi = dev_darcy_phi[cellidx];
const Float phi_xp = dev_darcy_phi[d_idx(x+1,y,z)];
t@@ -1778,7 +1738,6 @@ __global__ void updateDarcySolution(
#endif
// save new pressure and the residual
- __syncthreads();
dev_darcy_p_new[cellidx] = p_new;
dev_darcy_norm[cellidx] = res_norm;
t@@ -1817,7 +1776,6 @@ __global__ void findNewPressure(
const Float dp = dev_darcy_dp[cellidx];
// save new pressure
- __syncthreads();
dev_darcy_p[cellidx] += dp;
/*printf("%d,%d,%d\tp = % f\tp_new = % f\tres_norm = % f\n",
t@@ -1860,7 +1818,6 @@ __global__ void findDarcyVelocities(
const unsigned int cellidx = d_idx(x,y,z);
- __syncthreads();
const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
const Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
const Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
t@@ -1888,7 +1845,6 @@ __global__ void findDarcyVelocities(
const Float3 v = (-k/mu * grad_p)/phi;
// Save velocity
- __syncthreads();
dev_darcy_v[cellidx] = v;
}
}