tadjustable integration method, implicit default - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit c880b88b82551b826e8514baf6ed3718ea58fe5c
(DIR) parent 77eba564f47fe4ae136be994f22c3e51e0cc4aa4
(HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
Date: Wed, 12 Nov 2014 10:04:39 +0100
adjustable integration method, implicit default
Diffstat:
M src/darcy.cuh | 193 +++++++++++++++++++++++++++++--
M src/device.cu | 25 +++++++++++++++++++++++++
M src/sphere.h | 2 +-
3 files changed, 207 insertions(+), 13 deletions(-)
---
(DIR) diff --git a/src/darcy.cuh b/src/darcy.cuh
t@@ -22,7 +22,8 @@ void DEM::initDarcyMemDev(void)
// size of cell-face arrays in staggered grid discretization
//unsigned int memSizeFface = sizeof(Float)*darcyCellsVelocity();
- cudaMalloc((void**)&dev_darcy_dpdt, memSizeF); // Backwards Euler gradient
+ //cudaMalloc((void**)&dev_darcy_dpdt, memSizeF); // Backwards Euler gradient
+ cudaMalloc((void**)&dev_darcy_dp_expl, memSizeF); // Expl. pressure change
cudaMalloc((void**)&dev_darcy_p_old, memSizeF); // old pressure
cudaMalloc((void**)&dev_darcy_p, memSizeF); // hydraulic pressure
cudaMalloc((void**)&dev_darcy_p_new, memSizeF); // updated pressure
t@@ -43,7 +44,8 @@ void DEM::initDarcyMemDev(void)
// Free memory
void DEM::freeDarcyMemDev()
{
- cudaFree(dev_darcy_dpdt);
+ //cudaFree(dev_darcy_dpdt);
+ cudaFree(dev_darcy_dp_expl);
cudaFree(dev_darcy_p_old);
cudaFree(dev_darcy_p);
cudaFree(dev_darcy_p_new);
t@@ -782,12 +784,156 @@ __global__ void findDarcyPressureChange(
}
}
+__global__ void firstDarcySolution(
+ const Float* __restrict__ dev_darcy_p, // in
+ const Float* __restrict__ dev_darcy_k, // in
+ const Float* __restrict__ dev_darcy_phi, // in
+ const Float* __restrict__ dev_darcy_dphi, // in
+ const Float3* __restrict__ dev_darcy_grad_k, // in
+ const Float beta_f, // in
+ const Float mu, // in
+ const int bc_bot, // in
+ const int bc_top, // in
+ const unsigned int ndem, // in
+ const unsigned int wall0_iz, // in
+ Float* __restrict__ dev_darcy_dp_expl) // out
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // Cell size
+ const Float dx = devC_grid.L[0]/nx;
+ const Float dy = devC_grid.L[1]/ny;
+ const Float dz = devC_grid.L[2]/nz;
+
+ // Perform the epsilon updates for all non-ghost nodes except the Dirichlet
+ // boundaries at z=0 and z=nz-1.
+ // Adjust z range if a boundary has the Dirichlet boundary condition.
+ /*int z_min = 0;
+ int z_max = nz-1;
+ if (bc_bot == 0)
+ z_min = 1;
+ if (bc_top == 0)
+ z_max = nz-2;*/
+
+ //if (x < nx && y < ny && z >= z_min && z <= z_max) {
+ if (x < nx && y < ny && z < nz) {
+
+ // 1D thread index
+ const unsigned int cellidx = d_idx(x,y,z);
+
+ // read values
+ __syncthreads();
+ const Float k = dev_darcy_k[cellidx];
+ const Float3 grad_k = dev_darcy_grad_k[cellidx];
+ const Float phi = dev_darcy_phi[cellidx];
+ const Float dphi = dev_darcy_dphi[cellidx];
+
+ const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
+ const Float p = dev_darcy_p[cellidx];
+ const Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
+ const Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
+ const Float p_yp = dev_darcy_p[d_idx(x,y+1,z)];
+ Float p_zn = dev_darcy_p[d_idx(x,y,z-1)];
+ Float p_zp = dev_darcy_p[d_idx(x,y,z+1)];
+
+ // Neumann BCs
+ if (z == 0 && bc_bot == 1)
+ p_zn = p;
+ if (z == nz-1 && bc_top == 1)
+ p_zp = p;
+
+ // upwind coefficients for grad(p) determined from values of k
+ // k = 1.0: backwards difference
+ // k = -1.0: forwards difference
+ /*const Float3 e_k = MAKE_FLOAT3(
+ copysign(1.0, grad_k.x),
+ copysign(1.0, grad_k.y),
+ copysign(1.0, grad_k.z));
+
+ // gradient approximated by first-order forward differences
+ const Float3 grad_p = MAKE_FLOAT3(
+ ((1.0 + e_k.x)*(p - p_xn) + (1.0 - e_k.x)*(p_xp - p))/(dx + dx),
+ ((1.0 + e_k.y)*(p - p_yn) + (1.0 - e_k.y)*(p_yp - p))/(dy + dy),
+ ((1.0 + e_k.z)*(p - p_zn) + (1.0 - e_k.z)*(p_zp - p))/(dz + dz)
+ );*/
+
+ // gradient approximated by first-order central differences
+ const Float3 grad_p = MAKE_FLOAT3(
+ (p_xp - p_xn)/(dx + dx),
+ (p_yp - p_yn)/(dy + dy),
+ (p_zp - p_zn)/(dz + dz));
+
+ // laplacian approximated by second-order central differences
+ const Float laplace_p =
+ (p_xp - (p + p) + p_xn)/(dx*dx) +
+ (p_yp - (p + p) + p_yn)/(dy*dy) +
+ (p_zp - (p + p) + p_zn)/(dz*dz);
+
+ Float dp_expl =
+ + (ndem*devC_dt)/(beta_f*phi*mu)*(k*laplace_p + dot(grad_k, grad_p))
+ - dphi/(beta_f*phi*(1.0 - phi));
+
+ // Dirichlet BC at dynamic top wall. wall0_iz will be larger than the
+ // grid if the wall isn't dynamic
+ if ((bc_bot == 0 && z == 0) || (bc_top == 0 && z == nz-1)
+ || (z >= wall0_iz))
+ dp_expl = 0.0;
+
+#ifdef REPORT_FORCING_TERMS
+ const Float dp_diff = (ndem*devC_dt)/(beta_f*phi*mu)
+ *(k*laplace_p + dot(grad_k, grad_p));
+ const Float dp_forc = -dphi/(beta_f*phi*(1.0 - phi));
+ printf("\n%d,%d,%d updateDarcySolution\n"
+ "p = %e\n"
+ "p_x = %e, %e\n"
+ "p_y = %e, %e\n"
+ "p_z = %e, %e\n"
+ "dp_expl = %e\n"
+ "laplace_p = %e\n"
+ "grad_p = %e, %e, %e\n"
+ "grad_k = %e, %e, %e\n"
+ "dp_diff = %e\n"
+ "dp_forc = %e\n"
+ "dphi = %e\n"
+ "dphi/dt = %e\n"
+ ,
+ x,y,z,
+ p,
+ p_xn, p_xp,
+ p_yn, p_yp,
+ p_zn, p_zp,
+ dp_expl,
+ laplace_p,
+ grad_p.x, grad_p.y, grad_p.z,
+ grad_k.x, grad_k.y, grad_k.z,
+ dp_diff, dp_forc,
+ dphi, dphi/(ndem*devC_dt));
+#endif
+
+ // save explicit integrated pressure change
+ __syncthreads();
+ dev_darcy_dp_expl[cellidx] = dp_expl;
+
+#ifdef CHECK_FLUID_FINITE
+ checkFiniteFloat("dp_expl", x, y, z, dp_expl);
+#endif
+ }
+}
// A single jacobi iteration where the pressure values are updated to
// dev_darcy_p_new.
// bc = 0: Dirichlet, 1: Neumann
__global__ void updateDarcySolution(
const Float* __restrict__ dev_darcy_p_old, // in
//const Float* __restrict__ dev_darcy_dpdt, // in
+ const Float* __restrict__ dev_darcy_dp_expl, // in
const Float* __restrict__ dev_darcy_p, // in
const Float* __restrict__ dev_darcy_k, // in
const Float* __restrict__ dev_darcy_phi, // in
t@@ -835,14 +981,13 @@ __global__ void updateDarcySolution(
// read values
__syncthreads();
- const Float k = dev_darcy_k[cellidx];
+ const Float k = dev_darcy_k[cellidx];
const Float3 grad_k = dev_darcy_grad_k[cellidx];
const Float phi = dev_darcy_phi[cellidx];
const Float dphi = dev_darcy_dphi[cellidx];
- //const Float dpdt = dev_darcy_dpdt[cellidx];
-
- const Float p_old = dev_darcy_p_old[cellidx];
+ const Float p_old = dev_darcy_p_old[cellidx];
+ const Float dp_expl = dev_darcy_dp_expl[cellidx];
const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
const Float p = dev_darcy_p[cellidx];
t@@ -858,6 +1003,21 @@ __global__ void updateDarcySolution(
if (z == nz-1 && bc_top == 1)
p_zp = p;
+ // upwind coefficients for grad(p) determined from values of k
+ // k = 1.0: backwards difference
+ // k = -1.0: forwards difference
+ /*const Float3 e_k = MAKE_FLOAT3(
+ copysign(1.0, grad_k.x),
+ copysign(1.0, grad_k.y),
+ copysign(1.0, grad_k.z));
+
+ // gradient approximated by first-order forward differences
+ const Float3 grad_p = MAKE_FLOAT3(
+ ((1.0 + e_k.x)*(p - p_xn) + (1.0 - e_k.x)*(p_xp - p))/(dx + dx),
+ ((1.0 + e_k.y)*(p - p_yn) + (1.0 - e_k.y)*(p_yp - p))/(dy + dy),
+ ((1.0 + e_k.z)*(p - p_zn) + (1.0 - e_k.z)*(p_zp - p))/(dz + dz)
+ );*/
+
// gradient approximated by first-order central differences
const Float3 grad_p = MAKE_FLOAT3(
(p_xp - p_xn)/(dx + dx),
t@@ -870,7 +1030,8 @@ __global__ void updateDarcySolution(
(p_yp - (p + p) + p_yn)/(dy*dy) +
(p_zp - (p + p) + p_zn)/(dz*dz);
- Float p_new = p_old
+ //Float p_new = p_old
+ Float dp_impl =
+ (ndem*devC_dt)/(beta_f*phi*mu)*(k*laplace_p + dot(grad_k, grad_p))
- dphi/(beta_f*phi*(1.0 - phi));
t@@ -878,11 +1039,19 @@ __global__ void updateDarcySolution(
// grid if the wall isn't dynamic
if ((bc_bot == 0 && z == 0) || (bc_top == 0 && z == nz-1)
|| (z >= wall0_iz))
- p_new = p;
-
- // add relaxation
- //const Float theta = 0.1;
- //p_new = p*(1.0 - theta) + p_new*theta;
+ dp_impl = 0.0;
+ //p_new = p;
+
+ // choose integration method
+ // epsilon = 0: explicit
+ // epsilon = 0.5: Crank-Nicolson
+ // epsilon = 1: implicit
+ const Float epsilon = 1.0;
+ Float p_new = p_old + (1.0 - epsilon)*dp_expl + epsilon*dp_impl;
+
+ // add underrelaxation
+ const Float theta = 0.1;
+ p_new = p*(1.0 - theta) + p_new*theta;
// normalized residual, avoid division by zero
//const Float res_norm = (p_new - p)*(p_new - p)/(p_new*p_new + 1.0e-16);
(DIR) diff --git a/src/device.cu b/src/device.cu
t@@ -1946,11 +1946,36 @@ __host__ void DEM::startTime()
checkForCudaErrorsIter("Post setDarcyGhostNodes("
"dev_darcy_p) in Jacobi loop", iter);
+ if (nijac == 0) {
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ firstDarcySolution<<<dimGridFluid, dimBlockFluid>>>(
+ dev_darcy_p,
+ dev_darcy_k,
+ dev_darcy_phi,
+ dev_darcy_dphi,
+ dev_darcy_grad_k,
+ darcy.beta_f,
+ darcy.mu,
+ darcy.bc_bot,
+ darcy.bc_top,
+ darcy.ndem,
+ wall0_iz,
+ dev_darcy_dp_expl);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_updateDarcySolution);
+ checkForCudaErrorsIter("Post updateDarcySolution",
+ iter);
+ }
+
if (PROFILING == 1)
startTimer(&kernel_tic);
updateDarcySolution<<<dimGridFluid, dimBlockFluid>>>(
dev_darcy_p_old,
//dev_darcy_dpdt,
+ dev_darcy_dp_expl,
dev_darcy_p,
dev_darcy_k,
dev_darcy_phi,
(DIR) diff --git a/src/sphere.h b/src/sphere.h
t@@ -295,7 +295,7 @@ class DEM {
// Darcy values, device
Float* dev_darcy_p_old; // Previous cell hydraulic pressure
- Float* dev_darcy_dpdt; // Previous cell hydraulic pressure
+ Float* dev_darcy_dp_expl; // Explicit integrated pressure change
Float* dev_darcy_p; // Cell hydraulic pressure
Float* dev_darcy_p_new; // Updated cell hydraulic pressure
Float3* dev_darcy_v; // Cell fluid velocity