implemented staggered grid in several functions - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
git clone git://src.adamsgaard.dk/sphere
---
commit bd373edea562b18de93fe231cc1583108a4d1b05
parent 0548db26d1afead334b2d7be927411f8e6dfd0dc
Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
Date: Tue, 3 Jun 2014 10:51:51 +0200
implemented staggered grid in several functions
Diffstat:
M python/fluidshear.py | 4 ++--
M src/device.cu | 180 +++++++++++++++++--------------
M src/navierstokes.cuh | 431 ++++++++++++++++++++++++++-----
M src/sphere.h | 11 +++++++----
M tests/stokes_law.py | 2 +-
5 files changed, 469 insertions(+), 159 deletions(-)
---
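This commit moves the fluid velocity, the predicted velocity and the stress divergence from the cell centers onto the cell faces of a staggered (MAC) grid: scalars such as p and phi stay at the centers (indexed with idx()), while the new *_x, *_y, *_z arrays hold face values indexed with vidx(). Neither index function is part of this diff; purely for orientation, a linear face index with one ghost layer per side could look like the sketch below. The extents and ghost offsets are assumptions, not the actual layout in src/navierstokes.cuh.

    // Hypothetical face-array index for a staggered grid with one ghost
    // layer per side, so x, y, z may range from -1 to n+1 on each axis.
    // Sketch only; the real vidx() in src/navierstokes.cuh may differ.
    static inline unsigned int vidx_sketch(
        int x, int y, int z,
        unsigned int nx, unsigned int ny, unsigned int nz)
    {
        return (unsigned int)(x+1)
            + (nx+3)*(unsigned int)(y+1)
            + (nx+3)*(ny+3)*(unsigned int)(z+1);
    }
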
diff --git a/python/fluidshear.py b/python/fluidshear.py
@@ -9,8 +9,8 @@ sid = 'fluidshear'
sim = sphere.sim(sid + '-init', np = 24000, fluid = False)
#sim.cleanup()
sim.radius[:] = 0.05
-sim.initRandomGridPos(gridnum = [16, 16, 9000])
-sim.initTemporal(total = 5.0, file_dt = 0.05)
+sim.initRandomGridPos(gridnum = [20, 20, 9000])
+sim.initTemporal(total = 10.0, file_dt = 0.05)
sim.g[2] = -9.81
sim.run(dry=True)
sim.run()
diff --git a/src/device.cu b/src/device.cu
@@ -557,7 +557,7 @@ __host__ void DEM::startTime()
unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock);
dim3 dimGridBonds(blocksPerGridBonds, 1, 1); // Blocks arranged in 1D grid
- // Use 3D block and grid layout for fluid calculations
+ // Use 3D block and grid layout for cell-centered fluid calculations
dim3 dimBlockFluid(8, 8, 8); // 512 threads per block
dim3 dimGridFluid(
iDivUp(grid.num[0], dimBlockFluid.x),
@@ -568,6 +568,17 @@ __host__ void DEM::startTime()
exit(1);
}
+ // Use 3D block and grid layout for cell-face fluid calculations
+ dim3 dimBlockFluidFace(8, 8, 8); // 512 threads per block
+ dim3 dimGridFluidFace(
+ iDivUp(grid.num[0]+1, dimBlockFluidFace.x),
+ iDivUp(grid.num[1]+1, dimBlockFluidFace.y),
+ iDivUp(grid.num[2]+1, dimBlockFluidFace.z));
+ if (dimGridFluidFace.z > 64 && navierstokes == 1) {
+ cerr << "Error: dimGridFluidFace.z > 64" << endl;
+ exit(1);
+ }
+
// Shared memory per block
unsigned int smemSize = sizeof(unsigned int)*(threadsPerBlock+1);
@@ -867,29 +878,42 @@ __host__ void DEM::startTime()
}*/
if (np > 0) {
- // Determine the interaction force
- /*findInteractionForce<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_phi,
- dev_ns_d_avg,
- dev_ns_vp_avg,
- dev_ns_v,
- dev_ns_fi);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post findInteractionForce", iter);
- // Apply interaction force to the particles
- applyParticleInteractionForce<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_fi,
- dev_ns_phi,
- dev_ns_p,
- dev_gridParticleIndex,
- dev_cellStart,
- dev_cellEnd,
- dev_x_sorted,
- dev_force);
+ if (iter == 0) {
+ // set cell center ghost nodes
+ setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v, ns.bc_bot, ns.bc_top);
+
+ // find cell face velocities
+ interpolateCenterToFace
+ <<<dimGridFluidFace, dimBlockFluidFace>>>(
+ dev_ns_v,
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z);
+ cudaThreadSynchronize();
+ checkForCudaErrors("Post interpolateCenterToFace");
+ }
+
+ setNSghostNodesFace<Float>
+ <<<dimGridFluidFace, dimBlockFluidFace>>>(
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
+ ns.bc_bot,
+ ns.bc_top);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post applyParticleInteractionForce",
- iter);*/
+ checkForCudaErrorsIter("Post setNSghostNodesFace", iter);
+
+ findFaceDivTau<<<dimGridFluidFace, dimBlockFluidFace>>>(
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
+ dev_ns_div_tau_x,
+ dev_ns_div_tau_y,
+ dev_ns_div_tau_z);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post findFaceDivTau", iter);
// Per particle, find the fluid-particle interaction force f_pf
// and apply it to the particle
@@ -898,9 +922,12 @@ __host__ void DEM::startTime()
dev_vel,
dev_ns_phi,
dev_ns_p,
- dev_ns_v,
- dev_ns_tau,
- iter,
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
+ dev_ns_div_tau_x,
+ dev_ns_div_tau_y,
+ dev_ns_div_tau_z,
dev_ns_f_pf,
dev_force);
cudaThreadSynchronize();
@@ -961,21 +988,19 @@ __host__ void DEM::startTime()
// Set the values of the ghost nodes in the grid
if (PROFILING == 1)
startTimer(&kernel_tic);
- /*setNSghostNodesDev<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p,
- dev_ns_v,
- dev_ns_v_p,
- dev_ns_phi,
- dev_ns_dphi,
- dev_ns_epsilon);*/
+
setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
dev_ns_p, ns.bc_bot, ns.bc_top);
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v, ns.bc_bot, ns.bc_top);
+ //setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
+ //dev_ns_v, ns.bc_bot, ns.bc_top);
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v_p, ns.bc_bot, ns.bc_top);
+ setNSghostNodesFace<Float>
+ <<<dimGridFluidFace, dimBlockFluidFace>>>(
+ dev_ns_v_p_x,
+ dev_ns_v_p_y,
+ dev_ns_v_p_z,
+ ns.bc_bot, ns.bc_top);
setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
dev_ns_phi, ns.bc_bot, ns.bc_top);
@@ -993,26 +1018,15 @@ __host__ void DEM::startTime()
transferNSepsilonFromGlobalDeviceMemory();
printNSarray(stdout, ns.epsilon, "epsilon");*/
- // Find the fluid stress tensor, needed for predicting the fluid
- // velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- findNSstressTensor<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v,
- dev_ns_tau);
+ // interpolate velocities to cell centers which makes velocity
+ // prediction easier
+ interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
+ dev_ns_v);
cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSstressTensor);
- checkForCudaErrorsIter("Post findNSstressTensor", iter);
-
- // Set stress tensor values in the ghost nodes
- setNSghostNodes_tau<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_tau,
- ns.bc_bot,
- ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodes_tau", iter);
+ checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
// Find the divergence of phi*vi*v, needed for predicting the
// fluid velocities
@@ -1028,53 +1042,44 @@ __host__ void DEM::startTime()
&t_findNSdivphiviv);
checkForCudaErrorsIter("Post findNSdivphiviv", iter);
- // Find the divergence of phi*tau, needed for predicting the
- // fluid velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- /*findNSdivphitau<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_phi,
- dev_ns_tau,
- dev_ns_div_phi_tau);*/
- findNSdivtau<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_tau,
- dev_ns_div_tau);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSdivtau);
- //checkForCudaErrorsIter("Post findNSdivtau", iter);
- checkForCudaErrorsIter("Post findNSdivtau", iter);
-
// Predict the fluid velocities on the base of the old pressure
// field and ignoring the incompressibility constraint
if (PROFILING == 1)
startTimer(&kernel_tic);
- findPredNSvelocities<<<dimGridFluid, dimBlockFluid>>>(
+ findPredNSvelocities<<<dimGridFluidFace, dimBlockFluidFace>>>(
dev_ns_p,
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
dev_ns_v,
dev_ns_phi,
dev_ns_dphi,
- dev_ns_div_phi_vi_v,
- //dev_ns_div_phi_tau,
- dev_ns_div_tau,
+ dev_ns_div_tau_x,
+ dev_ns_div_tau_y,
+ dev_ns_div_tau_z,
+ dev_ns_div_phi_vi_v,
ns.bc_bot,
ns.bc_top,
ns.beta,
dev_ns_fi,
ns.ndem,
- dev_ns_v_p);
+ dev_ns_v_p_x,
+ dev_ns_v_p_y,
+ dev_ns_v_p_z);
cudaThreadSynchronize();
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
&t_findPredNSvelocities);
checkForCudaErrorsIter("Post findPredNSvelocities", iter);
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v_p);
+ setNSghostNodesFace<Float>
+ <<<dimGridFluidFace, dimBlockFluidFace>>>(
+ dev_ns_v_p_x,
+ dev_ns_v_p_y,
+ dev_ns_v_p_z,
+ ns.bc_bot, ns.bc_top);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesFloat3(dev_ns_v_p)",
- iter);
+ checkForCudaErrorsIter(
+ "Post setNSghostNodesFaceFloat3(dev_ns_v_p)", iter);
// In the first iteration of the sphere program, we'll need to
// manually estimate the values of epsilon. In the subsequent
@@ -1507,6 +1512,17 @@ __host__ void DEM::startTime()
cudaThreadSynchronize();
checkForCudaErrorsIter("Beginning of file output section", iter);
+ // v_x, v_y, v_z -> v
+ if (navierstokes == 1) {
+ interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
+ dev_ns_v);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
+ }
+
//// Copy device data to host memory
transferFromGlobalDeviceMemory();
checkForCudaErrorsIter("After transferFromGlobalDeviceMemory()",
diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh
@@ -69,7 +69,7 @@ void DEM::initNSmemDev(void)
cudaMalloc((void**)&dev_ns_v_x, memSizeFvel);// velocity in stag. grid
cudaMalloc((void**)&dev_ns_v_y, memSizeFvel);// velocity in stag. grid
cudaMalloc((void**)&dev_ns_v_z, memSizeFvel);// velocity in stag. grid
- cudaMalloc((void**)&dev_ns_v_p, memSizeF*3); // predicted cell velocity
+ //cudaMalloc((void**)&dev_ns_v_p, memSizeF*3); // predicted cell velocity
cudaMalloc((void**)&dev_ns_v_p_x, memSizeFvel); // pred. vel. in stag. grid
cudaMalloc((void**)&dev_ns_v_p_y, memSizeFvel); // pred. vel. in stag. grid
cudaMalloc((void**)&dev_ns_v_p_z, memSizeFvel); // pred. vel. in stag. grid
@@ -78,7 +78,7 @@ void DEM::initNSmemDev(void)
cudaMalloc((void**)&dev_ns_fi, memSizeF*3); // interaction force
cudaMalloc((void**)&dev_ns_phi, memSizeF); // cell porosity
cudaMalloc((void**)&dev_ns_dphi, memSizeF); // cell porosity change
- cudaMalloc((void**)&dev_ns_div_phi_v_v, memSizeF*3); // div(phi v v)
+ //cudaMalloc((void**)&dev_ns_div_phi_v_v, memSizeF*3); // div(phi v v)
cudaMalloc((void**)&dev_ns_epsilon, memSizeF); // pressure difference
cudaMalloc((void**)&dev_ns_epsilon_new, memSizeF); // new pressure diff.
cudaMalloc((void**)&dev_ns_epsilon_old, memSizeF); // old pressure diff.
@@ -87,7 +87,10 @@ void DEM::initNSmemDev(void)
cudaMalloc((void**)&dev_ns_f1, memSizeF); // constant addition in forcing
cudaMalloc((void**)&dev_ns_f2, memSizeF*3); // constant slope in forcing
cudaMalloc((void**)&dev_ns_tau, memSizeF*6); // stress tensor (symmetrical)
- cudaMalloc((void**)&dev_ns_div_tau, memSizeF*6); // div(tau)
+ cudaMalloc((void**)&dev_ns_div_tau, memSizeF*3); // div(tau), cell center
+ cudaMalloc((void**)&dev_ns_div_tau_x, memSizeFvel); // div(tau), cell face
+ cudaMalloc((void**)&dev_ns_div_tau_y, memSizeFvel); // div(tau), cell face
+ cudaMalloc((void**)&dev_ns_div_tau_z, memSizeFvel); // div(tau), cell face
cudaMalloc((void**)&dev_ns_div_phi_vi_v, memSizeF*3); // div(phi*vi*v)
//cudaMalloc((void**)&dev_ns_div_phi_tau, memSizeF*3); // div(phi*tau)
cudaMalloc((void**)&dev_ns_f_pf, sizeof(Float3)*np); // particle fluid force
@@ -103,7 +106,7 @@ void DEM::freeNSmemDev()
cudaFree(dev_ns_v_x);
cudaFree(dev_ns_v_y);
cudaFree(dev_ns_v_z);
- cudaFree(dev_ns_v_p);
+ //cudaFree(dev_ns_v_p);
cudaFree(dev_ns_v_p_x);
cudaFree(dev_ns_v_p_y);
cudaFree(dev_ns_v_p_z);
@@ -112,7 +115,7 @@ void DEM::freeNSmemDev()
cudaFree(dev_ns_fi);
cudaFree(dev_ns_phi);
cudaFree(dev_ns_dphi);
- cudaFree(dev_ns_div_phi_v_v);
+ //cudaFree(dev_ns_div_phi_v_v);
cudaFree(dev_ns_epsilon);
cudaFree(dev_ns_epsilon_new);
cudaFree(dev_ns_epsilon_old);
@@ -123,7 +126,10 @@ void DEM::freeNSmemDev()
cudaFree(dev_ns_tau);
cudaFree(dev_ns_div_phi_vi_v);
//cudaFree(dev_ns_div_phi_tau);
- cudaFree(dev_ns_div_tau);
+ //cudaFree(dev_ns_div_tau);
+ cudaFree(dev_ns_div_tau_x);
+ cudaFree(dev_ns_div_tau_y);
+ cudaFree(dev_ns_div_tau_z);
cudaFree(dev_ns_f_pf);
}
@@ -144,7 +150,7 @@ void DEM::transferNStoGlobalDeviceMemory(int statusmsg)
cudaMemcpy(dev_ns_p, ns.p, memSizeF, cudaMemcpyHostToDevice);
checkForCudaErrors("transferNStoGlobalDeviceMemory after first cudaMemcpy");
cudaMemcpy(dev_ns_v, ns.v, memSizeF*3, cudaMemcpyHostToDevice);
- cudaMemcpy(dev_ns_v_p, ns.v_p, memSizeF*3, cudaMemcpyHostToDevice);
+ //cudaMemcpy(dev_ns_v_p, ns.v_p, memSizeF*3, cudaMemcpyHostToDevice);
cudaMemcpy(dev_ns_phi, ns.phi, memSizeF, cudaMemcpyHostToDevice);
cudaMemcpy(dev_ns_dphi, ns.dphi, memSizeF, cudaMemcpyHostToDevice);
@@ -165,7 +171,7 @@ void DEM::transferNSfromGlobalDeviceMemory(int statusmsg)
cudaMemcpy(ns.p, dev_ns_p, memSizeF, cudaMemcpyDeviceToHost);
checkForCudaErrors("In transferNSfromGlobalDeviceMemory, dev_ns_p", 0);
cudaMemcpy(ns.v, dev_ns_v, memSizeF*3, cudaMemcpyDeviceToHost);
- cudaMemcpy(ns.v_p, dev_ns_v_p, memSizeF*3, cudaMemcpyDeviceToHost);
+ //cudaMemcpy(ns.v_p, dev_ns_v_p, memSizeF*3, cudaMemcpyDeviceToHost);
cudaMemcpy(ns.phi, dev_ns_phi, memSizeF, cudaMemcpyDeviceToHost);
cudaMemcpy(ns.dphi, dev_ns_dphi, memSizeF, cudaMemcpyDeviceToHost);
cudaMemcpy(ns.norm, dev_ns_norm, memSizeF, cudaMemcpyDeviceToHost);
@@ -624,6 +630,63 @@ __global__ void setNSghostNodes(
}
}
+// Update a field in the ghost nodes from their parent cell values. The edge
+// (diagonal) cells are not written since they are not read.
+// Launch per face.
+template<typename T>
+__global__ void setNSghostNodesFace(
+ T* dev_scalarfield_x,
+ T* dev_scalarfield_y,
+ T* dev_scalarfield_z,
+ int bc_bot,
+ int bc_top)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // check that we are not outside the fluid grid
+ if (x <= nx && y <= ny && z <= nz) {
+
+ const T val_x = dev_scalarfield_x[vidx(x,y,z)];
+ const T val_y = dev_scalarfield_y[vidx(x,y,z)];
+ const T val_z = dev_scalarfield_z[vidx(x,y,z)];
+
+ // x
+ if (x == 0)
+ dev_scalarfield_x[vidx(nx+1,y,z)] = val_x;
+ if (x == nx)
+ dev_scalarfield_x[vidx(-1,y,z)] = val_x;
+
+ // y
+ if (y == 0)
+ dev_scalarfield_y[vidx(x,ny+1,z)] = val_y;
+ if (y == ny)
+ dev_scalarfield_y[vidx(x,-1,z)] = val_y;
+
+ // z
+ if (z == 0 && bc_bot == 0)
+ dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Dirichlet
+ if (z == 1 && bc_bot == 1)
+ dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Neumann
+ if (z == 0 && bc_bot == 2)
+ dev_scalarfield_z[vidx(x,y,nz+1)] = val_z; // Periodic -z
+
+ if (z == nz && bc_top == 0)
+ dev_scalarfield_z[vidx(x,y,nz)] = val_z; // Dirichlet
+ if (z == nz-1 && bc_top == 1)
+ dev_scalarfield_z[vidx(x,y,nz)] = val_z; // Neumann
+ if (z == nz && bc_top == 2)
+ dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Periodic +z
+ }
+}
+
// Update the tensor field for the ghost nodes from their parent cell values.
// The edge (diagonal) cells are not written since they are not read. Launch
// this kernel for all cells in the grid.
@@ -1688,7 +1751,7 @@ __global__ void findNSdivphiviv(
// Read porosity and velocity in the 6 neighbor cells
__syncthreads();
const Float phi_xn = dev_ns_phi[idx(x-1,y,z)];
- //const Float phi = dev_ns_phi[idx(x,y,z)];
+ const Float phi = dev_ns_phi[idx(x,y,z)];
const Float phi_xp = dev_ns_phi[idx(x+1,y,z)];
const Float phi_yn = dev_ns_phi[idx(x,y-1,z)];
const Float phi_yp = dev_ns_phi[idx(x,y+1,z)];
@@ -1696,7 +1759,7 @@ __global__ void findNSdivphiviv(
const Float phi_zp = dev_ns_phi[idx(x,y,z+1)];
const Float3 v_xn = dev_ns_v[idx(x-1,y,z)];
- //const Float3 v = dev_ns_v[idx(x,y,z)];
+ const Float3 v = dev_ns_v[idx(x,y,z)];
const Float3 v_xp = dev_ns_v[idx(x+1,y,z)];
const Float3 v_yn = dev_ns_v[idx(x,y-1,z)];
const Float3 v_yp = dev_ns_v[idx(x,y+1,z)];
@@ -1704,7 +1767,8 @@ __global__ void findNSdivphiviv(
const Float3 v_zp = dev_ns_v[idx(x,y,z+1)];
// Calculate upwind coefficients
- /*const Float3 a = MAKE_FLOAT3(
+ //*
+ const Float3 a = MAKE_FLOAT3(
copysign(1.0, v.x),
copysign(1.0, v.y),
copysign(1.0, v.z));
@@ -1763,8 +1827,9 @@ __global__ void findNSdivphiviv(
// Determine the weighted average of both discretizations
const Float3 div_phi_vi_v = tau*div_uw + (1.0 - tau)*div_cd;
- */
+ //*/
+ /*
// Calculate the divergence: div(phi*v_i*v)
const Float3 div_phi_vi_v = MAKE_FLOAT3(
// x
@@ -1779,6 +1844,7 @@ __global__ void findNSdivphiviv(
(phi_xp*v_xp.z*v_xp.x - phi_xn*v_xn.z*v_xn.x)/(2.0*dx) +
(phi_yp*v_yp.z*v_yp.y - phi_yn*v_yn.z*v_yn.y)/(2.0*dy) +
(phi_zp*v_zp.z*v_zp.z - phi_zn*v_zn.z*v_zn.z)/(2.0*dz));
+ // */
// Write divergence
__syncthreads();
@@ -1929,6 +1995,7 @@ __global__ void findNSdivphitau(
}
// Find the divergence of phi v v
+// Unused
__global__ void findNSdivphivv(
Float* dev_ns_v_prod, // in
Float* dev_ns_phi, // in
@@ -2017,20 +2084,27 @@ __global__ void findNSdivphivv(
// Find predicted fluid velocity
+// Launch per face.
__global__ void findPredNSvelocities(
Float* dev_ns_p, // in
+ Float* dev_ns_v_x, // in
+ Float* dev_ns_v_y, // in
+ Float* dev_ns_v_z, // in
Float3* dev_ns_v, // in
Float* dev_ns_phi, // in
Float* dev_ns_dphi, // in
+ Float* dev_ns_div_tau_x, // in
+ Float* dev_ns_div_tau_y, // in
+ Float* dev_ns_div_tau_z, // in
Float3* dev_ns_div_phi_vi_v, // in
- //Float3* dev_ns_div_phi_tau, // in
- Float3* dev_ns_div_tau, // in
int bc_bot, // in
int bc_top, // in
Float beta, // in
Float3* dev_ns_fi, // in
unsigned int ndem, // in
- Float3* dev_ns_v_p) // out
+ Float* dev_ns_v_p_x, // out
+ Float* dev_ns_v_p_y, // out
+ Float* dev_ns_v_p_z) // out
{
// 3D thread index
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -2048,76 +2122,123 @@ __global__ void findPredNSvelocities(
const Float dz = devC_grid.L[2]/nz;
// 1D thread index
+ const unsigned int faceidx = vidx(x,y,z);
const unsigned int cellidx = idx(x,y,z);
// Check that we are not outside the fluid grid
- if (x < nx && y < ny && z < nz) {
+ if (x <= nx && y <= ny && z <= nz) {
// Values that are needed for calculating the predicted velocity
__syncthreads();
- const Float3 v = dev_ns_v[cellidx];
- const Float phi = dev_ns_phi[cellidx];
- const Float dphi = dev_ns_dphi[cellidx];
- const Float3 div_phi_vi_v = dev_ns_div_phi_vi_v[cellidx];
- //const Float3 div_phi_tau = dev_ns_div_phi_tau[cellidx];
- const Float3 div_tau = dev_ns_div_tau[cellidx];
+ const Float3 v_face = MAKE_FLOAT3(
+ dev_ns_v_x[faceidx],
+ dev_ns_v_y[faceidx],
+ dev_ns_v_z[faceidx]);
+ const Float3 div_tau_face = MAKE_FLOAT3(
+ dev_ns_div_tau_x[faceidx],
+ dev_ns_div_tau_y[faceidx],
+ dev_ns_div_tau_z[faceidx]);
+
+ // cell center values
+ const Float phi_xn = dev_ns_phi[idx(x-1,y,z)];
+ const Float phi_c = dev_ns_phi[cellidx];
+ const Float phi_yn = dev_ns_phi[idx(x,y-1,z)];
+ const Float phi_zn = dev_ns_phi[idx(x,y,z-1)];
+
+ const Float dphi_xn = dev_ns_dphi[idx(x-1,y,z)];
+ const Float dphi_c = dev_ns_dphi[cellidx];
+ const Float dphi_yn = dev_ns_dphi[idx(x,y-1,z)];
+ const Float dphi_zn = dev_ns_dphi[idx(x,y,z-1)];
+
+ const Float3 v_xn = dev_ns_v[idx(x-1,y,z)];
+ const Float3 v_c = dev_ns_v[cellidx];
+ const Float3 v_yn = dev_ns_v[idx(x,y-1,z)];
+ const Float3 v_zn = dev_ns_v[idx(x,y,z-1)];
+
+ const Float3 div_phi_vi_v_xn = dev_ns_div_phi_vi_v[idx(x-1,y,z)];
+ const Float3 div_phi_vi_v_c = dev_ns_div_phi_vi_v[cellidx];
+ const Float3 div_phi_vi_v_yn = dev_ns_div_phi_vi_v[idx(x,y-1,z)];
+ const Float3 div_phi_vi_v_zn = dev_ns_div_phi_vi_v[idx(x,y,z-1)];
+
+ // component-wise average values
+ const Float3 phi = MAKE_FLOAT3(
+ (phi_c + phi_xn)/2.0,
+ (phi_c + phi_yn)/2.0,
+ (phi_c + phi_zn)/2.0);
+ const Float3 dphi = MAKE_FLOAT3(
+ (dphi_c + dphi_xn)/2.0,
+ (dphi_c + dphi_yn)/2.0,
+ (dphi_c + dphi_zn)/2.0);
// The particle-fluid interaction force should only be incorporated if
// there is a fluid viscosity
- Float3 f_i;
- if (devC_params.mu > 0.0)
- f_i = dev_ns_fi[cellidx];
- else
- f_i = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ Float3 f_i_c, f_i_xn, f_i_yn, f_i_zn;
+ if (devC_params.mu > 0.0) {
+ f_i_c = dev_ns_fi[cellidx];
+ f_i_xn = dev_ns_fi[idx(x-1,y,z)];
+ f_i_yn = dev_ns_fi[idx(x,y-1,z)];
+ f_i_zn = dev_ns_fi[idx(x,y,z-1)];
+ } else {
+ f_i_c = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ f_i_xn = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ f_i_yn = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ f_i_zn = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ }
+ const Float3 f_i = MAKE_FLOAT3(
+ (f_i_c.x + f_i_xn.x)/2.0,
+ (f_i_c.y + f_i_yn.y)/2.0,
+ (f_i_c.z + f_i_zn.z)/2.0);
const Float dt = ndem*devC_dt;
const Float rho = devC_params.rho_f;
- // Find pressure gradient
- Float3 grad_p = MAKE_FLOAT3(0.0, 0.0, 0.0);
-
// The pressure gradient is not needed in Chorin's projection method
// (ns.beta=0), so it only has to be looked up in pressure-dependent
// projection methods
Float3 pressure_term = MAKE_FLOAT3(0.0, 0.0, 0.0);
if (beta > 0.0) {
- grad_p = gradient(dev_ns_p, x, y, z, dx, dy, dz);
+ __syncthreads();
+ const Float p = dev_ns_p[cellidx];
+ const Float p_xn = dev_ns_p[idx(x-1,y,z)];
+ const Float p_yn = dev_ns_p[idx(x,y-1,z)];
+ const Float p_zn = dev_ns_p[idx(x,y,z-1)];
+ const Float3 grad_p = MAKE_FLOAT3(
+ (p - p_xn)/dx,
+ (p - p_yn)/dy,
+ (p - p_zn)/dz);
#ifdef SET_1
- pressure_term = -beta*dt/(rho*phi)*grad_p;
+ pressure_term = -beta*dt/(rho*phi)*grad_p;
#endif
#ifdef SET_2
pressure_term = -beta*dt/rho*grad_p;
#endif
}
- // Calculate the predicted velocity
- /*
- Float3 v_p = v
- + pressure_term
- + 1.0/devC_params.rho_f*div_phi_tau*ndem*devC_dt/phi // diffusion
- + MAKE_FLOAT3(devC_params.g[0], devC_params.g[1], devC_params.g[2])
- *ndem*devC_dt // gravity term
- - ndem*devC_dt/(devC_params.rho_f*phi)*f_i
- //- ndem*devC_dt/(devC_params.rho_f*phi*dx*dy*dz)*f_i
- - v*dphi/phi
- - div_phi_vi_v*ndem*devC_dt/phi // advection term
- ;*/
+ // Average the cell-centered advection term div(phi*v_i*v) onto this
+ // cell face, and use the face-centered velocity and stress divergence
+ const Float3 div_phi_vi_v = MAKE_FLOAT3(
+ (div_phi_vi_v_c.x + div_phi_vi_v_xn.x)/2.0,
+ (div_phi_vi_v_c.y + div_phi_vi_v_yn.y)/2.0,
+ (div_phi_vi_v_c.z + div_phi_vi_v_zn.z)/2.0);
+ const Float3 v = v_face;
+ const Float3 div_tau = div_tau_face;
+
+ // Determine the predicted velocity
#ifdef SET_1
- const Float3 interaction_term = -dt/(rho*phi)*f_i;
- const Float3 diffusion_term = dt/(rho*phi)*div_tau;
- const Float3 gravity_term = MAKE_FLOAT3(
+ const Float3 interaction_term = -dt/(rho*phi)*f_i;
+ const Float3 diffusion_term = dt/(rho*phi)*div_tau;
+ const Float3 gravity_term = MAKE_FLOAT3(
devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt;
- const Float3 porosity_term = -1.0*v*dphi/phi;
- const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi;
+ const Float3 porosity_term = -1.0*v*dphi/phi;
+ const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi;
#endif
#ifdef SET_2
- const Float3 interaction_term = -dt/(rho*phi)*f_i;
- const Float3 diffusion_term = dt/rho*div_tau;
- const Float3 gravity_term = MAKE_FLOAT3(
- devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt;
- const Float3 porosity_term = -1.0*v*dphi/phi;
- const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi;
+ const Float3 interaction_term = -dt/(rho*phi)*f_i;
+ const Float3 diffusion_term = dt/rho*div_tau;
+ const Float3 gravity_term = MAKE_FLOAT3(
+ devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt;
+ const Float3 porosity_term = -1.0*v*dphi/phi;
+ const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi;
#endif
Float3 v_p = v
@@ -2155,8 +2276,9 @@ __global__ void findPredNSvelocities(
// Save the predicted velocity
__syncthreads();
- //v_p = MAKE_FLOAT3(0.0, 0.0, 0.0);
- dev_ns_v_p[cellidx] = v_p;
+ dev_ns_v_p_x[faceidx] = v_p.x;
+ dev_ns_v_p_y[faceidx] = v_p.y;
+ dev_ns_v_p_z[faceidx] = v_p.z;
#ifdef CHECK_NS_FINITE
(void)checkFiniteFloat3("v_p", x, y, z, v_p);
@@ -2176,7 +2298,7 @@ __global__ void findNSforcing(
Float* dev_ns_f,
Float* dev_ns_phi,
Float* dev_ns_dphi,
- Float3* dev_ns_v_p,
+ //Float3* dev_ns_v_p,
unsigned int nijac,
unsigned int ndem)
{
@@ -2786,9 +2908,12 @@ __global__ void findInteractionForce(
Float4* dev_vel, // in
Float* dev_ns_phi, // in
Float* dev_ns_p, // in
- Float3* dev_ns_v, // in
- Float* dev_ns_tau, // in
- const unsigned int iter, // in
+ Float* dev_ns_v_x, // in
+ Float* dev_ns_v_y, // in
+ Float* dev_ns_v_z, // in
+ Float* dev_ns_div_tau_x,// in
+ Float* dev_ns_div_tau_y,// in
+ Float* dev_ns_div_tau_z,// in
Float3* dev_ns_f_pf, // out
Float4* dev_force) // out
{
@@ -2819,8 +2944,30 @@ __global__ void findInteractionForce(
// Fluid information
__syncthreads();
- const Float phi = dev_ns_phi[cellidx];
- const Float3 v_f = dev_ns_v[cellidx];
+ const Float phi = dev_ns_phi[cellidx];
+ const Float v_x = dev_ns_v_x[vidx(x,y,z)];
+ const Float v_x_p = dev_ns_v_x[vidx(x+1,y,z)];
+ const Float v_y = dev_ns_v_y[vidx(x,y,z)];
+ const Float v_y_p = dev_ns_v_y[vidx(x,y+1,z)];
+ const Float v_z = dev_ns_v_z[vidx(x,y,z)];
+ const Float v_z_p = dev_ns_v_z[vidx(x,y,z+1)];
+
+ const Float div_tau_x = dev_ns_div_tau_x[vidx(x,y,z)];
+ const Float div_tau_x_p = dev_ns_div_tau_x[vidx(x+1,y,z)];
+ const Float div_tau_y = dev_ns_div_tau_y[vidx(x,y,z)];
+ const Float div_tau_y_p = dev_ns_div_tau_y[vidx(x,y+1,z)];
+ const Float div_tau_z = dev_ns_div_tau_z[vidx(x,y,z)];
+ const Float div_tau_z_p = dev_ns_div_tau_z[vidx(x,y,z+1)];
+
+ const Float3 v_f = MAKE_FLOAT3(
+ (v_x + v_x_p)/2.0,
+ (v_y + v_y_p)/2.0,
+ (v_z + v_z_p)/2.0);
+
+ const Float3 div_tau = MAKE_FLOAT3(
+ (div_tau_x + div_tau_x_p)/2.0,
+ (div_tau_y + div_tau_y_p)/2.0,
+ (div_tau_z + div_tau_z_p)/2.0);
const Float3 v_rel = v_f - v_p;
const Float v_rel_length = length(v_rel);
@@ -2844,13 +2991,7 @@ __global__ void findInteractionForce(
-1.0*gradient(dev_ns_p, i_x, i_y, i_z, dx, dy, dz)*V_p;
// Viscous force
- Float3 f_v;
- if (iter == 0)
- f_v = MAKE_FLOAT3(0.0, 0.0, 0.0);
- else
- f_v =
- -1.0*divergence_tensor(dev_ns_tau, i_x, i_y, i_z, dx, dy, dz)
- *V_p;
+ const Float3 f_v = -1.0*div_tau*V_p;
// Interaction force on particle (force) and fluid (f_pf)
__syncthreads();
@@ -3020,6 +3161,156 @@ __global__ void applyInteractionForceToFluid(
}
}*/
+// Launch per cell face node.
+// Cell center ghost nodes must be set prior to call.
+__global__ void interpolateCenterToFace(
+ Float3* dev_in,
+ Float* dev_out_x,
+ Float* dev_out_y,
+ Float* dev_out_z)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Check that we are not outside the fluid grid
+ if (x <= devC_grid.num[0]
+ && y <= devC_grid.num[1]
+ && z <= devC_grid.num[2]) {
+
+ const unsigned int faceidx = vidx(x,y,z);
+
+ __syncthreads();
+ const Float3 xn = dev_in[idx(x-1,y,z)];
+ const Float3 yn = dev_in[idx(x,y-1,z)];
+ const Float3 zn = dev_in[idx(x,y,z-1)];
+ const Float3 center = dev_in[idx(x,y,z)];
+
+ const Float x_val = (center.x + xn.x)/2.0;
+ const Float y_val = (center.y + yn.y)/2.0;
+ const Float z_val = (center.z + zn.z)/2.0;
+
+ __syncthreads();
+ dev_out_x[faceidx] = x_val;
+ dev_out_y[faceidx] = y_val;
+ dev_out_z[faceidx] = z_val;
+ }
+}
+
+// Launch per cell center node
+__global__ void interpolateFaceToCenter(
+ Float* dev_in_x,
+ Float* dev_in_y,
+ Float* dev_in_z,
+ Float3* dev_out)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Check that we are not outside the fluid grid
+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
+
+ const unsigned int cellidx = idx(x,y,z);
+
+ __syncthreads();
+ const Float x_n = dev_in_x[vidx(x,y,z)];
+ const Float x_p = dev_in_x[vidx(x+1,y,z)];
+ const Float y_n = dev_in_y[vidx(x,y,z)];
+ const Float y_p = dev_in_y[vidx(x,y+1,z)];
+ const Float z_n = dev_in_z[vidx(x,y,z)];
+ const Float z_p = dev_in_z[vidx(x,y,z+1)];
+
+ const Float3 val = MAKE_FLOAT3(
+ (x_n + x_p)/2.0,
+ (y_n + y_p)/2.0,
+ (z_n + z_p)/2.0);
+
+ __syncthreads();
+ dev_out[cellidx] = val;
+ }
+}
+
+// Launch per cell face node. Set velocity ghost nodes beforehand.
+// Find div(tau) at all cell faces.
+__global__ void findFaceDivTau(
+ Float* dev_ns_v_x,
+ Float* dev_ns_v_y,
+ Float* dev_ns_v_z,
+ Float* dev_ns_div_tau_x,
+ Float* dev_ns_div_tau_y,
+ Float* dev_ns_div_tau_z)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // Cell sizes
+ const Float dx = devC_grid.L[0]/nx;
+ const Float dy = devC_grid.L[1]/ny;
+ const Float dz = devC_grid.L[2]/nz;
+
+ // Check that we are not outside the fluid grid
+ if (x <= nx && y <= ny && z <= nz) {
+
+ const unsigned int faceidx = vidx(x,y,z);
+
+ __syncthreads();
+ const Float v_x_xn = dev_ns_v_x[vidx(x-1,y,z)];
+ const Float v_x = dev_ns_v_x[faceidx];
+ const Float v_x_xp = dev_ns_v_x[vidx(x+1,y,z)];
+ const Float v_x_yn = dev_ns_v_x[vidx(x,y-1,z)];
+ const Float v_x_yp = dev_ns_v_x[vidx(x,y+1,z)];
+ const Float v_x_zn = dev_ns_v_x[vidx(x,y,z-1)];
+ const Float v_x_zp = dev_ns_v_x[vidx(x,y,z+1)];
+
+ const Float v_y_xn = dev_ns_v_y[vidx(x-1,y,z)];
+ const Float v_y_xp = dev_ns_v_y[vidx(x+1,y,z)];
+ const Float v_y_yn = dev_ns_v_y[vidx(x,y-1,z)];
+ const Float v_y = dev_ns_v_y[faceidx];
+ const Float v_y_yp = dev_ns_v_y[vidx(x,y+1,z)];
+ const Float v_y_zn = dev_ns_v_y[vidx(x,y,z-1)];
+ const Float v_y_zp = dev_ns_v_y[vidx(x,y,z+1)];
+
+ const Float v_z_xn = dev_ns_v_z[vidx(x-1,y,z)];
+ const Float v_z_xp = dev_ns_v_z[vidx(x+1,y,z)];
+ const Float v_z_yn = dev_ns_v_z[vidx(x,y-1,z)];
+ const Float v_z_yp = dev_ns_v_z[vidx(x,y+1,z)];
+ const Float v_z_zn = dev_ns_v_z[vidx(x,y,z-1)];
+ const Float v_z = dev_ns_v_z[faceidx];
+ const Float v_z_zp = dev_ns_v_z[vidx(x,y,z+1)];
+
+ const Float div_tau_x =
+ devC_params.mu*(
+ (v_x_xp - 2.0*v_x + v_x_xn)/(dx*dx) +
+ (v_x_yp - 2.0*v_x + v_x_yn)/(dy*dy) +
+ (v_x_zp - 2.0*v_x + v_x_zn)/(dz*dz));
+ const Float div_tau_y =
+ devC_params.mu*(
+ (v_y_xp - 2.0*v_y + v_y_xn)/(dx*dx) +
+ (v_y_yp - 2.0*v_y + v_y_yn)/(dy*dy) +
+ (v_y_zp - 2.0*v_y + v_y_zn)/(dz*dz));
+ const Float div_tau_z =
+ devC_params.mu*(
+ (v_z_xp - 2.0*v_z + v_z_xn)/(dx*dx) +
+ (v_z_yp - 2.0*v_z + v_z_yn)/(dy*dy) +
+ (v_z_zp - 2.0*v_z + v_z_zn)/(dz*dz));
+
+ __syncthreads();
+ dev_ns_div_tau_x[faceidx] = div_tau_x;
+ dev_ns_div_tau_y[faceidx] = div_tau_y;
+ dev_ns_div_tau_z[faceidx] = div_tau_z;
+ }
+}
+
// Print final heads and free memory
void DEM::endNSdev()
{
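
The new findFaceDivTau kernel above evaluates the stress divergence as the dynamic viscosity times a component-wise 7-point Laplacian of the face velocities. This corresponds to the constant-viscosity identity for an incompressible Newtonian fluid, written here in LaTeX notation for reference (not quoted from the source):

    \nabla \cdot \boldsymbol{\tau}
        = \mu \left( \nabla^2 \mathbf{v} + \nabla (\nabla \cdot \mathbf{v}) \right)
        = \mu \nabla^2 \mathbf{v}
    \qquad \text{with } \boldsymbol{\tau} = \mu \left( \nabla \mathbf{v} + (\nabla \mathbf{v})^{\mathrm{T}} \right),
    \; \nabla \cdot \mathbf{v} = 0,

so each velocity component only needs the central-difference Laplacian evaluated at its own face, which is exactly what the kernel computes.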
diff --git a/src/sphere.h b/src/sphere.h
@@ -175,7 +175,7 @@ class DEM {
Float* dev_ns_v_x; // Cell fluid velocity in staggered grid
Float* dev_ns_v_y; // Cell fluid velocity in staggered grid
Float* dev_ns_v_z; // Cell fluid velocity in staggered grid
- Float3* dev_ns_v_p; // Predicted cell fluid velocity
+ //Float3* dev_ns_v_p; // Predicted cell fluid velocity
Float* dev_ns_v_p_x; // Predicted cell fluid velocity in st. gr.
Float* dev_ns_v_p_y; // Predicted cell fluid velocity in st. gr.
Float* dev_ns_v_p_z; // Predicted cell fluid velocity in st. gr.
@@ -184,7 +184,7 @@ class DEM {
Float3* dev_ns_fi; // Particle-fluid interaction force
Float* dev_ns_phi; // Cell porosity
Float* dev_ns_dphi; // Cell porosity change
- Float3* dev_ns_div_phi_v_v; // Divergence used in velocity prediction
+ //Float3* dev_ns_div_phi_v_v; // Divergence used in velocity prediction
Float* dev_ns_epsilon; // Pressure difference
Float* dev_ns_epsilon_new; // Pressure diff. after Jacobi iteration
Float* dev_ns_epsilon_old; // Pressure diff. before Jacobi iteration
@@ -193,10 +193,13 @@ class DEM {
Float* dev_ns_f1; // Constant terms in forcing function
Float3* dev_ns_f2; // Constant slopes in forcing function
Float* dev_ns_v_prod; // Outer product of fluid velocities
Float* dev_ns_tau; // Fluid stress tensor
Float3* dev_ns_div_phi_vi_v; // div(phi*vi*v)
//Float3* dev_ns_div_phi_tau; // div(phi*tau)
Float3* dev_ns_div_tau; // div(tau)
+ Float* dev_ns_div_tau_x; // div(tau) on x-face
+ Float* dev_ns_div_tau_y; // div(tau) on y-face
+ Float* dev_ns_div_tau_z; // div(tau) on z-face
Float3* dev_ns_f_pf; // Particle-fluid interaction force
//// Navier Stokes functions
diff --git a/tests/stokes_law.py b/tests/stokes_law.py
@@ -8,7 +8,7 @@ import matplotlib.pyplot as plt
print("### Stokes test - single sphere sedimentation ###")
## Stokes drag
-orig = sphere.sim(sid = "stokes_law_gradP", fluid = True)
+orig = sphere.sim(sid = "stokes_law_set2", fluid = True)
cleanup(orig)
orig.defaultParams()
orig.addParticle([0.5,0.5,1.46], 0.05)
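
For reference, the single-sphere sedimentation above can be checked against the analytical Stokes terminal velocity (a textbook result, not taken from the test script):

    v_t = \frac{2 (\rho_p - \rho_f) g r^2}{9 \mu_f},

with particle radius r = 0.05 m from the addParticle() call; the densities, viscosity and gravity are set elsewhere in the script and are not shown in this diff.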