tchanged to 3d thread layout - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit d456b79cf318820c11ea7fbf63ea807591a44cc4
(DIR) parent a1814d9713a6767e9c6e8ef614aad8219ef61dc9
(HTM) Author: Anders Damsgaard <adc@geo.au.dk>
Date: Wed, 1 May 2013 16:06:16 +0200
changed to 3d thread layout
Diffstat:
M src/latticeboltzmann.cuh | 752 ++++++++++++++++---------------
1 file changed, 381 insertions(+), 371 deletions(-)
---
(DIR) diff --git a/src/latticeboltzmann.cuh b/src/latticeboltzmann.cuh
t@@ -43,44 +43,49 @@ __global__ void initfluid(
Float4* dev_v_rho,
Float* dev_f)
{
- // 1D thread index
- const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
- // Check if the thread is outside the cell grid
- if (idx >= devC_grid.num[0]*devC_grid.num[1]*devC_grid.num[2])
- return;
-
// 3D thread index
- const unsigned int z = idx % devC_grid.num[2];
- const unsigned int y = (idx / devC_grid.num[2]) % devC_grid.num[1];
- const unsigned int x = idx / (devC_grid.num[1] * devC_grid.num[2]);
-
- // Read velocity and density, zero velocity
- Float4 v_rho = dev_v_rho[idx];
- v_rho = MAKE_FLOAT4(0.0, 0.0, 0.0, v_rho.w);
-
- // Set values to equilibrium distribution (f_i = omega_i * rho_0)
- __syncthreads();
- dev_v_rho[idx] = v_rho;
- dev_f[grid2index(x,y,z,0)] = 1.0/3.0 * v_rho.w;
- dev_f[grid2index(x,y,z,1)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,2)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,3)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,4)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,5)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,6)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,7)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,8)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,9)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,10)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,11)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,12)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,13)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,14)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,15)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,16)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,17)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,18)] = 1.0/36.0 * v_rho.w;
+ const unsigned int z = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int x = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // Check that we are not outside the fluid grid
+ if (x < nx && y < ny && z < nz) {
+
+ // 1D thread index
+ const unsigned long int tidx = x + nx*y + nx*ny*z;
+
+ // Read velocity and density, zero velocity
+ Float4 v_rho = dev_v_rho[tidx];
+ v_rho = MAKE_FLOAT4(0.0, 0.0, 0.0, v_rho.w);
+
+ // Set values to equilibrium distribution (f_i = omega_i * rho_0)
+ __syncthreads();
+ dev_v_rho[tidx] = v_rho;
+ dev_f[grid2index(x,y,z,0)] = 1.0/3.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,1)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,2)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,3)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,4)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,5)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,6)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,7)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,8)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,9)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,10)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,11)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,12)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,13)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,14)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,15)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,16)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,17)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,18)] = 1.0/36.0 * v_rho.w;
+ }
}
// Swap two arrays pointers
t@@ -104,343 +109,348 @@ __global__ void latticeBoltzmannD3Q19(
Float4* dev_force,
unsigned int* dev_gridParticleIndex)
{
- // 1D thread index
- const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
- // Check if the thread is outside the cell grid
- if (idx >= devC_grid.num[0]*devC_grid.num[1]*devC_grid.num[2])
- return;
-
// 3D thread index
- unsigned int z = idx % devC_grid.num[2];
- unsigned int y = (idx / devC_grid.num[2]) % devC_grid.num[1];
- unsigned int x = idx / (devC_grid.num[1] * devC_grid.num[2]);
-
-
- // Load the fluid distribution into local registers
- __syncthreads();
- Float f_0 = dev_f[grid2index(x,y,z,0)];
- Float f_1 = dev_f[grid2index(x,y,z,1)];
- Float f_2 = dev_f[grid2index(x,y,z,2)];
- Float f_3 = dev_f[grid2index(x,y,z,3)];
- Float f_4 = dev_f[grid2index(x,y,z,4)];
- Float f_5 = dev_f[grid2index(x,y,z,5)];
- Float f_6 = dev_f[grid2index(x,y,z,6)];
- Float f_7 = dev_f[grid2index(x,y,z,7)];
- Float f_8 = dev_f[grid2index(x,y,z,8)];
- Float f_9 = dev_f[grid2index(x,y,z,9)];
- Float f_10 = dev_f[grid2index(x,y,z,10)];
- Float f_11 = dev_f[grid2index(x,y,z,11)];
- Float f_12 = dev_f[grid2index(x,y,z,12)];
- Float f_13 = dev_f[grid2index(x,y,z,13)];
- Float f_14 = dev_f[grid2index(x,y,z,14)];
- Float f_15 = dev_f[grid2index(x,y,z,15)];
- Float f_16 = dev_f[grid2index(x,y,z,16)];
- Float f_17 = dev_f[grid2index(x,y,z,17)];
- Float f_18 = dev_f[grid2index(x,y,z,18)];
-
- // Fluid constant (Wei et al. 2004), nu: kinematic viscosity [Pa*s]
- const Float tau = 0.5*(1.0 + 6.0*devC_params.nu);
-
- // Directional vectors to each lattice-velocity in D3Q19
- // Zero velocity: i = 0
- // Faces: i = 1..6
- // Edges: i = 7..18
- const Float3 e_0 = MAKE_FLOAT3( 0.0, 0.0, 0.0); // zero vel.
- const Float3 e_1 = MAKE_FLOAT3( 1.0, 0.0, 0.0); // face: +x
- const Float3 e_2 = MAKE_FLOAT3(-1.0, 0.0, 0.0); // face: -x
- const Float3 e_3 = MAKE_FLOAT3( 0.0, 1.0, 0.0); // face: +y
- const Float3 e_4 = MAKE_FLOAT3( 0.0,-1.0, 0.0); // face: -y
- const Float3 e_5 = MAKE_FLOAT3( 0.0, 0.0, 1.0); // face: +z
- const Float3 e_6 = MAKE_FLOAT3( 0.0, 0.0,-1.0); // face: -z
- const Float3 e_7 = MAKE_FLOAT3( 1.0, 1.0, 0.0); // edge: +x,+y
- const Float3 e_8 = MAKE_FLOAT3(-1.0,-1.0, 0.0); // edge: -x,-y
- const Float3 e_9 = MAKE_FLOAT3( 1.0,-1.0, 0.0); // edge: -x,+y
- const Float3 e_10 = MAKE_FLOAT3( 1.0,-1.0, 0.0); // edge: +x,-y
- const Float3 e_11 = MAKE_FLOAT3( 1.0, 0.0, 1.0); // edge: +x,+z
- const Float3 e_12 = MAKE_FLOAT3(-1.0, 0.0,-1.0); // edge: -x,-z
- const Float3 e_13 = MAKE_FLOAT3( 0.0, 1.0, 1.0); // edge: +y,+z
- const Float3 e_14 = MAKE_FLOAT3( 0.0,-1.0,-1.0); // edge: -y,-z
- const Float3 e_15 = MAKE_FLOAT3(-1.0, 0.0, 1.0); // edge: -x,+z
- const Float3 e_16 = MAKE_FLOAT3( 1.0, 0.0,-1.0); // edge: +x,-z
- const Float3 e_17 = MAKE_FLOAT3( 0.0,-1.0, 1.0); // edge: -y,+z
- const Float3 e_18 = MAKE_FLOAT3( 0.0, 1.0,-1.0); // edge: +y,-z
-
-
- //// Calculate the cell's macroproperties
-
- // Fluid density (rho = sum(f_i))
- const Float rho = f_0 + f_1 + f_2 + f_3 + f_4 + f_5 + f_6 + f_7 + f_8 + f_9
- + f_10 + f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18;
-
- // Fluid velocity (v = sum(f_i/e_i)/rho)
- const Float3 v = (f_0/e_0 + f_1/e_1 + f_2/e_2 + f_3/e_3 + f_4/e_4 + f_5/e_5
- + f_6/e_6 + f_7/e_7 + f_8/e_8 + f_9/e_9 + f_10/e_10 + f_11/e_11
- + f_12/e_12 + f_13/e_13 + f_14/e_14 + f_15/e_15 + f_16/e_16
- + f_17/e_17 + f_18/e_18) / rho;
-
- //// Calculate the force transferred from the particles to the fluid
- Float3 f_particle;
- Float3 f_particles = MAKE_FLOAT3(0.0, 0.0, 0.0);
- Float4 x_particle4; // particle position + radius
- Float r_particle; // radius
- Float4 v_particle4; // particle velocity + fixvel
- Float3 v_particle; // particle velocity
-
- // Lowest particle index in cell
- unsigned int startIdx = dev_cellStart[idx];
- unsigned int orig_idx;
-
- // Make sure cell is not empty
- if (startIdx != 0xffffffff) {
-
- // Highest particle index in cell + 1
- unsigned int endIdx = dev_cellEnd[idx];
-
- // Iterate over cell particles
- for (unsigned int idx = startIdx; idx<endIdx; ++idx) {
-
- // Read particle radius and velocity
- __syncthreads();
- orig_idx = dev_gridParticleIndex[idx];
- x_particle4 = dev_x_sorted[idx];
- v_particle4 = dev_vel_sorted[idx];
-
- r_particle = x_particle4.w;
- v_particle = MAKE_FLOAT3(
- v_particle4.x,
- v_particle4.y,
- v_particle4.z);
-
- // Aerodynamic drag
- f_particle = (v - v_particle) * r_particle*r_particle;
-
- // Add the drag force to the sum of forces in the cell
- f_particles += f_particle;
-
- // The particle experiences the opposite drag force
- __syncthreads();
- dev_force[orig_idx] = MAKE_FLOAT4(
- -f_particle.x,
- -f_particle.y,
- -f_particle.z,
- 0.0);
+ const unsigned int z = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int x = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // Check that we are not outside the fluid grid
+ if (x < nx && y < ny && z < nz) {
+
+ //printf("(x,y,x) = (%d,%d,%d), tidx = %d\n", x, y, z, tidx);
+
+
+ // Load the fluid distribution into local registers
+ __syncthreads();
+ Float f_0 = dev_f[grid2index(x,y,z,0)];
+ Float f_1 = dev_f[grid2index(x,y,z,1)];
+ Float f_2 = dev_f[grid2index(x,y,z,2)];
+ Float f_3 = dev_f[grid2index(x,y,z,3)];
+ Float f_4 = dev_f[grid2index(x,y,z,4)];
+ Float f_5 = dev_f[grid2index(x,y,z,5)];
+ Float f_6 = dev_f[grid2index(x,y,z,6)];
+ Float f_7 = dev_f[grid2index(x,y,z,7)];
+ Float f_8 = dev_f[grid2index(x,y,z,8)];
+ Float f_9 = dev_f[grid2index(x,y,z,9)];
+ Float f_10 = dev_f[grid2index(x,y,z,10)];
+ Float f_11 = dev_f[grid2index(x,y,z,11)];
+ Float f_12 = dev_f[grid2index(x,y,z,12)];
+ Float f_13 = dev_f[grid2index(x,y,z,13)];
+ Float f_14 = dev_f[grid2index(x,y,z,14)];
+ Float f_15 = dev_f[grid2index(x,y,z,15)];
+ Float f_16 = dev_f[grid2index(x,y,z,16)];
+ Float f_17 = dev_f[grid2index(x,y,z,17)];
+ Float f_18 = dev_f[grid2index(x,y,z,18)];
+
+ // Fluid constant (Wei et al. 2004), nu: kinematic viscosity [Pa*s]
+ const Float tau = 0.5*(1.0 + 6.0*devC_params.nu);
+
+ // Directional vectors to each lattice-velocity in D3Q19
+ // Zero velocity: i = 0
+ // Faces: i = 1..6
+ // Edges: i = 7..18
+ const Float3 e_0 = MAKE_FLOAT3( 0.0, 0.0, 0.0); // zero vel.
+ const Float3 e_1 = MAKE_FLOAT3( 1.0, 0.0, 0.0); // face: +x
+ const Float3 e_2 = MAKE_FLOAT3(-1.0, 0.0, 0.0); // face: -x
+ const Float3 e_3 = MAKE_FLOAT3( 0.0, 1.0, 0.0); // face: +y
+ const Float3 e_4 = MAKE_FLOAT3( 0.0,-1.0, 0.0); // face: -y
+ const Float3 e_5 = MAKE_FLOAT3( 0.0, 0.0, 1.0); // face: +z
+ const Float3 e_6 = MAKE_FLOAT3( 0.0, 0.0,-1.0); // face: -z
+ const Float3 e_7 = MAKE_FLOAT3( 1.0, 1.0, 0.0); // edge: +x,+y
+ const Float3 e_8 = MAKE_FLOAT3(-1.0,-1.0, 0.0); // edge: -x,-y
+ const Float3 e_9 = MAKE_FLOAT3( 1.0,-1.0, 0.0); // edge: -x,+y
+ const Float3 e_10 = MAKE_FLOAT3( 1.0,-1.0, 0.0); // edge: +x,-y
+ const Float3 e_11 = MAKE_FLOAT3( 1.0, 0.0, 1.0); // edge: +x,+z
+ const Float3 e_12 = MAKE_FLOAT3(-1.0, 0.0,-1.0); // edge: -x,-z
+ const Float3 e_13 = MAKE_FLOAT3( 0.0, 1.0, 1.0); // edge: +y,+z
+ const Float3 e_14 = MAKE_FLOAT3( 0.0,-1.0,-1.0); // edge: -y,-z
+ const Float3 e_15 = MAKE_FLOAT3(-1.0, 0.0, 1.0); // edge: -x,+z
+ const Float3 e_16 = MAKE_FLOAT3( 1.0, 0.0,-1.0); // edge: +x,-z
+ const Float3 e_17 = MAKE_FLOAT3( 0.0,-1.0, 1.0); // edge: -y,+z
+ const Float3 e_18 = MAKE_FLOAT3( 0.0, 1.0,-1.0); // edge: +y,-z
+
+
+ //// Calculate the cell's macroproperties
+
+ // Fluid density (rho = sum(f_i))
+ const Float rho = f_0 + f_1 + f_2 + f_3 + f_4 + f_5 + f_6 + f_7 + f_8 +
+ f_9 + f_10 + f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18;
+
+ // Fluid velocity (v = sum(f_i/e_i)/rho)
+ const Float3 v = (f_0/e_0 + f_1/e_1 + f_2/e_2 + f_3/e_3 + f_4/e_4 +
+ f_5/e_5 + f_6/e_6 + f_7/e_7 + f_8/e_8 + f_9/e_9 + f_10/e_10 +
+ f_11/e_11 + f_12/e_12 + f_13/e_13 + f_14/e_14 + f_15/e_15 +
+ f_16/e_16 + f_17/e_17 + f_18/e_18) / rho;
+
+ //// Calculate the force transferred from the particles to the fluid
+ Float3 f_particle;
+ Float3 f_particles = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ Float4 x_particle4; // particle position + radius
+ Float r_particle; // radius
+ Float4 v_particle4; // particle velocity + fixvel
+ Float3 v_particle; // particle velocity
+
+ // 1D thread index
+ const unsigned int tidx = x + nx*y + nx*ny*z;
+
+ // Lowest particle index in cell
+ unsigned int startIdx = dev_cellStart[tidx];
+ //unsigned int orig_idx;
+
+ // Make sure cell is not empty
+ if (startIdx != 0xffffffff) {
+
+ // Highest particle index in cell + 1
+ unsigned int endIdx = dev_cellEnd[tidx];
+
+
+ // Iterate over cell particles
+ for (unsigned int idx = startIdx; idx<endIdx; ++idx) {
+
+ // Read particle radius and velocity
+ __syncthreads();
+ x_particle4 = dev_x_sorted[idx];
+ v_particle4 = dev_vel_sorted[idx];
+
+ r_particle = x_particle4.w;
+ v_particle = MAKE_FLOAT3(
+ v_particle4.x,
+ v_particle4.y,
+ v_particle4.z);
+
+ // Aerodynamic drag
+ f_particle = (v - v_particle) * r_particle*r_particle;
+
+ // Add the drag force to the sum of forces in the cell
+ f_particles += f_particle;
+
+ // The particle experiences the opposite drag force
+ // Causes "unspecified launch failure"
+ //orig_idx = dev_gridParticleIndex[idx];
+ //dev_force[orig_idx] = MAKE_FLOAT4(
+ //-f_particle.x,
+ //-f_particle.y,
+ //-f_particle.z,
+ //0.0);
+ }
}
- }
-
- // Scale the particle force
- // 100: experimental value, depends on the grid size compared to the
- // particle size and the time step size
- f_particles *= 100.0 * rho * 6.0;
-
- // Gravitational force (F = g * m)
- const Float3 f_gravity = MAKE_FLOAT3(
- devC_params.g[0],
- devC_params.g[1],
- devC_params.g[2])
- * (devC_grid.L[0]/devC_grid.num[0])
- * (devC_grid.L[1]/devC_grid.num[1])
- * (devC_grid.L[2]/devC_grid.num[2]) * rho;
-
- // The final external force
- const Float3 f_ext = f_particles + f_gravity;
-
- //// Collide fluid
- // Weights corresponding to each e_i lattice-velocity in D3Q19, sum to 1.0
- f_0 -= bgk(f_0, tau, v, rho, e_0, 1.0/3.0, f_ext);
- f_1 -= bgk(f_1, tau, v, rho, e_1, 1.0/18.0, f_ext);
- f_2 -= bgk(f_2, tau, v, rho, e_2, 1.0/18.0, f_ext);
- f_3 -= bgk(f_3, tau, v, rho, e_3, 1.0/18.0, f_ext);
- f_4 -= bgk(f_4, tau, v, rho, e_4, 1.0/18.0, f_ext);
- f_5 -= bgk(f_5, tau, v, rho, e_5, 1.0/18.0, f_ext);
- f_6 -= bgk(f_6, tau, v, rho, e_6, 1.0/18.0, f_ext);
- f_7 -= bgk(f_7, tau, v, rho, e_7, 1.0/36.0, f_ext);
- f_8 -= bgk(f_8, tau, v, rho, e_8, 1.0/36.0, f_ext);
- f_9 -= bgk(f_9, tau, v, rho, e_9, 1.0/36.0, f_ext);
- f_10 -= bgk(f_10, tau, v, rho, e_10, 1.0/36.0, f_ext);
- f_11 -= bgk(f_11, tau, v, rho, e_11, 1.0/36.0, f_ext);
- f_12 -= bgk(f_12, tau, v, rho, e_12, 1.0/36.0, f_ext);
- f_13 -= bgk(f_13, tau, v, rho, e_13, 1.0/36.0, f_ext);
- f_14 -= bgk(f_14, tau, v, rho, e_14, 1.0/36.0, f_ext);
- f_15 -= bgk(f_15, tau, v, rho, e_15, 1.0/36.0, f_ext);
- f_16 -= bgk(f_16, tau, v, rho, e_16, 1.0/36.0, f_ext);
- f_17 -= bgk(f_17, tau, v, rho, e_17, 1.0/36.0, f_ext);
- f_18 -= bgk(f_18, tau, v, rho, e_18, 1.0/36.0, f_ext);
-
-
- //// Stream fluid
- // Lower and upper boundaries: bounceback, sides: periodic
- unsigned int nx = devC_grid.num[0];
- unsigned int ny = devC_grid.num[1];
- unsigned int nz = devC_grid.num[2];
-
- // There may be a write conflict due to bounce backs
- __syncthreads();
-
- // Face 0
- dev_f_new[grid2index(x,y,z,0)] = fmax(0.0, f_0);
-
- // Face 1 (+x): Periodic
- if (x < nx-1) // not at boundary
- dev_f_new[grid2index( x+1, y, z, 1)] = fmax(0.0, f_1);
- else // at boundary
- dev_f_new[grid2index( 0, y, z, 1)] = fmax(0.0, f_1);
-
- // Face 2 (-x): Periodic
- if (x > 0) // not at boundary
- dev_f_new[grid2index( x-1, y, z, 2)] = fmax(0.0, f_2);
- else // at boundary
- dev_f_new[grid2index(nx-1, y, z, 2)] = fmax(0.0, f_2);
-
- // Face 3 (+y): Periodic
- if (y < ny-1) // not at boundary
- dev_f_new[grid2index( x, y+1, z, 3)] = fmax(0.0, f_3);
- else // at boundary
- dev_f_new[grid2index( x, 0, z, 3)] = fmax(0.0, f_3);
-
- // Face 4 (-y): Periodic
- if (y > 0) // not at boundary
- dev_f_new[grid2index( x, y-1, z, 4)] = fmax(0.0, f_4);
- else // at boundary
- dev_f_new[grid2index( x,ny-1, z, 4)] = fmax(0.0, f_4);
-
- // Face 5 (+z): Bounce back, free slip
- if (z < nz-1) // not at boundary
- dev_f_new[grid2index( x, y, z+1, 5)] = fmax(0.0, f_5);
- else // at boundary
- dev_f_new[grid2index( x, y, z, 6)] = fmax(0.0, f_5);
-
- // Face 6 (-z): Bounce back, free slip
- if (z > 0) // not at boundary
- dev_f_new[grid2index( x, y, z-1, 6)] = fmax(0.0, f_6);
- else // at boundary
- dev_f_new[grid2index( x, y, z, 5)] = fmax(0.0, f_6);
-
- // Edge 7 (+x,+y): Periodic
- if (x < nx-1 && y < ny-1) // not at boundary
- dev_f_new[grid2index( x+1, y+1, z, 7)] = fmax(0.0, f_7);
- else if (x < nx-1) // at +y boundary
- dev_f_new[grid2index( x+1, 0, z, 7)] = fmax(0.0, f_7);
- else if (y < ny-1) // at +x boundary
- dev_f_new[grid2index( 0, y+1, z, 7)] = fmax(0.0, f_7);
- else // at +x+y boundary
- dev_f_new[grid2index( 0, 0, z, 7)] = fmax(0.0, f_7);
-
- // Edge 8 (-x,-y): Periodic
- if (x > 0 && y > 0) // not at boundary
- dev_f_new[grid2index( x-1, y-1, z, 8)] = fmax(0.0, f_8);
- else if (x > 0) // at -y boundary
- dev_f_new[grid2index( x-1,ny-1, z, 8)] = fmax(0.0, f_8);
- else if (y > 0) // at -x boundary
- dev_f_new[grid2index(nx-1, y-1, z, 8)] = fmax(0.0, f_8);
- else // at -x-y boundary
- dev_f_new[grid2index(nx-1,ny-1, z, 8)] = fmax(0.0, f_8);
-
- // Edge 9 (-x,+y): Periodic
- if (x > 0 && y < ny-1) // not at boundary
- dev_f_new[grid2index( x-1, y+1, z, 9)] = fmax(0.0, f_9);
- else if (x > 0) // at +y boundary
- dev_f_new[grid2index( x-1, 0, z, 9)] = fmax(0.0, f_9);
- else if (y < ny-1) // at -x boundary
- dev_f_new[grid2index(nx-1, y+1, z, 9)] = fmax(0.0, f_9);
- else // at -x+y boundary
- dev_f_new[grid2index(nx-1, 0, z, 9)] = fmax(0.0, f_9);
-
- // Edge 10 (+x,-y): Periodic
- if (x < nx-1 && y > 0) // not at boundary
- dev_f_new[grid2index( x+1, y-1, z, 10)] = fmax(0.0, f_10);
- else if (x < nx-1) // at -y boundary
- dev_f_new[grid2index( x+1,ny-1, z, 10)] = fmax(0.0, f_10);
- else if (y > 0) // at +x boundary
- dev_f_new[grid2index( 0, y-1, z, 10)] = fmax(0.0, f_10);
- else // at +x-y boundary
- dev_f_new[grid2index( 0,ny-1, z, 10)] = fmax(0.0, f_10);
-
- // Edge 11 (+x,+z): Periodic & bounce-back (free slip)
- if (x < nx-1 && z < nz-1) // not at boundary
- dev_f_new[grid2index( x+1, y, z+1, 11)] = fmax(0.0, f_11);
- else if (x < nx-1) // at +z boundary
- dev_f_new[grid2index( x+1, y, 0, 12)] = fmax(0.0, f_11);
- else if (z < nz-1) // at +x boundary
- dev_f_new[grid2index( 0, y, z+1, 11)] = fmax(0.0, f_11);
- else // at +x+z boundary
- dev_f_new[grid2index( 0, y, 0, 12)] = fmax(0.0, f_11);
-
- // Edge 12 (-x,-z): Periodic & bounce back (free slip)
- if (x > 0 && z > 0) // not at boundary
- dev_f_new[grid2index( x-1, y, z-1, 12)] = fmax(0.0, f_12);
- else if (x > 0) // at -z boundary
- dev_f_new[grid2index( x-1, y,nz-1, 11)] = fmax(0.0, f_12);
- else if (z > 0) // at -x boundary
- dev_f_new[grid2index(nx-1, y, z-1, 12)] = fmax(0.0, f_12);
- else // at -x-z boundary
- dev_f_new[grid2index(nx-1, y,nz-1, 11)] = fmax(0.0, f_12);
-
- // Edge 13 (+y,+z): Periodic & bounce-back (free slip)
- if (y < ny-1 && z < nz-1) // not at boundary
- dev_f_new[grid2index( x, y+1, z+1, 13)] = fmax(0.0, f_13);
- else if (y < ny-1) // at +z boundary
- dev_f_new[grid2index( x, y+1, 0, 14)] = fmax(0.0, f_13);
- else if (z < nz-1) // at +y boundary
- dev_f_new[grid2index( x, 0, z+1, 13)] = fmax(0.0, f_13);
- else // at +y+z boundary
- dev_f_new[grid2index( x, 0, 0, 14)] = fmax(0.0, f_13);
-
- // Edge 14 (-y,-z): Periodic & bounce-back (free slip)
- if (y > 0 && z > 0) // not at boundary
- dev_f_new[grid2index( x, y-1, z-1, 14)] = fmax(0.0, f_14);
- else if (y > 0) // at -z boundary
- dev_f_new[grid2index( x, y-1,nz-1, 13)] = fmax(0.0, f_14);
- else if (z > 0) // at -y boundary
- dev_f_new[grid2index( x,ny-1, z-1, 14)] = fmax(0.0, f_14);
- else // at -y-z boundary
- dev_f_new[grid2index( x,ny-1,nz-1, 13)] = fmax(0.0, f_14);
-
- // Edge 15 (-x,+z): Periodic & bounce-back (free slip)
- if (x > 0 && z < nz-1) // not at boundary
- dev_f_new[grid2index( x-1, y, z+1, 15)] = fmax(0.0, f_15);
- else if (x > 0) // at +z boundary
- dev_f_new[grid2index( x-1, y, 0, 16)] = fmax(0.0, f_15);
- else if (z < nz-1) // at -x boundary
- dev_f_new[grid2index(nx-1, y, z+1, 15)] = fmax(0.0, f_15);
- else // at -x+z boundary
- dev_f_new[grid2index(nx-1, y, 0, 16)] = fmax(0.0, f_15);
-
- // Edge 16 (+x,-z): Periodic & bounce-back (free slip)
- if (x < nx-1 && z > 0) // not at boundary
- dev_f_new[grid2index( x+1, y, z-1, 16)] = fmax(0.0, f_16);
- else if (x < nx-1) // at -z boundary
- dev_f_new[grid2index( x+1, y,nz-1, 15)] = fmax(0.0, f_16);
- else if (z > 0) // at +x boundary
- dev_f_new[grid2index( 0, y, z-1, 16)] = fmax(0.0, f_16);
- else // at +x-z boundary
- dev_f_new[grid2index( 0, y,nz-1, 15)] = fmax(0.0, f_16);
-
- // Edge 17 (-y,+z): Periodic & bounce-back (free slip)
- if (y > 0 && z < nz-1) // not at boundary
- dev_f_new[grid2index( x, y-1, z+1, 17)] = fmax(0.0, f_17);
- else if (y > 0) // at +z boundary
- dev_f_new[grid2index( x, y-1, 0, 18)] = fmax(0.0, f_17);
- else if (z < nz-1) // at -y boundary
- dev_f_new[grid2index( x,ny-1, z+1, 17)] = fmax(0.0, f_17);
- else // at -y+z boundary
- dev_f_new[grid2index( x,ny-1, 0, 18)] = fmax(0.0, f_17);
-
- // Edge 18 (+y,-z): Periodic & bounce-back (free slip)
- if (y < ny-1 && z > 0) // not at boundary
- dev_f_new[grid2index( x, y+1, z-1, 18)] = fmax(0.0, f_18);
- else if (y < ny-1) // at -z boundary
- dev_f_new[grid2index( x, y+1, 0, 17)] = fmax(0.0, f_18);
- else if (z > 0) // at +y boundary
- dev_f_new[grid2index( x, 0, z+1, 18)] = fmax(0.0, f_18);
- else // at +y-z boundary
- dev_f_new[grid2index( x, 0, 0, 17)] = fmax(0.0, f_18);
-
-
- // Write fluid velocity and density to global memory
- __syncthreads();
- dev_v_rho[idx] = MAKE_FLOAT4(v.x, v.y, v.z, rho);
+ // Scale the particle force
+ // 100: experimental value, depends on the grid size compared to the
+ // particle size and the time step size
+ f_particles *= 100.0 * rho * 6.0;
+
+ // Gravitational force (F = g * m)
+ const Float3 f_gravity = MAKE_FLOAT3(
+ devC_params.g[0],
+ devC_params.g[1],
+ devC_params.g[2])
+ * (devC_grid.L[0]/devC_grid.num[0])
+ * (devC_grid.L[1]/devC_grid.num[1])
+ * (devC_grid.L[2]/devC_grid.num[2]) * rho;
+
+ // The final external force
+ const Float3 f_ext = f_particles + f_gravity;
+
+ //// Collide fluid
+ // Weights corresponding to each e_i lattice-velocity in D3Q19, sum to 1.0
+ f_0 -= bgk(f_0, tau, v, rho, e_0, 1.0/3.0, f_ext);
+ f_1 -= bgk(f_1, tau, v, rho, e_1, 1.0/18.0, f_ext);
+ f_2 -= bgk(f_2, tau, v, rho, e_2, 1.0/18.0, f_ext);
+ f_3 -= bgk(f_3, tau, v, rho, e_3, 1.0/18.0, f_ext);
+ f_4 -= bgk(f_4, tau, v, rho, e_4, 1.0/18.0, f_ext);
+ f_5 -= bgk(f_5, tau, v, rho, e_5, 1.0/18.0, f_ext);
+ f_6 -= bgk(f_6, tau, v, rho, e_6, 1.0/18.0, f_ext);
+ f_7 -= bgk(f_7, tau, v, rho, e_7, 1.0/36.0, f_ext);
+ f_8 -= bgk(f_8, tau, v, rho, e_8, 1.0/36.0, f_ext);
+ f_9 -= bgk(f_9, tau, v, rho, e_9, 1.0/36.0, f_ext);
+ f_10 -= bgk(f_10, tau, v, rho, e_10, 1.0/36.0, f_ext);
+ f_11 -= bgk(f_11, tau, v, rho, e_11, 1.0/36.0, f_ext);
+ f_12 -= bgk(f_12, tau, v, rho, e_12, 1.0/36.0, f_ext);
+ f_13 -= bgk(f_13, tau, v, rho, e_13, 1.0/36.0, f_ext);
+ f_14 -= bgk(f_14, tau, v, rho, e_14, 1.0/36.0, f_ext);
+ f_15 -= bgk(f_15, tau, v, rho, e_15, 1.0/36.0, f_ext);
+ f_16 -= bgk(f_16, tau, v, rho, e_16, 1.0/36.0, f_ext);
+ f_17 -= bgk(f_17, tau, v, rho, e_17, 1.0/36.0, f_ext);
+ f_18 -= bgk(f_18, tau, v, rho, e_18, 1.0/36.0, f_ext);
+
+
+ //// Stream fluid
+ // Lower and upper boundaries: bounceback, sides: periodic
+
+
+ // There may be a write conflict due to bounce backs
+ __syncthreads();
+
+ // Write fluid velocity and density to global memory
+ dev_v_rho[tidx] = MAKE_FLOAT4(v.x, v.y, v.z, rho);
+
+ // Face 0
+ dev_f_new[grid2index(x,y,z,0)] = fmax(0.0, f_0);
+
+ // Face 1 (+x): Periodic
+ if (x < nx-1) // not at boundary
+ dev_f_new[grid2index( x+1, y, z, 1)] = fmax(0.0, f_1);
+ else // at boundary
+ dev_f_new[grid2index( 0, y, z, 1)] = fmax(0.0, f_1);
+
+ // Face 2 (-x): Periodic
+ if (x > 0) // not at boundary
+ dev_f_new[grid2index( x-1, y, z, 2)] = fmax(0.0, f_2);
+ else // at boundary
+ dev_f_new[grid2index(nx-1, y, z, 2)] = fmax(0.0, f_2);
+
+ // Face 3 (+y): Periodic
+ if (y < ny-1) // not at boundary
+ dev_f_new[grid2index( x, y+1, z, 3)] = fmax(0.0, f_3);
+ else // at boundary
+ dev_f_new[grid2index( x, 0, z, 3)] = fmax(0.0, f_3);
+
+ // Face 4 (-y): Periodic
+ if (y > 0) // not at boundary
+ dev_f_new[grid2index( x, y-1, z, 4)] = fmax(0.0, f_4);
+ else // at boundary
+ dev_f_new[grid2index( x,ny-1, z, 4)] = fmax(0.0, f_4);
+
+ // Face 5 (+z): Bounce back, free slip
+ if (z < nz-1) // not at boundary
+ dev_f_new[grid2index( x, y, z+1, 5)] = fmax(0.0, f_5);
+ else // at boundary
+ dev_f_new[grid2index( x, y, z, 6)] = fmax(0.0, f_5);
+
+ // Face 6 (-z): Bounce back, free slip
+ if (z > 0) // not at boundary
+ dev_f_new[grid2index( x, y, z-1, 6)] = fmax(0.0, f_6);
+ else // at boundary
+ dev_f_new[grid2index( x, y, z, 5)] = fmax(0.0, f_6);
+
+ // Edge 7 (+x,+y): Periodic
+ if (x < nx-1 && y < ny-1) // not at boundary
+ dev_f_new[grid2index( x+1, y+1, z, 7)] = fmax(0.0, f_7);
+ else if (x < nx-1) // at +y boundary
+ dev_f_new[grid2index( x+1, 0, z, 7)] = fmax(0.0, f_7);
+ else if (y < ny-1) // at +x boundary
+ dev_f_new[grid2index( 0, y+1, z, 7)] = fmax(0.0, f_7);
+ else // at +x+y boundary
+ dev_f_new[grid2index( 0, 0, z, 7)] = fmax(0.0, f_7);
+
+ // Edge 8 (-x,-y): Periodic
+ if (x > 0 && y > 0) // not at boundary
+ dev_f_new[grid2index( x-1, y-1, z, 8)] = fmax(0.0, f_8);
+ else if (x > 0) // at -y boundary
+ dev_f_new[grid2index( x-1,ny-1, z, 8)] = fmax(0.0, f_8);
+ else if (y > 0) // at -x boundary
+ dev_f_new[grid2index(nx-1, y-1, z, 8)] = fmax(0.0, f_8);
+ else // at -x-y boundary
+ dev_f_new[grid2index(nx-1,ny-1, z, 8)] = fmax(0.0, f_8);
+
+ // Edge 9 (-x,+y): Periodic
+ if (x > 0 && y < ny-1) // not at boundary
+ dev_f_new[grid2index( x-1, y+1, z, 9)] = fmax(0.0, f_9);
+ else if (x > 0) // at +y boundary
+ dev_f_new[grid2index( x-1, 0, z, 9)] = fmax(0.0, f_9);
+ else if (y < ny-1) // at -x boundary
+ dev_f_new[grid2index(nx-1, y+1, z, 9)] = fmax(0.0, f_9);
+ else // at -x+y boundary
+ dev_f_new[grid2index(nx-1, 0, z, 9)] = fmax(0.0, f_9);
+
+ // Edge 10 (+x,-y): Periodic
+ /*if (x < nx-1 && y > 0) // not at boundary
+ dev_f_new[grid2index( x+1, y-1, z, 10)] = fmax(0.0, f_10);
+ else if (x < nx-1) // at -y boundary
+ dev_f_new[grid2index( x+1,ny-1, z, 10)] = fmax(0.0, f_10);
+ else if (y > 0) // at +x boundary
+ dev_f_new[grid2index( 0, y-1, z, 10)] = fmax(0.0, f_10);
+ else // at +x-y boundary
+ dev_f_new[grid2index( 0,ny-1, z, 10)] = fmax(0.0, f_10);
+
+ // Edge 11 (+x,+z): Periodic & bounce-back (free slip)
+ if (x < nx-1 && z < nz-1) // not at boundary
+ dev_f_new[grid2index( x+1, y, z+1, 11)] = fmax(0.0, f_11);
+ else if (x < nx-1) // at +z boundary
+ dev_f_new[grid2index( x+1, y, 0, 12)] = fmax(0.0, f_11);
+ else if (z < nz-1) // at +x boundary
+ dev_f_new[grid2index( 0, y, z+1, 11)] = fmax(0.0, f_11);
+ else // at +x+z boundary
+ dev_f_new[grid2index( 0, y, 0, 12)] = fmax(0.0, f_11);
+
+ // Edge 12 (-x,-z): Periodic & bounce back (free slip)
+ if (x > 0 && z > 0) // not at boundary
+ dev_f_new[grid2index( x-1, y, z-1, 12)] = fmax(0.0, f_12);
+ else if (x > 0) // at -z boundary
+ dev_f_new[grid2index( x-1, y,nz-1, 11)] = fmax(0.0, f_12);
+ else if (z > 0) // at -x boundary
+ dev_f_new[grid2index(nx-1, y, z-1, 12)] = fmax(0.0, f_12);
+ else // at -x-z boundary
+ dev_f_new[grid2index(nx-1, y,nz-1, 11)] = fmax(0.0, f_12);
+
+ // Edge 13 (+y,+z): Periodic & bounce-back (free slip)
+ if (y < ny-1 && z < nz-1) // not at boundary
+ dev_f_new[grid2index( x, y+1, z+1, 13)] = fmax(0.0, f_13);
+ else if (y < ny-1) // at +z boundary
+ dev_f_new[grid2index( x, y+1, 0, 14)] = fmax(0.0, f_13);
+ else if (z < nz-1) // at +y boundary
+ dev_f_new[grid2index( x, 0, z+1, 13)] = fmax(0.0, f_13);
+ else // at +y+z boundary
+ dev_f_new[grid2index( x, 0, 0, 14)] = fmax(0.0, f_13);
+
+ // Edge 14 (-y,-z): Periodic & bounce-back (free slip)
+ if (y > 0 && z > 0) // not at boundary
+ dev_f_new[grid2index( x, y-1, z-1, 14)] = fmax(0.0, f_14);
+ else if (y > 0) // at -z boundary
+ dev_f_new[grid2index( x, y-1,nz-1, 13)] = fmax(0.0, f_14);
+ else if (z > 0) // at -y boundary
+ dev_f_new[grid2index( x,ny-1, z-1, 14)] = fmax(0.0, f_14);
+ else // at -y-z boundary
+ dev_f_new[grid2index( x,ny-1,nz-1, 13)] = fmax(0.0, f_14);
+
+ // Edge 15 (-x,+z): Periodic & bounce-back (free slip)
+ if (x > 0 && z < nz-1) // not at boundary
+ dev_f_new[grid2index( x-1, y, z+1, 15)] = fmax(0.0, f_15);
+ else if (x > 0) // at +z boundary
+ dev_f_new[grid2index( x-1, y, 0, 16)] = fmax(0.0, f_15);
+ else if (z < nz-1) // at -x boundary
+ dev_f_new[grid2index(nx-1, y, z+1, 15)] = fmax(0.0, f_15);
+ else // at -x+z boundary
+ dev_f_new[grid2index(nx-1, y, 0, 16)] = fmax(0.0, f_15);
+
+ // Edge 16 (+x,-z): Periodic & bounce-back (free slip)
+ if (x < nx-1 && z > 0) // not at boundary
+ dev_f_new[grid2index( x+1, y, z-1, 16)] = fmax(0.0, f_16);
+ else if (x < nx-1) // at -z boundary
+ dev_f_new[grid2index( x+1, y,nz-1, 15)] = fmax(0.0, f_16);
+ else if (z > 0) // at +x boundary
+ dev_f_new[grid2index( 0, y, z-1, 16)] = fmax(0.0, f_16);
+ else // at +x-z boundary
+ dev_f_new[grid2index( 0, y,nz-1, 15)] = fmax(0.0, f_16);
+
+ // Edge 17 (-y,+z): Periodic & bounce-back (free slip)
+ if (y > 0 && z < nz-1) // not at boundary
+ dev_f_new[grid2index( x, y-1, z+1, 17)] = fmax(0.0, f_17);
+ else if (y > 0) // at +z boundary
+ dev_f_new[grid2index( x, y-1, 0, 18)] = fmax(0.0, f_17);
+ else if (z < nz-1) // at -y boundary
+ dev_f_new[grid2index( x,ny-1, z+1, 17)] = fmax(0.0, f_17);
+ else // at -y+z boundary
+ dev_f_new[grid2index( x,ny-1, 0, 18)] = fmax(0.0, f_17);
+
+ // Edge 18 (+y,-z): Periodic & bounce-back (free slip)
+ if (y < ny-1 && z > 0) // not at boundary
+ dev_f_new[grid2index( x, y+1, z-1, 18)] = fmax(0.0, f_18);
+ else if (y < ny-1) // at -z boundary
+ dev_f_new[grid2index( x, y+1, 0, 17)] = fmax(0.0, f_18);
+ else if (z > 0) // at +y boundary
+ dev_f_new[grid2index( x, 0, z+1, 18)] = fmax(0.0, f_18);
+ else // at +y-z boundary
+ dev_f_new[grid2index( x, 0, 0, 17)] = fmax(0.0, f_18);
+
+ // */
+ }
}
#endif