tMost constant device parameters are moved to structures devC_params and devC_grid - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit 97645c4855a4b023c0ad2cb8146889ebd70cc7a9
(DIR) parent f0a223e72931062cba1c3008097e3e4d6b070d64
(HTM) Author: Anders Damsgaard <adc@geo.au.dk>
Date: Mon, 22 Oct 2012 10:10:46 +0200
Most constant device parameters are moved to structures devC_params and devC_grid
Diffstat:
M src/cohesion.cuh | 6 +++---
M src/constants.cuh | 32 ++++---------------------------
M src/contactmodels.cuh | 98 ++++++++++++++++----------------
M src/contactsearch.cuh | 100 ++++++++++++++++----------------
M src/datatypes.cuh | 5 +++++
M src/datatypes.h | 13 ++++++-------
M src/device.cu | 58 +++++++++++++++++--------------
M src/integration.cuh | 32 ++++++++++++++++----------------
M src/main.cpp | 19 +++++++++----------
M src/sorting.cuh | 16 ++++++++--------
10 files changed, 181 insertions(+), 198 deletions(-)
---
(DIR) diff --git a/src/cohesion.cuh b/src/cohesion.cuh
t@@ -62,11 +62,11 @@ __device__ void bondLinear(Float3* N, Float3* T, Float* es_dot, Float* p,
Float R_bar = (radius_a + radius_b)/2.0f;
// Normal force component: Elastic
- f_n = devC_k_n * delta_ab * n_ab;
+ f_n = devC_params.k_n * delta_ab * n_ab;
if (length(vel_t_ab) > 0.f) {
// Shear force component: Viscous
- f_t = -1.0f * devC_gamma_t * vel_t_ab;
+ f_t = -1.0f * devC_params.gamma_t * vel_t_ab;
// Shear friction production rate [W]
//*es_dot += -dot(vel_t_ab, f_t);
t@@ -112,7 +112,7 @@ __device__ void capillaryCohesion_exp(Float3* N, Float radius_a,
R_geo = sqrtf(radius_a * radius_b);
// The exponential falloff of the capillary force with distance
- lambda = 0.9f * h * sqrtf(devC_V_b/R_har);
+ lambda = 0.9f * h * sqrtf(devC_params.V_b/R_har);
// Calculate cohesional force
f_c = -kappa * R_geo * expf(-delta_ab/lambda) * n_ab;
(DIR) diff --git a/src/constants.cuh b/src/constants.cuh
t@@ -1,34 +1,10 @@
#ifndef CONSTANTS_CUH_
#define CONSTANTS_CUH_
+// Most constant memory variables are stored in
+// structures, see datatypes.cuh
+
// Constant memory size: 64 kb
-__constant__ Float devC_origo[ND]; // World coordinate system origo
-__constant__ Float devC_L[ND]; // World length in dimension ND
-__constant__ unsigned int devC_num[ND]; // Number of cells in dimension ND
-__constant__ Float devC_dt; // Time step length
-__constant__ int devC_global; // Parameter properties, 1: global, 0: individual
-__constant__ Float devC_g[ND]; // Gravitational acceleration vector
-__constant__ unsigned int devC_np; // Number of particles
__constant__ int devC_nc; // Max. number of contacts a particle can have
-__constant__ unsigned int devC_shearmodel; // Shear force model: 1: viscous, frictional, 2: elastic, frictional
-__constant__ Float devC_k_n; // Material normal stiffness
-__constant__ Float devC_k_t; // Material tangential stiffness
-__constant__ Float devC_k_r; // Material rolling stiffness
-__constant__ Float devC_gamma_n; // Material normal viscosity
-__constant__ Float devC_gamma_t; // Material tangential viscosity
-__constant__ Float devC_gamma_r; // Material rolling viscosity
-__constant__ Float devC_gamma_wn; // Wall normal viscosity
-__constant__ Float devC_gamma_ws; // Wall shear viscosity
-__constant__ Float devC_gamma_wr; // Wall rolling viscosity
-__constant__ Float devC_mu_s; // Material static shear friction coefficient
-__constant__ Float devC_mu_d; // Material dynamic shear friction coefficient
-__constant__ Float devC_mu_r; // Material rolling friction coefficient
-__constant__ Float devC_rho; // Material density
-__constant__ Float devC_kappa; // Capillary bond prefactor
-__constant__ Float devC_db; // Debonding distance
-__constant__ Float devC_V_b; // Liquid volume of capillary bond
-__constant__ unsigned int devC_nw; // Number of walls
-__constant__ unsigned int devC_w_n; // Dimension of orthogonal wall surface normal
-__constant__ int devC_periodic; // Behavior of x- and y boundaries: 0: walls, 1: periodic
-__constant__ int devC_wmode[MAXWALLS]; // Wall BCs, 0: devs, 1: vel
+
#endif
(DIR) diff --git a/src/contactmodels.cuh b/src/contactmodels.cuh
t@@ -41,10 +41,10 @@ __device__ Float contactLinear_wall(Float3* F, Float3* T, Float* es_dot,
Float vel_t_length = length(vel_t);
// Calculate elastic normal component
- //Float3 f_n = -devC_k_n * delta * n;
+ //Float3 f_n = -devC_params.k_n * delta * n;
// Normal force component: Elastic - viscous damping
- Float3 f_n = (-devC_k_n * delta - devC_gamma_wn * vel_n) * n;
+ Float3 f_n = (-devC_params.k_n * delta - devC_params.gamma_wn * vel_n) * n;
// Make sure the viscous damping doesn't exceed the elastic component,
// i.e. the damping factor doesn't exceed the critical damping, 2*sqrt(m*k_n)
t@@ -61,8 +61,8 @@ __device__ Float contactLinear_wall(Float3* F, Float3* T, Float* es_dot,
// divide by zero (producing a NaN)
if (vel_t_length > 0.f) {
- Float f_t_visc = devC_gamma_ws * vel_t_length; // Tangential force by viscous model
- Float f_t_limit = devC_mu_s * f_n_length; // Max. friction
+ Float f_t_visc = devC_params.gamma_ws * vel_t_length; // Tangential force by viscous model
+ Float f_t_limit = devC_params.mu_s * f_n_length; // Max. friction
// If the shear force component exceeds the friction,
// the particle slips and energy is dissipated
t@@ -79,11 +79,11 @@ __device__ Float contactLinear_wall(Float3* F, Float3* T, Float* es_dot,
/* if (angvel_length > 0.f) {
// Apply rolling resistance (Zhou et al. 1999)
- //T_res = -angvel_a/angvel_length * devC_mu_r * radius_a * f_n_length;
+ //T_res = -angvel_a/angvel_length * devC_params.mu_r * radius_a * f_n_length;
// New rolling resistance model
- T_res = -1.0f * fmin(devC_gamma_r * radius_a * angvel_length,
- devC_mu_r * radius_a * f_n_length)
+ T_res = -1.0f * fmin(devC_params.gamma_r * radius_a * angvel_length,
+ devC_params.mu_r * radius_a * f_n_length)
* angvel_a/angvel_length;
}*/
t@@ -179,7 +179,7 @@ __device__ void contactLinearViscous(Float3* F, Float3* T,
//f_n = -k_n_ab * delta_ab * n_ab;
// Normal force component: Elastic - viscous damping
- f_n = (-devC_k_n * delta_ab - devC_gamma_n * vel_n_ab) * n_ab;
+ f_n = (-devC_params.k_n * delta_ab - devC_params.gamma_n * vel_n_ab) * n_ab;
// Make sure the viscous damping doesn't exceed the elastic component,
// i.e. the damping factor doesn't exceed the critical damping, 2*sqrt(m*k_n)
t@@ -201,14 +201,14 @@ __device__ void contactLinearViscous(Float3* F, Float3* T,
if (vel_t_ab_length > 0.f) {
// Tangential force by viscous model
- Float f_t_visc = devC_gamma_t * vel_t_ab_length;
+ Float f_t_visc = devC_params.gamma_t * vel_t_ab_length;
// Determine max. friction
Float f_t_limit;
if (vel_t_ab_length > 0.001f) { // Dynamic
- f_t_limit = devC_mu_d * length(f_n-f_c);
+ f_t_limit = devC_params.mu_d * length(f_n-f_c);
} else { // Static
- f_t_limit = devC_mu_s * length(f_n-f_c);
+ f_t_limit = devC_params.mu_s * length(f_n-f_c);
}
// If the shear force component exceeds the friction,
t@@ -226,11 +226,11 @@ __device__ void contactLinearViscous(Float3* F, Float3* T,
/* if (angvel_ab_length > 0.f) {
// Apply rolling resistance (Zhou et al. 1999)
- //T_res = -angvel_ab/angvel_ab_length * devC_mu_r * R_bar * length(f_n);
+ //T_res = -angvel_ab/angvel_ab_length * devC_params.mu_r * R_bar * length(f_n);
// New rolling resistance model
- T_res = -1.0f * fmin(devC_gamma_r * R_bar * angvel_ab_length,
- devC_mu_r * R_bar * f_n_length)
+ T_res = -1.0f * fmin(devC_params.gamma_r * R_bar * angvel_ab_length,
+ devC_params.mu_r * R_bar * f_n_length)
* angvel_ab/angvel_ab_length;
}
*/
t@@ -316,17 +316,17 @@ __device__ void contactLinear(Float3* F, Float3* T,
//Float k_n_ab = k_n_a * k_n_b / (k_n_a + k_n_b);
// Normal force component: Elastic
- //f_n = -devC_k_n * delta_ab * n_ab;
+ //f_n = -devC_params.k_n * delta_ab * n_ab;
// Normal force component: Elastic - viscous damping
- f_n = (-devC_k_n * delta_ab - devC_gamma_n * vel_n_ab) * n_ab;
+ f_n = (-devC_params.k_n * delta_ab - devC_params.gamma_n * vel_n_ab) * n_ab;
// Store energy dissipated in normal viscous component
// watt = gamma_n * vel_n * dx_n / dt
// watt = gamma_n * vel_n * vel_n * dt / dt
// watt = gamma_n * vel_n * vel_n
// watt = N*m/s = N*s/m * m/s * m/s * s / s
- *ev_dot += devC_gamma_n * vel_n_ab * vel_n_ab;
+ *ev_dot += devC_params.gamma_n * vel_n_ab * vel_n_ab;
// Make sure the viscous damping doesn't exceed the elastic component,
t@@ -337,7 +337,7 @@ __device__ void contactLinear(Float3* F, Float3* T,
Float f_n_length = length(f_n);
// Add max. capillary force
- f_c = -devC_kappa * sqrtf(radius_a * radius_b) * n_ab;
+ f_c = -devC_params.kappa * sqrtf(radius_a * radius_b) * n_ab;
// Initialize force vectors to zero
f_t = MAKE_FLOAT3(0.0f, 0.0f, 0.0f);
t@@ -348,15 +348,15 @@ __device__ void contactLinear(Float3* F, Float3* T,
if (delta_t0_length > 0.f || vel_t_ab_length > 0.f) {
// Shear force: Visco-Elastic, limited by Coulomb friction
- Float3 f_t_elast = -devC_k_t * delta_t0;
- Float3 f_t_visc = -devC_gamma_t * vel_t_ab;
+ Float3 f_t_elast = -devC_params.k_t * delta_t0;
+ Float3 f_t_visc = -devC_params.gamma_t * vel_t_ab;
Float f_t_limit;
if (vel_t_ab_length > 0.001f) { // Dynamic friciton
- f_t_limit = devC_mu_d * length(f_n-f_c);
+ f_t_limit = devC_params.mu_d * length(f_n-f_c);
} else { // Static friction
- f_t_limit = devC_mu_s * length(f_n-f_c);
+ f_t_limit = devC_params.mu_s * length(f_n-f_c);
}
// Tangential force before friction limit correction
t@@ -377,33 +377,33 @@ __device__ void contactLinear(Float3* F, Float3* T,
// In a slip event, the tangential spring is adjusted to a
// length which is consistent with Coulomb's equation
// (Hinrichsen and Wolf, 2004)
- delta_t = -1.0f/devC_k_t * (f_t + devC_gamma_t * vel_t_ab);
+ delta_t = -1.0f/devC_params.k_t * (f_t + devC_params.gamma_t * vel_t_ab);
// Shear friction heat production rate:
// The energy lost from the tangential spring is dissipated as heat
//*es_dot += -dot(vel_t_ab, f_t);
- *es_dot += length(delta_t0 - delta_t) * devC_k_t / devC_dt; // Seen in EsyS-Particle
- //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_dt;
+ *es_dot += length(delta_t0 - delta_t) * devC_params.k_t / devC_params.dt; // Seen in EsyS-Particle
+ //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_params.dt;
} else { // Static case
// No correction of f_t is required
// Add tangential displacement to total tangential displacement
- delta_t = delta_t0 + vel_t_ab * devC_dt;
+ delta_t = delta_t0 + vel_t_ab * devC_params.dt;
}
}
if (angvel_ab_length > 0.f) {
// Apply rolling resistance (Zhou et al. 1999)
- //T_res = -angvel_ab/angvel_ab_length * devC_mu_r * R_bar * length(f_n);
+ //T_res = -angvel_ab/angvel_ab_length * devC_params.mu_r * R_bar * length(f_n);
// New rolling resistance model
- /*T_res = -1.0f * fmin(devC_gamma_r * R_bar * angvel_ab_length,
- devC_mu_r * R_bar * f_n_length)
+ /*T_res = -1.0f * fmin(devC_params.gamma_r * R_bar * angvel_ab_length,
+ devC_params.mu_r * R_bar * f_n_length)
* angvel_ab/angvel_ab_length;*/
- T_res = -1.0f * fmin(devC_gamma_r * radius_a * angvel_ab_length,
- devC_mu_r * radius_a * f_n_length)
+ T_res = -1.0f * fmin(devC_params.gamma_r * radius_a * angvel_ab_length,
+ devC_params.mu_r * radius_a * f_n_length)
* angvel_ab/angvel_ab_length;
}
t@@ -490,8 +490,8 @@ __device__ void contactHertz(Float3* F, Float3* T,
Float3 delta_t;
// Normal force component
- f_n = (-devC_k_n * powf(delta_ab, 3.0f/2.0f)
- -devC_gamma_n * powf(delta_ab, 1.0f/4.0f) * vel_n_ab)
+ f_n = (-devC_params.k_n * powf(delta_ab, 3.0f/2.0f)
+ -devC_params.gamma_n * powf(delta_ab, 1.0f/4.0f) * vel_n_ab)
* n_ab;
// Store energy dissipated in normal viscous component
t@@ -499,7 +499,7 @@ __device__ void contactHertz(Float3* F, Float3* T,
// watt = gamma_n * vel_n * vel_n * dt / dt
// watt = gamma_n * vel_n * vel_n
// watt = N*m/s = N*s/m * m/s * m/s * s / s
- *ev_dot += devC_gamma_n * vel_n_ab * vel_n_ab;
+ *ev_dot += devC_params.gamma_n * vel_n_ab * vel_n_ab;
// Make sure the viscous damping doesn't exceed the elastic component,
t@@ -510,7 +510,7 @@ __device__ void contactHertz(Float3* F, Float3* T,
Float f_n_length = length(f_n);
// Add max. capillary force
- f_c = -devC_kappa * sqrtf(radius_a * radius_b) * n_ab;
+ f_c = -devC_params.kappa * sqrtf(radius_a * radius_b) * n_ab;
// Initialize force vectors to zero
f_t = MAKE_FLOAT3(0.0f, 0.0f, 0.0f);
t@@ -521,14 +521,14 @@ __device__ void contactHertz(Float3* F, Float3* T,
if (delta_t0_length > 0.f || vel_t_ab_length > 0.f) {
// Shear force: Visco-Elastic, limited by Coulomb friction
- Float3 f_t_elast = -devC_k_t * powf(delta_ab, 1.0f/2.0f) * delta_t0;
- Float3 f_t_visc = -devC_gamma_t * powf(delta_ab, 1.0f/4.0f) * vel_t_ab;
+ Float3 f_t_elast = -devC_params.k_t * powf(delta_ab, 1.0f/2.0f) * delta_t0;
+ Float3 f_t_visc = -devC_params.gamma_t * powf(delta_ab, 1.0f/4.0f) * vel_t_ab;
Float f_t_limit;
if (vel_t_ab_length > 0.001f) { // Dynamic friciton
- f_t_limit = devC_mu_d * length(f_n-f_c);
+ f_t_limit = devC_params.mu_d * length(f_n-f_c);
} else { // Static friction
- f_t_limit = devC_mu_s * length(f_n-f_c);
+ f_t_limit = devC_params.mu_s * length(f_n-f_c);
}
// Tangential force before friction limit correction
t@@ -549,34 +549,34 @@ __device__ void contactHertz(Float3* F, Float3* T,
// In a slip event, the tangential spring is adjusted to a
// length which is consistent with Coulomb's equation
// (Hinrichsen and Wolf, 2004)
- delta_t = (f_t + devC_gamma_t * powf(delta_ab, 1.0f/4.0f) * vel_t_ab)
- / (-devC_k_t * powf(delta_ab, 1.0f/2.0f));
+ delta_t = (f_t + devC_params.gamma_t * powf(delta_ab, 1.0f/4.0f) * vel_t_ab)
+ / (-devC_params.k_t * powf(delta_ab, 1.0f/2.0f));
// Shear friction heat production rate:
// The energy lost from the tangential spring is dissipated as heat
//*es_dot += -dot(vel_t_ab, f_t);
- *es_dot += length(delta_t0 - delta_t) * devC_k_t / devC_dt; // Seen in EsyS-Particle
- //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_dt;
+ *es_dot += length(delta_t0 - delta_t) * devC_params.k_t / devC_params.dt; // Seen in EsyS-Particle
+ //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_params.dt;
} else { // Static case
// No correction of f_t is required
// Add tangential displacement to total tangential displacement
- delta_t = delta_t0 + vel_t_ab * devC_dt;
+ delta_t = delta_t0 + vel_t_ab * devC_params.dt;
}
}
if (angvel_ab_length > 0.f) {
// Apply rolling resistance (Zhou et al. 1999)
- //T_res = -angvel_ab/angvel_ab_length * devC_mu_r * R_bar * length(f_n);
+ //T_res = -angvel_ab/angvel_ab_length * devC_params.mu_r * R_bar * length(f_n);
// New rolling resistance model
- /*T_res = -1.0f * fmin(devC_gamma_r * R_bar * angvel_ab_length,
- devC_mu_r * R_bar * f_n_length)
+ /*T_res = -1.0f * fmin(devC_params.gamma_r * R_bar * angvel_ab_length,
+ devC_params.mu_r * R_bar * f_n_length)
* angvel_ab/angvel_ab_length;*/
- T_res = -1.0f * fmin(devC_gamma_r * radius_a * angvel_ab_length,
- devC_mu_r * radius_a * f_n_length)
+ T_res = -1.0f * fmin(devC_params.gamma_r * radius_a * angvel_ab_length,
+ devC_params.mu_r * radius_a * f_n_length)
* angvel_ab/angvel_ab_length;
}
(DIR) diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh
t@@ -14,44 +14,44 @@ __device__ int findDistMod(int3* targetCell, Float3* distmod)
// Check whether x- and y boundaries are to be treated as periodic
// 1: x- and y boundaries periodic
// 2: x boundaries periodic
- if (devC_periodic == 1) {
+ if (devC_params.periodic == 1) {
// Periodic x-boundary
if (targetCell->x < 0) {
- targetCell->x = devC_num[0] - 1;
- *distmod += MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f);
+ targetCell->x = devC_grid.num[0] - 1;
+ *distmod += MAKE_FLOAT3(devC_grid.L[0], 0.0f, 0.0f);
}
- if (targetCell->x == devC_num[0]) {
+ if (targetCell->x == devC_grid.num[0]) {
targetCell->x = 0;
- *distmod -= MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f);
+ *distmod -= MAKE_FLOAT3(devC_grid.L[0], 0.0f, 0.0f);
}
// Periodic y-boundary
if (targetCell->y < 0) {
- targetCell->y = devC_num[1] - 1;
- *distmod += MAKE_FLOAT3(0.0f, devC_L[1], 0.0f);
+ targetCell->y = devC_grid.num[1] - 1;
+ *distmod += MAKE_FLOAT3(0.0f, devC_grid.L[1], 0.0f);
}
- if (targetCell->y == devC_num[1]) {
+ if (targetCell->y == devC_grid.num[1]) {
targetCell->y = 0;
- *distmod -= MAKE_FLOAT3(0.0f, devC_L[1], 0.0f);
+ *distmod -= MAKE_FLOAT3(0.0f, devC_grid.L[1], 0.0f);
}
// Only x-boundaries are periodic
- } else if (devC_periodic == 2) {
+ } else if (devC_params.periodic == 2) {
// Periodic x-boundary
if (targetCell->x < 0) {
- targetCell->x = devC_num[0] - 1;
- *distmod += MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f);
+ targetCell->x = devC_grid.num[0] - 1;
+ *distmod += MAKE_FLOAT3(devC_grid.L[0], 0.0f, 0.0f);
}
- if (targetCell->x == devC_num[0]) {
+ if (targetCell->x == devC_grid.num[0]) {
targetCell->x = 0;
- *distmod -= MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f);
+ *distmod -= MAKE_FLOAT3(devC_grid.L[0], 0.0f, 0.0f);
}
// Hande out-of grid cases on y-axis
- if (targetCell->y < 0 || targetCell->y == devC_num[1])
+ if (targetCell->y < 0 || targetCell->y == devC_grid.num[1])
return -1;
t@@ -59,14 +59,14 @@ __device__ int findDistMod(int3* targetCell, Float3* distmod)
} else {
// Hande out-of grid cases on x- and y-axes
- if (targetCell->x < 0 || targetCell->x == devC_num[0])
+ if (targetCell->x < 0 || targetCell->x == devC_grid.num[0])
return -1;
- if (targetCell->y < 0 || targetCell->y == devC_num[1])
+ if (targetCell->y < 0 || targetCell->y == devC_grid.num[1])
return -1;
}
// Handle out-of-grid cases on z-axis
- if (targetCell->z < 0 || targetCell->z == devC_num[2])
+ if (targetCell->z < 0 || targetCell->z == devC_grid.num[2])
return -1;
// Return successfully
t@@ -107,8 +107,8 @@ __device__ void findAndProcessContactsInCell(int3 targetCell,
//// Check and process particle-particle collisions
// Calculate linear cell ID
- unsigned int cellID = targetCell.x + targetCell.y * devC_num[0]
- + (devC_num[0] * devC_num[1]) * targetCell.z;
+ unsigned int cellID = targetCell.x + targetCell.y * devC_grid.num[0]
+ + (devC_grid.num[0] * devC_grid.num[1]) * targetCell.z;
// Lowest particle index in cell
unsigned int startIdx = dev_cellStart[cellID];
t@@ -126,7 +126,7 @@ __device__ void findAndProcessContactsInCell(int3 targetCell,
// Fetch position and velocity of particle B.
Float4 x_b = dev_x_sorted[idx_b];
Float radius_b = dev_radius_sorted[idx_b];
- Float kappa = devC_kappa;
+ Float kappa = devC_params.kappa;
// Distance between particle centers (Float4 -> Float3)
Float3 x_ab = MAKE_FLOAT3(x_a.x - x_b.x,
t@@ -150,7 +150,7 @@ __device__ void findAndProcessContactsInCell(int3 targetCell,
radius_a, radius_b,
x_ab, x_ab_length,
delta_ab, kappa);
- } else if (delta_ab < devC_db) {
+ } else if (delta_ab < devC_params.db) {
// Check wether particle distance satisfies the capillary bond distance
capillaryCohesion_exp(F, radius_a, radius_b, delta_ab,
x_ab, x_ab_length, kappa);
t@@ -202,8 +202,8 @@ __device__ void findContactsInCell(int3 targetCell,
//// Check and process particle-particle collisions
// Calculate linear cell ID
- unsigned int cellID = targetCell.x + targetCell.y * devC_num[0]
- + (devC_num[0] * devC_num[1]) * targetCell.z;
+ unsigned int cellID = targetCell.x + targetCell.y * devC_grid.num[0]
+ + (devC_grid.num[0] * devC_grid.num[1]) * targetCell.z;
// Lowest particle index in cell
unsigned int startIdx = dev_cellStart[cellID];
t@@ -256,10 +256,10 @@ __device__ void findContactsInCell(int3 targetCell,
unsigned int cidx;
// Find out, if particle is already registered in contact list
- for (int i=0; i<devC_nc; ++i) {
+ for (int i=0; i < devC_nc; ++i) {
__syncthreads();
cidx = dev_contacts[(unsigned int)(idx_a_orig*devC_nc+i)];
- if (cidx == devC_np) // Write to position of now-deleted contact
+ if (cidx == devC_params.np) // Write to position of now-deleted contact
cpos = i;
else if (cidx == idx_b_orig) { // Write to position of same contact
cpos = i;
t@@ -322,7 +322,7 @@ __global__ void topology(unsigned int* dev_cellStart,
{
// Thread index equals index of particle A
unsigned int idx_a = blockIdx.x * blockDim.x + threadIdx.x;
- if (idx_a < devC_np) {
+ if (idx_a < devC_params.np) {
// Fetch particle data in global read
Float4 x_a = dev_x_sorted[idx_a];
Float radius_a = dev_radius_sorted[idx_a];
t@@ -335,9 +335,9 @@ __global__ void topology(unsigned int* dev_cellStart,
int3 targetPos;
// Calculate cell address in grid from position of particle
- gridPos.x = floor((x_a.x - devC_origo[0]) / (devC_L[0]/devC_num[0]));
- gridPos.y = floor((x_a.y - devC_origo[1]) / (devC_L[1]/devC_num[1]));
- gridPos.z = floor((x_a.z - devC_origo[2]) / (devC_L[2]/devC_num[2]));
+ gridPos.x = floor((x_a.x - devC_grid.origo[0]) / (devC_grid.L[0]/devC_grid.num[0]));
+ gridPos.y = floor((x_a.y - devC_grid.origo[1]) / (devC_grid.L[1]/devC_grid.num[1]));
+ gridPos.z = floor((x_a.z - devC_grid.origo[2]) / (devC_grid.L[2]/devC_grid.num[2]));
// Find overlaps between particle no. idx and all particles
// from its own cell + 26 neighbor cells
t@@ -390,7 +390,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
// Thread index equals index of particle A
unsigned int idx_a = blockIdx.x * blockDim.x + threadIdx.x;
- if (idx_a < devC_np) {
+ if (idx_a < devC_params.np) {
// Fetch particle data in global read
unsigned int idx_a_orig = dev_gridParticleIndex[idx_a];
t@@ -402,12 +402,12 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
Float4 w_up_mvfd = dev_w_mvfd[0];
// Fetch world dimensions in constant memory read
- Float3 origo = MAKE_FLOAT3(devC_origo[0],
- devC_origo[1],
- devC_origo[2]);
- Float3 L = MAKE_FLOAT3(devC_L[0],
- devC_L[1],
- devC_L[2]);
+ Float3 origo = MAKE_FLOAT3(devC_grid.origo[0],
+ devC_grid.origo[1],
+ devC_grid.origo[2]);
+ Float3 L = MAKE_FLOAT3(devC_grid.L[0],
+ devC_grid.L[1],
+ devC_grid.L[2]);
// Index of particle which is bonded to particle A.
// The index is equal to the particle no (p.np)
t@@ -426,7 +426,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
Float3 T = MAKE_FLOAT3(0.0f, 0.0f, 0.0f);
// Apply linear elastic, frictional contact model to registered contacts
- if (devC_shearmodel == 2 || devC_shearmodel == 3) {
+ if (devC_params.shearmodel == 2 || devC_params.shearmodel == 3) {
unsigned int idx_b_orig, mempos;
Float delta_n, x_ab_length, radius_b;
Float3 x_ab;
t@@ -454,11 +454,11 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
delta_n = x_ab_length - (radius_a + radius_b);
- if (idx_b_orig != (unsigned int)devC_np) {
+ if (idx_b_orig != (unsigned int)devC_params.np) {
// Process collision if the particles are overlapping
if (delta_n < 0.0f) {
- if (devC_shearmodel == 2) {
+ if (devC_params.shearmodel == 2) {
contactLinear(&F, &T, &es_dot, &ev_dot, &p,
idx_a_orig,
idx_b_orig,
t@@ -470,7 +470,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
x_ab, x_ab_length,
delta_n, dev_delta_t,
mempos);
- } else if (devC_shearmodel == 3) {
+ } else if (devC_params.shearmodel == 3) {
contactHertz(&F, &T, &es_dot, &ev_dot, &p,
idx_a_orig,
idx_b_orig,
t@@ -486,7 +486,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
} else {
__syncthreads();
// Remove this contact (there is no particle with index=np)
- dev_contacts[mempos] = devC_np;
+ dev_contacts[mempos] = devC_params.np;
// Zero sum of shear displacement in this position
dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
}
t@@ -501,15 +501,15 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
// Find contacts and process collisions immidiately for
// shearmodel 1 (visco-frictional).
- } else if (devC_shearmodel == 1) {
+ } else if (devC_params.shearmodel == 1) {
int3 gridPos;
int3 targetPos;
// Calculate address in grid from position
- gridPos.x = floor((x_a.x - devC_origo[0]) / (devC_L[0]/devC_num[0]));
- gridPos.y = floor((x_a.y - devC_origo[1]) / (devC_L[1]/devC_num[1]));
- gridPos.z = floor((x_a.z - devC_origo[2]) / (devC_L[2]/devC_num[2]));
+ gridPos.x = floor((x_a.x - devC_grid.origo[0]) / (devC_grid.L[0]/devC_grid.num[0]));
+ gridPos.y = floor((x_a.y - devC_grid.origo[1]) / (devC_grid.L[1]/devC_grid.num[1]));
+ gridPos.z = floor((x_a.z - devC_grid.origo[2]) / (devC_grid.L[2]/devC_grid.num[2]));
// Find overlaps between particle no. idx and all particles
// from its own cell + 26 neighbor cells.
t@@ -555,7 +555,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
}
- if (devC_periodic == 0) {
+ if (devC_params.periodic == 0) {
// Left wall
delta_w = x_a.x - radius_a - origo.x;
t@@ -593,7 +593,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
w_n, delta_w, 0.0f);
}
- } else if (devC_periodic == 2) {
+ } else if (devC_params.periodic == 2) {
// Front wall
delta_w = x_a.y - radius_a - origo.y;
t@@ -624,8 +624,8 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
dev_torque[orig_idx] = MAKE_FLOAT4(T.x, T.y, T.z, 0.0f);
dev_es_dot[orig_idx] = es_dot;
dev_ev_dot[orig_idx] = ev_dot;
- dev_es[orig_idx] += es_dot * devC_dt;
- dev_ev[orig_idx] += ev_dot * devC_dt;
+ dev_es[orig_idx] += es_dot * devC_params.dt;
+ dev_ev[orig_idx] += ev_dot * devC_params.dt;
dev_p[orig_idx] = p;
dev_w_force[orig_idx] = w_force;
}
(DIR) diff --git a/src/datatypes.cuh b/src/datatypes.cuh
t@@ -4,10 +4,15 @@
#ifndef DATATYPES_CUH_
#define DATATYPES_CUH_
+#include "datatypes.h"
#include "vector_functions.h"
unsigned int iDivUp(unsigned int a, unsigned int b);
void checkForCudaErrors(const char* checkpoint_description);
void checkForCudaErrors(const char* checkpoint_description, const unsigned int iteration);
+
+// Device constant memory structures
+__constant__ Params devC_params;
+__constant__ Grid devC_grid;
#endif
(DIR) diff --git a/src/datatypes.h b/src/datatypes.h
t@@ -99,9 +99,9 @@ struct Particles {
// Structure containing grid parameters
struct Grid {
unsigned int nd;
- Float *origo;
- Float *L;
- unsigned int *num;
+ Float origo[ND];
+ Float L[ND];
+ unsigned int num[ND];
};
// Structure containing time parameters
t@@ -115,13 +115,12 @@ struct Time {
// Structure containing constant, global physical parameters
struct Params {
- //bool global;
int global;
- Float *g;
+ Float g[ND];
+ Float dt;
unsigned int np;
unsigned int nw;
- int* wmode;
- Float dt;
+ int wmode[MAXWALLS];
Float k_n;
Float k_t;
Float k_r;
(DIR) diff --git a/src/device.cu b/src/device.cu
t@@ -94,20 +94,23 @@ __host__ void transferToConstantMemory(Particles* p,
cout << "\n Transfering data to constant device memory: ";
- cudaMemcpyToSymbol("devC_np", &p->np, sizeof(p->np));
+ //cudaMemcpyToSymbol("devC_np", &p->np, sizeof(p->np));
cudaMemcpyToSymbol("devC_nc", &NC, sizeof(int));
- cudaMemcpyToSymbol("devC_origo", grid->origo, sizeof(Float)*ND);
- cudaMemcpyToSymbol("devC_L", grid->L, sizeof(Float)*ND);
- cudaMemcpyToSymbol("devC_num", grid->num, sizeof(unsigned int)*ND);
- cudaMemcpyToSymbol("devC_dt", &time->dt, sizeof(Float));
- cudaMemcpyToSymbol("devC_global", ¶ms->global, sizeof(int));
- cudaMemcpyToSymbol("devC_g", params->g, sizeof(Float)*ND);
- cudaMemcpyToSymbol("devC_nw", ¶ms->nw, sizeof(unsigned int));
- cudaMemcpyToSymbol("devC_periodic", ¶ms->periodic, sizeof(int));
+ //cudaMemcpyToSymbol("devC_origo", grid->origo, sizeof(Float)*ND);
+ //cudaMemcpyToSymbol("devC_L", grid->L, sizeof(Float)*ND);
+ //cudaMemcpyToSymbol("devC_num", grid->num, sizeof(unsigned int)*ND);
+ //cudaMemcpyToSymbol("devC_dt", &time->dt, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_global", ¶ms->global, sizeof(int));
+ //cudaMemcpyToSymbol("devC_g", params->g, sizeof(Float)*ND);
+ //cudaMemcpyToSymbol("devC_nw", ¶ms->nw, sizeof(unsigned int));
+ //cudaMemcpyToSymbol("devC_periodic", ¶ms->periodic, sizeof(int));
+
+ cudaMemcpyToSymbol(devC_grid, grid, sizeof(grid));
if (params->global == 1) {
// If the physical properties of the particles are global (params.global == true),
// copy the values from the first particle into the designated constant memory.
+
//printf("(params.global == %d) ", params.global);
params->k_n = p->k_n[0];
params->k_t = p->k_t[0];
t@@ -119,24 +122,25 @@ __host__ void transferToConstantMemory(Particles* p,
params->mu_d = p->mu_d[0];
params->mu_r = p->mu_r[0];
params->rho = p->rho[0];
- cudaMemcpyToSymbol("devC_k_n", ¶ms->k_n, sizeof(Float));
- cudaMemcpyToSymbol("devC_k_t", ¶ms->k_t, sizeof(Float));
- cudaMemcpyToSymbol("devC_k_r", ¶ms->k_r, sizeof(Float));
- cudaMemcpyToSymbol("devC_gamma_n", ¶ms->gamma_n, sizeof(Float));
- cudaMemcpyToSymbol("devC_gamma_t", ¶ms->gamma_t, sizeof(Float));
- cudaMemcpyToSymbol("devC_gamma_r", ¶ms->gamma_r, sizeof(Float));
- cudaMemcpyToSymbol("devC_gamma_wn", ¶ms->gamma_wn, sizeof(Float));
- cudaMemcpyToSymbol("devC_gamma_ws", ¶ms->gamma_ws, sizeof(Float));
- cudaMemcpyToSymbol("devC_gamma_wr", ¶ms->gamma_wr, sizeof(Float));
- cudaMemcpyToSymbol("devC_mu_s", ¶ms->mu_s, sizeof(Float));
- cudaMemcpyToSymbol("devC_mu_d", ¶ms->mu_d, sizeof(Float));
- cudaMemcpyToSymbol("devC_mu_r", ¶ms->mu_r, sizeof(Float));
- cudaMemcpyToSymbol("devC_rho", ¶ms->rho, sizeof(Float));
- cudaMemcpyToSymbol("devC_kappa", ¶ms->kappa, sizeof(Float));
- cudaMemcpyToSymbol("devC_db", ¶ms->db, sizeof(Float));
- cudaMemcpyToSymbol("devC_V_b", ¶ms->V_b, sizeof(Float));
- cudaMemcpyToSymbol("devC_shearmodel", ¶ms->shearmodel, sizeof(unsigned int));
- cudaMemcpyToSymbol("devC_wmode", ¶ms->wmode, sizeof(int)*MAXWALLS);
+ cudaMemcpyToSymbol(devC_params, params, sizeof(params));
+ //cudaMemcpyToSymbol("devC_k_n", ¶ms->k_n, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_k_t", ¶ms->k_t, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_k_r", ¶ms->k_r, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_gamma_n", ¶ms->gamma_n, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_gamma_t", ¶ms->gamma_t, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_gamma_r", ¶ms->gamma_r, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_gamma_wn", ¶ms->gamma_wn, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_gamma_ws", ¶ms->gamma_ws, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_gamma_wr", ¶ms->gamma_wr, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_mu_s", ¶ms->mu_s, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_mu_d", ¶ms->mu_d, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_mu_r", ¶ms->mu_r, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_rho", ¶ms->rho, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_kappa", ¶ms->kappa, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_db", ¶ms->db, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_V_b", ¶ms->V_b, sizeof(Float));
+ //cudaMemcpyToSymbol("devC_shearmodel", ¶ms->shearmodel, sizeof(unsigned int));
+ //cudaMemcpyToSymbol("devC_wmode", ¶ms->wmode, sizeof(int)*MAXWALLS);
} else {
//printf("(params.global == %d) ", params.global);
// Copy params structure with individual physical values from host to global memory
(DIR) diff --git a/src/integration.cuh b/src/integration.cuh
t@@ -15,7 +15,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
{
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
- if (idx < devC_np) { // Condition prevents block size error
+ if (idx < devC_params.np) { // Condition prevents block size error
// Copy data to temporary arrays to avoid any potential read-after-write,
// write-after-read, or write-after-write hazards.
t@@ -35,10 +35,10 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
Float radius = dev_radius_sorted[idx];
// Coherent read from constant memory to registers
- Float dt = devC_dt;
- Float3 origo = MAKE_FLOAT3(devC_origo[0], devC_origo[1], devC_origo[2]);
- Float3 L = MAKE_FLOAT3(devC_L[0], devC_L[1], devC_L[2]);
- Float rho = devC_rho;
+ Float dt = devC_params.dt;
+ Float3 origo = MAKE_FLOAT3(devC_grid.origo[0], devC_grid.origo[1], devC_grid.origo[2]);
+ Float3 L = MAKE_FLOAT3(devC_grid.L[0], devC_grid.L[1], devC_grid.L[2]);
+ Float rho = devC_params.rho;
// Particle mass
Float m = 4.0f/3.0f * PI * radius*radius*radius * rho;
t@@ -55,9 +55,9 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
angacc.z = torque.z * 1.0f / (2.0f/5.0f * m * radius*radius);
// Add gravity
- acc.x += devC_g[0];
- acc.y += devC_g[1];
- acc.z += devC_g[2];
+ acc.x += devC_params.g[0];
+ acc.y += devC_params.g[1];
+ acc.z += devC_params.g[2];
// Check if particle has a fixed horizontal velocity
if (vel.w > 0.0f) {
t@@ -68,7 +68,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
// to allow for dilation.
acc.x = 0.0f;
acc.y = 0.0f;
- acc.z -= devC_g[2];
+ acc.z -= devC_params.g[2];
// Zero the angular acceleration
angacc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
t@@ -106,7 +106,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
// Move particle across boundary if it is periodic
- if (devC_periodic == 1) {
+ if (devC_params.periodic == 1) {
if (x.x < origo.x)
x.x += L.x;
if (x.x > L.x)
t@@ -115,7 +115,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
x.y += L.y;
if (x.y > L.y)
x.y -= L.y;
- } else if (devC_periodic == 2) {
+ } else if (devC_params.periodic == 2) {
if (x.x < origo.x)
x.x += L.x;
if (x.x > L.x)
t@@ -142,7 +142,7 @@ __global__ void summation(Float* in, Float *out)
unsigned int cacheIdx = threadIdx.x;
Float temp = 0.0f;
- while (idx < devC_np) {
+ while (idx < devC_params.np) {
temp += in[idx];
idx += blockDim.x * gridDim.x;
}
t@@ -175,13 +175,13 @@ __global__ void integrateWalls(Float4* dev_w_nx,
{
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
- if (idx < devC_nw) { // Condition prevents block size error
+ if (idx < devC_params.nw) { // Condition prevents block size error
// Copy data to temporary arrays to avoid any potential read-after-write,
// write-after-read, or write-after-write hazards.
Float4 w_nx = dev_w_nx[idx];
Float4 w_mvfd = dev_w_mvfd[idx];
- int wmode = devC_wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel
+ int wmode = devC_params.wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel
Float acc;
if (wmode == 0) // Wall fixed: do nothing
t@@ -193,11 +193,11 @@ __global__ void integrateWalls(Float4* dev_w_nx,
w_mvfd.z += dev_w_force_partial[i];
}
- Float dt = devC_dt;
+ Float dt = devC_params.dt;
// Normal load = Deviatoric stress times wall surface area,
// directed downwards.
- Float N = -w_mvfd.w*devC_L[0]*devC_L[1];
+ Float N = -w_mvfd.w*devC_grid.L[0]*devC_grid.L[1];
// Calculate resulting acceleration of wall
// (Wall mass is stored in w component of position Float4)
(DIR) diff --git a/src/main.cpp b/src/main.cpp
t@@ -133,7 +133,6 @@ int main(int argc, char *argv[])
exit(1);
// Copy timestep length to constant memory structure
- //time.dt *= 10; // For debugging: Increase timestep one magnitude
params.dt = time.dt;
// Copy number of particles to constant memory structure
t@@ -158,10 +157,10 @@ int main(int argc, char *argv[])
// Allocate host arrays
cout << "\n Allocating host memory: ";
- grid.origo = new Float[ND]; // Coordinate system origo
- grid.L = new Float[ND]; // Model world dimensions
- grid.num = new unsigned int[ND]; // Number of cells in each dimension
- params.g = new Float[ND]; // Gravitational acceleration vector
+ //grid.origo = new Float[ND]; // Coordinate system origo
+ //grid.L = new Float[ND]; // Model world dimensions
+ //grid.num = new unsigned int[ND]; // Number of cells in each dimension
+ //params.g = new Float[ND]; // Gravitational acceleration vector
p.radius = new Float[p.np]; // Particle radii
p.rho = new Float[p.np]; // Particle densities
p.m = new Float[p.np]; // Particle masses
t@@ -180,7 +179,7 @@ int main(int argc, char *argv[])
p.es = new Float[p.np]; // Total shear energy dissipation
p.ev = new Float[p.np]; // Total viscous energy dissipation
p.p = new Float[p.np]; // Pressure excerted onto particle
- params.wmode = new int[MAXWALLS]; // Wall BC's, 0: fixed, 1: devs, 2: vel
+ //params.wmode = new int[MAXWALLS]; // Wall BC's, 0: fixed, 1: devs, 2: vel
// Allocate Float4 host arrays
Float4 *host_x = new Float4[p.np]; // Center coordinates for each particle (x)
t@@ -481,10 +480,10 @@ int main(int argc, char *argv[])
delete[] host_bonds;
// Particle single-value parameters
- delete[] grid.origo;
- delete[] grid.L;
- delete[] grid.num;
- delete[] params.g;
+ //delete[] grid.origo;
+ //delete[] grid.L;
+ //delete[] grid.num;
+ //delete[] params.g;
delete[] p.radius;
delete[] p.k_n;
delete[] p.k_t;
(DIR) diff --git a/src/sorting.cuh b/src/sorting.cuh
t@@ -6,12 +6,12 @@ __device__ int calcCellID(Float3 x)
unsigned int i_x, i_y, i_z;
// Calculate integral coordinates:
- i_x = floor((x.x - devC_origo[0]) / (devC_L[0]/devC_num[0]));
- i_y = floor((x.y - devC_origo[1]) / (devC_L[1]/devC_num[1]));
- i_z = floor((x.z - devC_origo[2]) / (devC_L[2]/devC_num[2]));
+ i_x = floor((x.x - devC_grid.origo[0]) / (devC_grid.L[0]/devC_grid.num[0]));
+ i_y = floor((x.y - devC_grid.origo[1]) / (devC_grid.L[1]/devC_grid.num[1]));
+ i_z = floor((x.z - devC_grid.origo[2]) / (devC_grid.L[2]/devC_grid.num[2]));
// Integral coordinates are converted to 1D coordinate:
- return (i_z * devC_num[1]) * devC_num[0] + i_y * devC_num[0] + i_x;
+ return (i_z * devC_grid.num[1]) * devC_grid.num[0] + i_y * devC_grid.num[0] + i_x;
} // End of calcCellID(...)
t@@ -25,7 +25,7 @@ __global__ void calcParticleCellID(unsigned int* dev_gridParticleCellID,
//unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
- if (idx < devC_np) { // Condition prevents block size error
+ if (idx < devC_params.np) { // Condition prevents block size error
//volatile Float4 x = dev_x[idx]; // Ensure coalesced read
Float4 x = dev_x[idx]; // Ensure coalesced read
t@@ -68,7 +68,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart, unsigned int* dev_cel
unsigned int cellID;
// Read cellID data and store it in shared memory (shared_data)
- if (idx < devC_np) { // Condition prevents block size error
+ if (idx < devC_params.np) { // Condition prevents block size error
cellID = dev_gridParticleCellID[idx];
// Load hash data into shared memory, allowing access to neighbor particle cellID values
t@@ -85,7 +85,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart, unsigned int* dev_cel
__syncthreads();
// Find lowest and highest particle index in each cell
- if (idx < devC_np) { // Condition prevents block size error
+ if (idx < devC_params.np) { // Condition prevents block size error
// If this particle has a different cell index to the previous particle, it's the first
// particle in the cell -> Store the index of this particle in the cell.
// The previous particle must be the last particle in the previous cell.
t@@ -96,7 +96,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart, unsigned int* dev_cel
}
// Check wether the thread is the last one
- if (idx == (devC_np - 1))
+ if (idx == (devC_params.np - 1))
dev_cellEnd[cellID] = idx + 1;