tElastc shearmodel fixed - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit 29f60d6d74f37ea5f08849e81702f314cafc6166
(DIR) parent 65ad78a8f1f926d405fc84504351359b5b0f5a81
(HTM) Author: Anders Damsgaard Christensen <adc@geo.au.dk>
Date: Thu, 23 Feb 2012 12:18:04 +0100
Elastc shearmodel fixed
Diffstat:
M src/device.cu | 72 ++++++++++++++++---------------
1 file changed, 37 insertions(+), 35 deletions(-)
---
(DIR) diff --git a/src/device.cu b/src/device.cu
t@@ -11,7 +11,7 @@
#include "datatypes.cuh"
#include "constants.cuh"
-#include "cuPrintf.cu"
+//#include "cuPrintf.cu"
// Returns the cellID containing the particle, based cubic grid
t@@ -763,8 +763,7 @@ __device__ void findContactsInCell(int3 targetCell,
unsigned int* dev_cellEnd,
unsigned int* dev_gridParticleIndex,
int* nc,
- unsigned int* dev_contacts,
- float4* dev_x_ab_r_b)
+ unsigned int* dev_contacts)
{
// Variable containing modifier for interparticle
// vector, if it crosses a periodic boundary
t@@ -890,15 +889,14 @@ __device__ void findContactsInCell(int3 targetCell,
}
__syncthreads();
- //cuPrintf("\nOverlap: idx_a = %u (orig=%u), idx_b = %u (orig=%u), cpos = %d\n",
- // idx_a, idx_a_orig, idx_b, idx_b_orig, cpos);
// Write the particle index to the relevant position,
// no matter if it already is there or not (concurrency of write)
dev_contacts[(unsigned int)(idx_a_orig*devC_nc+cpos)] = idx_b_orig;
+
// Write the interparticle vector and radius of particle B
- dev_x_ab_r_b[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_float4(x_ab, radius_b);
+ //dev_x_ab_r_b[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_float4(x_ab, radius_b);
// Increment contact counter
++*nc;
t@@ -929,8 +927,7 @@ __global__ void topology(unsigned int* dev_cellStart,
unsigned int* dev_cellEnd, // Input: Particles in cell
unsigned int* dev_gridParticleIndex, // Input: Unsorted-sorted key
float4* dev_x_sorted, float* dev_radius_sorted,
- unsigned int* dev_contacts,
- float4* dev_x_ab_r_b)
+ unsigned int* dev_contacts)
{
// Thread index equals index of particle A
unsigned int idx_a = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
t@@ -961,7 +958,7 @@ __global__ void topology(unsigned int* dev_cellStart,
dev_x_sorted, dev_radius_sorted,
dev_cellStart, dev_cellEnd,
dev_gridParticleIndex,
- &nc, dev_contacts, dev_x_ab_r_b);
+ &nc, dev_contacts);
}
}
}
t@@ -977,6 +974,7 @@ __global__ void topology(unsigned int* dev_cellStart,
__global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted-sorted key
unsigned int* dev_cellStart,
unsigned int* dev_cellEnd,
+ float4* dev_x, float* dev_radius,
float4* dev_x_sorted, float* dev_radius_sorted,
float4* dev_vel_sorted, float4* dev_angvel_sorted,
float4* dev_vel, float4* dev_angvel,
t@@ -985,7 +983,6 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
float4* dev_w_nx, float4* dev_w_mvfd,
float* dev_w_force, //uint4* dev_bonds_sorted,
unsigned int* dev_contacts,
- float4* dev_x_ab_r_b,
float4* dev_delta_t)
{
// Thread index equals index of particle A
t@@ -1029,7 +1026,8 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
if (devC_shearmodel == 2) {
unsigned int idx_b_orig, mempos;
float delta_n, x_ab_length;
- float4 x_ab_r_b;
+ float4 x_b;
+ float radius_b;
float3 x_ab;
float4 vel_a = dev_vel_sorted[idx_a];
float4 angvel4_a = dev_angvel_sorted[idx_a];
t@@ -1037,19 +1035,23 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
// Loop over all possible contacts, and remove contacts
// that no longer are valid (delta_n > 0.0)
- for (int i = 0; i<devC_nc; ++i) {
- mempos = idx_a_orig * devC_nc + i;
+ for (int i = 0; i<(int)devC_nc; ++i) {
+ mempos = (unsigned int)(idx_a_orig * devC_nc + i);
+ __syncthreads();
idx_b_orig = dev_contacts[mempos];
- x_ab_r_b = dev_x_ab_r_b[mempos];
- x_ab = make_float3(x_ab_r_b.x,
- x_ab_r_b.y,
- x_ab_r_b.z);
+ x_b = dev_x[idx_b_orig];
+ radius_b = dev_radius[idx_b_orig];
+ x_ab = make_float3(x_a.x - x_b.x,
+ x_a.y - x_b.y,
+ x_a.z - x_b.z);
x_ab_length = length(x_ab);
- delta_n = x_ab_length - (radius_a + x_ab_r_b.w);
+ delta_n = x_ab_length - (radius_a + radius_b);
- // Process collision if the particles are overlapping
- if (delta_n < 0.0f) {
- if (idx_b_orig != devC_np) {
+
+ if (idx_b_orig != (unsigned int)devC_np) {
+
+ // Process collision if the particles are overlapping
+ if (delta_n < 0.0f) {
//cuPrintf("\nProcessing contact, idx_a_orig = %u, idx_b_orig = %u, contact = %d, delta_n = %f\n",
// idx_a_orig, idx_b_orig, i, delta_n);
contactLinear(&N, &T, &es_dot, &p,
t@@ -1059,18 +1061,23 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
dev_vel,
angvel_a,
dev_angvel,
- radius_a, x_ab_r_b.w,
+ radius_a, radius_b,
x_ab, x_ab_length,
delta_n, dev_delta_t,
mempos);
+ } else {
+ __syncthreads();
+ // Remove this contact (there is no particle with index=np)
+ dev_contacts[mempos] = devC_np;
+ // Zero sum of shear displacement in this position
+ dev_delta_t[mempos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
} else {
- // Remove this contact (there is no particle with index=np)
- dev_contacts[mempos] = devC_np;
- // Zero sum of shear displacement in this position
+ __syncthreads();
dev_delta_t[mempos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
- }
+ } // Contact loop end
+
} else if (devC_shearmodel == 1) {
int3 gridPos;
t@@ -1586,8 +1593,6 @@ __host__ void gpuMain(float4* host_x,
unsigned int* dev_contacts;
// x,y,z contains the interparticle vector, corrected if contact
// is across a periodic boundary.
- // w contains radius of particle b.
- float4* dev_x_ab_r_b;
float4* dev_delta_t; // Accumulated shear distance of contact
// Particle memory size
t@@ -1625,7 +1630,6 @@ __host__ void gpuMain(float4* host_x,
// Particle contact bookkeeping arrays
cudaMalloc((void**)&dev_contacts, sizeof(unsigned int)*p->np*NC); // Max NC contacts per particle
- cudaMalloc((void**)&dev_x_ab_r_b, sizeof(float4)*p->np*NC);
cudaMalloc((void**)&dev_delta_t, sizeof(float4)*p->np*NC);
// Wall arrays
t@@ -1668,7 +1672,6 @@ __host__ void gpuMain(float4* host_x,
float4* zerosf4 = new float4[p->np*NC];
for (unsigned int i=0; i<(p->np*NC); ++i)
zerosf4[i] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- cudaMemcpy(dev_x_ab_r_b, zerosf4, sizeof(float4)*p->np*NC, cudaMemcpyHostToDevice);
cudaMemcpy(dev_delta_t, zerosf4, sizeof(float4)*p->np*NC, cudaMemcpyHostToDevice);
delete[] zerosf4;
t@@ -1824,8 +1827,7 @@ __host__ void gpuMain(float4* host_x,
dev_gridParticleIndex,
dev_x_sorted,
dev_radius_sorted,
- dev_contacts,
- dev_x_ab_r_b);
+ dev_contacts);
// Empty cuPrintf() buffer to console
//cudaThreadSynchronize();
t@@ -1836,10 +1838,12 @@ __host__ void gpuMain(float4* host_x,
checkForCudaErrors("Post topology. Possibly caused by numerical instability. Is the computational time step too large?", iter);
}
+
// For each particle: Process collisions and compute resulting forces.
interact<<<dimGrid, dimBlock>>>(dev_gridParticleIndex,
dev_cellStart,
dev_cellEnd,
+ dev_x, dev_radius,
dev_x_sorted, dev_radius_sorted,
dev_vel_sorted, dev_angvel_sorted,
dev_vel, dev_angvel,
t@@ -1847,7 +1851,7 @@ __host__ void gpuMain(float4* host_x,
dev_es_dot, dev_es, dev_p,
dev_w_nx, dev_w_mvfd, dev_w_force,
//dev_bonds_sorted,
- dev_contacts, dev_x_ab_r_b,
+ dev_contacts,
dev_delta_t);
// Empty cuPrintf() buffer to console
t@@ -1858,7 +1862,6 @@ __host__ void gpuMain(float4* host_x,
cudaThreadSynchronize();
checkForCudaErrors("Post interact - often caused if particles move outside the grid", iter);
-
// Update particle kinematics
integrate<<<dimGrid, dimBlock>>>(dev_x_sorted, dev_vel_sorted,
dev_angvel_sorted, dev_radius_sorted,
t@@ -1994,7 +1997,6 @@ __host__ void gpuMain(float4* host_x,
//cudaFree(dev_bonds);
//cudaFree(dev_bonds_sorted);
cudaFree(dev_contacts);
- cudaFree(dev_x_ab_r_b);
cudaFree(dev_delta_t);
// Cell-related arrays