tHalf-step leapfrog Verlet integration scheme added - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit a8f9bb4ed81e686acb8e1a51b4ba60fedcf03dae
(DIR) parent 68987a0bcf02ed1b43919109acc000099694f787
(HTM) Author: Anders Damsgaard Christensen <adc@geo.au.dk>
Date: Tue, 13 Nov 2012 20:36:03 +0100
Half-step leapfrog Verlet integration scheme added
Diffstat:
M src/device.cu | 10 ++++++++++
M src/integration.cuh | 154 +++++++++++++++++++------------
M src/sphere.h | 2 ++
3 files changed, 109 insertions(+), 57 deletions(-)
---
(DIR) diff --git a/src/device.cu b/src/device.cu
t@@ -237,10 +237,12 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
cudaMalloc((void**)&dev_x, memSizeF4);
cudaMalloc((void**)&dev_xysum, memSizeF4);
cudaMalloc((void**)&dev_vel, memSizeF4);
+ cudaMalloc((void**)&dev_vel0, memSizeF4);
cudaMalloc((void**)&dev_acc, memSizeF4);
cudaMalloc((void**)&dev_force, memSizeF4);
cudaMalloc((void**)&dev_angpos, memSizeF4);
cudaMalloc((void**)&dev_angvel, memSizeF4);
+ cudaMalloc((void**)&dev_angvel0, memSizeF4);
cudaMalloc((void**)&dev_angacc, memSizeF4);
cudaMalloc((void**)&dev_torque, memSizeF4);
t@@ -296,10 +298,12 @@ __host__ void DEM::freeGlobalDeviceMemory()
cudaFree(dev_x);
cudaFree(dev_xysum);
cudaFree(dev_vel);
+ cudaFree(dev_vel0);
cudaFree(dev_acc);
cudaFree(dev_force);
cudaFree(dev_angpos);
cudaFree(dev_angvel);
+ cudaFree(dev_angvel0);
cudaFree(dev_angacc);
cudaFree(dev_torque);
t@@ -352,6 +356,8 @@ __host__ void DEM::transferToGlobalDeviceMemory()
sizeof(Float2)*np, cudaMemcpyHostToDevice);
cudaMemcpy( dev_vel, k.vel,
memSizeF4, cudaMemcpyHostToDevice);
+ cudaMemcpy( dev_vel0, k.vel,
+ memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy( dev_acc, k.acc,
memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy( dev_force, k.force,
t@@ -360,6 +366,8 @@ __host__ void DEM::transferToGlobalDeviceMemory()
memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy( dev_angvel, k.angvel,
memSizeF4, cudaMemcpyHostToDevice);
+ cudaMemcpy( dev_angvel0, k.angvel,
+ memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy( dev_angacc, k.angacc,
memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy( dev_torque, k.torque,
t@@ -695,6 +703,8 @@ __host__ void DEM::startTime()
dev_angpos,
dev_acc,
dev_angacc,
+ dev_vel0,
+ dev_angvel0,
dev_xysum,
dev_gridParticleIndex);
cudaThreadSynchronize();
(DIR) diff --git a/src/integration.cuh b/src/integration.cuh
t@@ -11,6 +11,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, // Output
Float4* dev_force, Float4* dev_torque, Float4* dev_angpos, // Input
Float4* dev_acc, Float4* dev_angacc,
+ Float4* dev_vel0, Float4* dev_angvel0,
Float2* dev_xysum,
unsigned int* dev_gridParticleIndex) // Input: Sorted-Unsorted key
{
t@@ -21,15 +22,17 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
// Copy data to temporary arrays to avoid any potential read-after-write,
// write-after-read, or write-after-write hazards.
unsigned int orig_idx = dev_gridParticleIndex[idx];
- Float4 force = dev_force[orig_idx];
- Float4 torque = dev_torque[orig_idx];
- Float4 angpos = dev_angpos[orig_idx];
- Float4 acc = dev_acc[orig_idx];
- Float4 angacc = dev_angacc[orig_idx];
- Float4 x = dev_x_sorted[idx];
- Float4 vel = dev_vel_sorted[idx];
- Float4 angvel = dev_angvel_sorted[idx];
- Float radius = x.w;
+ Float4 force = dev_force[orig_idx];
+ Float4 torque = dev_torque[orig_idx];
+ Float4 angpos = dev_angpos[orig_idx];
+ Float4 acc = dev_acc[orig_idx];
+ Float4 angacc = dev_angacc[orig_idx];
+ Float4 vel0 = dev_vel0[orig_idx];
+ Float4 angvel0 = dev_angvel0[orig_idx];
+ Float4 x = dev_x_sorted[idx];
+ Float4 vel = dev_vel_sorted[idx];
+ Float4 angvel = dev_angvel_sorted[idx];
+ Float radius = x.w;
Float2 xysum = MAKE_FLOAT2(0.0f, 0.0f);
t@@ -42,47 +45,6 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
// Particle mass
Float m = 4.0/3.0 * PI * radius*radius*radius * rho;
-#if 0
- //// First-order Euler integration scheme ///
- // Update angular position
- angpos.x += angvel.x * dt;
- angpos.y += angvel.y * dt;
- angpos.z += angvel.z * dt;
-
- // Update position
- x.x += vel.x * dt;
- x.y += vel.y * dt;
- x.z += vel.z * dt;
-#else
-
- /// Second-order scheme based on Taylor expansion ///
- // Update angular position
- angpos.x += angvel.x * dt + angacc.x * dt*dt * 0.5;
- angpos.y += angvel.y * dt + angacc.y * dt*dt * 0.5;
- angpos.z += angvel.z * dt + angacc.z * dt*dt * 0.5;
-
- // Update position
- x.x += vel.x * dt + acc.x * dt*dt * 0.5;
- x.y += vel.y * dt + acc.y * dt*dt * 0.5;
- x.z += vel.z * dt + acc.z * dt*dt * 0.5;
-#endif
-
- // Update angular velocity
- angvel.x += angacc.x * dt;
- angvel.y += angacc.y * dt;
- angvel.z += angacc.z * dt;
-
- // Update linear velocity
- vel.x += acc.x * dt;
- vel.y += acc.y * dt;
- vel.z += acc.z * dt;
-
- // Add x-displacement for this time step to
- // sum of x-displacements
- //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
- xysum.x += vel.x * dt;
- xysum.y += vel.y * dt;// + (acc.y * dt*dt * 0.5f;
-
// Update linear acceleration of particle
acc.x = force.x / m;
acc.y = force.y / m;
t@@ -132,17 +94,95 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
x.x -= L.x;
}
+
+ //// Half-step leapfrog Verlet integration scheme ////
+ // Update half-step linear velocities
+ vel0.x += acc.x * dt;
+ vel0.y += acc.y * dt;
+ vel0.z += acc.z * dt;
+
+ // Update half-step angular velocities
+ angvel0.x += angacc.x * dt;
+ angvel0.y += angacc.y * dt;
+ angvel0.z += angacc.z * dt;
+
+ // Update positions
+ x.x += vel0.x * dt;
+ x.y += vel0.y * dt;
+ x.z += vel0.z * dt;
+
+ // Update angular positions
+ angpos.x += angvel0.x * dt;
+ angpos.y += angvel0.y * dt;
+ angpos.z += angvel0.z * dt;
+
+ // Update full-step linear velocity
+ vel.x = vel0.x + 0.5 * acc.x * dt;
+ vel.y = vel0.y + 0.5 * acc.x * dt;
+ vel.z = vel0.z + 0.5 * acc.x * dt;
+
+ // Update full-step angular velocity
+ angvel.x = angvel0.x + 0.5 * angacc.x * dt;
+ angvel.y = angvel0.y + 0.5 * angacc.x * dt;
+ angvel.z = angvel0.z + 0.5 * angacc.x * dt;
+
+ /*
+ //// First-order Euler integration scheme ///
+ // Update angular position
+ angpos.x += angvel.x * dt;
+ angpos.y += angvel.y * dt;
+ angpos.z += angvel.z * dt;
+
+ // Update position
+ x.x += vel.x * dt;
+ x.y += vel.y * dt;
+ x.z += vel.z * dt;
+ */
+
+ /*
+ /// Second-order scheme based on Taylor expansion ///
+ // Update angular position
+ angpos.x += angvel.x * dt + angacc.x * dt*dt * 0.5;
+ angpos.y += angvel.y * dt + angacc.y * dt*dt * 0.5;
+ angpos.z += angvel.z * dt + angacc.z * dt*dt * 0.5;
+
+ // Update position
+ x.x += vel.x * dt + acc.x * dt*dt * 0.5;
+ x.y += vel.y * dt + acc.y * dt*dt * 0.5;
+ x.z += vel.z * dt + acc.z * dt*dt * 0.5;
+ */
+
+ /*
+ // Update angular velocity
+ angvel.x += angacc.x * dt;
+ angvel.y += angacc.y * dt;
+ angvel.z += angacc.z * dt;
+
+ // Update linear velocity
+ vel.x += acc.x * dt;
+ vel.y += acc.y * dt;
+ vel.z += acc.z * dt;
+ */
+
+ // Add x-displacement for this time step to
+ // sum of x-displacements
+ //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
+ xysum.x += vel.x * dt;
+ xysum.y += vel.y * dt;// + (acc.y * dt*dt * 0.5f;
+
// Hold threads for coalesced write
__syncthreads();
// Store data in global memory at original, pre-sort positions
- dev_xysum[orig_idx] += xysum;
- dev_acc[orig_idx] = acc;
- dev_angacc[orig_idx] = angacc;
- dev_angvel[orig_idx] = angvel;
- dev_vel[orig_idx] = vel;
- dev_angpos[orig_idx] = angpos;
- dev_x[orig_idx] = x;
+ dev_xysum[orig_idx] += xysum;
+ dev_acc[orig_idx] = acc;
+ dev_angacc[orig_idx] = angacc;
+ dev_angvel[orig_idx] = angvel;
+ dev_angvel0[orig_idx] = angvel0;
+ dev_vel[orig_idx] = vel;
+ dev_vel0[orig_idx] = vel0;
+ dev_angpos[orig_idx] = angpos;
+ dev_x[orig_idx] = x;
}
} // End of integrate(...)
(DIR) diff --git a/src/sphere.h b/src/sphere.h
t@@ -54,10 +54,12 @@ class DEM {
Float4 *dev_x;
Float2 *dev_xysum;
Float4 *dev_vel;
+ Float4 *dev_vel0;
Float4 *dev_acc;
Float4 *dev_force;
Float4 *dev_angpos;
Float4 *dev_angvel;
+ Float4 *dev_angvel0;
Float4 *dev_angacc;
Float4 *dev_torque;
unsigned int *dev_contacts;