tAdded half-step leapfrog Verlet integration scheme to walls - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit 45640191a4d4d51119d37baa4063ee631b342f89
(DIR) parent 5379ed77ad3917674c12f19a303e21aed69d8cd1
(HTM) Author: Anders Damsgaard <adc@geo.au.dk>
Date: Thu, 29 Nov 2012 10:00:57 +0100
Added half-step leapfrog Verlet integration scheme to walls
Diffstat:
M src/device.cu | 10 +++++++++-
M src/integration.cuh | 16 ++++++++++++++--
M src/sphere.h | 1 +
3 files changed, 24 insertions(+), 3 deletions(-)
---
(DIR) diff --git a/src/device.cu b/src/device.cu
t@@ -283,6 +283,7 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
cudaMalloc((void**)&dev_walls_nx, sizeof(Float4)*walls.nw);
cudaMalloc((void**)&dev_walls_mvfd, sizeof(Float4)*walls.nw);
cudaMalloc((void**)&dev_walls_force_pp, sizeof(Float)*walls.nw*np);
+ cudaMalloc((void**)&dev_walls_vel0, sizeof(Float)*walls.nw);
// dev_walls_force_partial allocated later
checkForCudaErrors("End of allocateGlobalDeviceMemory");
t@@ -331,6 +332,7 @@ __host__ void DEM::freeGlobalDeviceMemory()
cudaFree(dev_walls_mvfd);
cudaFree(dev_walls_force_partial);
cudaFree(dev_walls_force_pp);
+ cudaFree(dev_walls_vel0);
checkForCudaErrors("During cudaFree calls");
if (verbose == 1)
t@@ -399,6 +401,10 @@ __host__ void DEM::transferToGlobalDeviceMemory()
sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice);
cudaMemcpy( dev_walls_mvfd, walls.mvfd,
sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice);
+ for (int i = 0; i<walls.nw; ++i) {
+ cudaMemcpy( &dev_walls_vel0[i], &walls.mvfd[i].y,
+ sizeof(Float), cudaMemcpyHostToDevice);
+ }
checkForCudaErrors("End of transferToGlobalDeviceMemory");
if (verbose == 1)
t@@ -736,10 +742,12 @@ __host__ void DEM::startTime()
if (PROFILING == 1)
startTimer(&kernel_tic);
if (walls.nw > 0) {
- integrateWalls<<< 1, walls.nw>>>(dev_walls_nx,
+ integrateWalls<<< 1, walls.nw>>>(
+ dev_walls_nx,
dev_walls_mvfd,
dev_walls_wmode,
dev_walls_force_partial,
+ dev_walls_vel0,
blocksPerGrid);
}
(DIR) diff --git a/src/integration.cuh b/src/integration.cuh
t@@ -225,6 +225,7 @@ __global__ void integrateWalls(Float4* dev_walls_nx,
Float4* dev_walls_mvfd,
int* dev_walls_wmode,
Float* dev_walls_force_partial,
+ Float* dev_walls_vel0,
unsigned int blocksPerGrid)
{
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
t@@ -236,6 +237,7 @@ __global__ void integrateWalls(Float4* dev_walls_nx,
Float4 w_nx = dev_walls_nx[idx];
Float4 w_mvfd = dev_walls_mvfd[idx];
int wmode = dev_walls_wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel
+ Float vel0 = dev_walls_vel0[idx];
Float acc;
t@@ -263,20 +265,30 @@ __global__ void integrateWalls(Float4* dev_walls_nx,
acc = 0.0;
}
+ //// Half-step leapfrog Verlet integration scheme ////
+
+ // Update half-step velocity
+ vel0 += acc * dt;
+
// Update position. Second-order scheme based on Taylor expansion
- w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0;
+ //w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0;
+
+ // Update position
+ w_nx.w += vel0 * dt;
// Update position. First-order Euler integration scheme
//w_nx.w += w_mvfd.y * dt;
// Update linear velocity
- w_mvfd.y += acc * dt;
+ //w_mvfd.y += acc * dt;
+ w_mvfd.y += vel0 + 0.5 * acc * dt;
//cuPrintf("\nwall %d, wmode = %d, force = %f, acc = %f\n", idx, wmode, w_mvfd.z, acc);
// Store data in global memory
dev_walls_nx[idx] = w_nx;
dev_walls_mvfd[idx] = w_mvfd;
+ dev_walls_vel0[idx] = vel0;
}
} // End of integrateWalls(...)
(DIR) diff --git a/src/sphere.h b/src/sphere.h
t@@ -82,6 +82,7 @@ class DEM {
Float4 *dev_walls_mvfd; // Mass, velocity, force, dev. stress
Float *dev_walls_force_partial; // Pre-sum per wall
Float *dev_walls_force_pp; // Force per particle per wall
+ Float *dev_walls_vel0; // Half-step velocity
unsigned char *dev_img;
float4 *dev_ray_origo; // Ray data always single precision
float4 *dev_ray_direction;