mx1.adamsgaard.dk

       tAdded half-step leapfrog Verlet integration scheme to walls - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 45640191a4d4d51119d37baa4063ee631b342f89
 (DIR) parent 5379ed77ad3917674c12f19a303e21aed69d8cd1
 (HTM) Author: Anders Damsgaard <adc@geo.au.dk>
       Date:   Thu, 29 Nov 2012 10:00:57 +0100
       
       Added half-step leapfrog Verlet integration scheme to walls
       
       Diffstat:
         M src/device.cu                       |      10 +++++++++-
         M src/integration.cuh                 |      16 ++++++++++++++--
         M src/sphere.h                        |       1 +
       
       3 files changed, 24 insertions(+), 3 deletions(-)
       ---
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -283,6 +283,7 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
            cudaMalloc((void**)&dev_walls_nx, sizeof(Float4)*walls.nw);
            cudaMalloc((void**)&dev_walls_mvfd, sizeof(Float4)*walls.nw);
            cudaMalloc((void**)&dev_walls_force_pp, sizeof(Float)*walls.nw*np);
       +    cudaMalloc((void**)&dev_walls_vel0, sizeof(Float)*walls.nw);
            // dev_walls_force_partial allocated later
        
            checkForCudaErrors("End of allocateGlobalDeviceMemory");
       t@@ -331,6 +332,7 @@ __host__ void DEM::freeGlobalDeviceMemory()
            cudaFree(dev_walls_mvfd);
            cudaFree(dev_walls_force_partial);
            cudaFree(dev_walls_force_pp);
       +    cudaFree(dev_walls_vel0);
            checkForCudaErrors("During cudaFree calls");
        
            if (verbose == 1)
       t@@ -399,6 +401,10 @@ __host__ void DEM::transferToGlobalDeviceMemory()
                    sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice);
            cudaMemcpy( dev_walls_mvfd,  walls.mvfd,
                    sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice);
       +    for (int i = 0; i<walls.nw; ++i) {
       +        cudaMemcpy( &dev_walls_vel0[i], &walls.mvfd[i].y,
       +                sizeof(Float), cudaMemcpyHostToDevice);
       +    }
        
            checkForCudaErrors("End of transferToGlobalDeviceMemory");
            if (verbose == 1)
       t@@ -736,10 +742,12 @@ __host__ void DEM::startTime()
                if (PROFILING == 1)
                    startTimer(&kernel_tic);
                if (walls.nw > 0) {
       -            integrateWalls<<< 1, walls.nw>>>(dev_walls_nx,
       +            integrateWalls<<< 1, walls.nw>>>(
       +                    dev_walls_nx,
                            dev_walls_mvfd,
                            dev_walls_wmode,
                            dev_walls_force_partial,
       +                    dev_walls_vel0,
                            blocksPerGrid);
                }
        
 (DIR) diff --git a/src/integration.cuh b/src/integration.cuh
       t@@ -225,6 +225,7 @@ __global__ void integrateWalls(Float4* dev_walls_nx,
                Float4* dev_walls_mvfd,
                int* dev_walls_wmode,
                Float* dev_walls_force_partial,
       +        Float* dev_walls_vel0,
                unsigned int blocksPerGrid)
        {
            unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
       t@@ -236,6 +237,7 @@ __global__ void integrateWalls(Float4* dev_walls_nx,
                Float4 w_nx   = dev_walls_nx[idx];
                Float4 w_mvfd = dev_walls_mvfd[idx];
                int wmode = dev_walls_wmode[idx];  // Wall BC, 0: fixed, 1: devs, 2: vel
       +        Float vel0 = dev_walls_vel0[idx];
                Float acc;
        
        
       t@@ -263,20 +265,30 @@ __global__ void integrateWalls(Float4* dev_walls_nx,
                    acc = 0.0;
                }
        
       +        //// Half-step leapfrog Verlet integration scheme ////
       +        
       +        // Update half-step velocity
       +        vel0 += acc * dt;
       +
                // Update position. Second-order scheme based on Taylor expansion 
       -        w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0;
       +        //w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0;
       +
       +        // Update position
       +        w_nx.w += vel0 * dt;
        
                // Update position. First-order Euler integration scheme
                //w_nx.w += w_mvfd.y * dt;
        
                // Update linear velocity
       -        w_mvfd.y += acc * dt;
       +        //w_mvfd.y += acc * dt;
       +        w_mvfd.y += vel0 + 0.5 * acc * dt;
        
                //cuPrintf("\nwall %d, wmode = %d, force = %f, acc = %f\n", idx, wmode, w_mvfd.z, acc);
        
                // Store data in global memory
                dev_walls_nx[idx]   = w_nx;
                dev_walls_mvfd[idx] = w_mvfd;
       +        dev_walls_vel0[idx] = vel0;
            }
        } // End of integrateWalls(...)
        
 (DIR) diff --git a/src/sphere.h b/src/sphere.h
       t@@ -82,6 +82,7 @@ class DEM {
                Float4 *dev_walls_mvfd; // Mass, velocity, force, dev. stress
                Float *dev_walls_force_partial; // Pre-sum per wall
                Float *dev_walls_force_pp; // Force per particle per wall
       +        Float *dev_walls_vel0; // Half-step velocity
                unsigned char *dev_img;
                float4 *dev_ray_origo;        // Ray data always single precision
                float4 *dev_ray_direction;