tIt compiles without errors. Still need to update sphere.py to new file format - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit c7e1dd22dc1892e430ab40a74fcb8df9de98c9d7
(DIR) parent ad86704c71d456826b62ba4d8b1e23dc9f420467
(HTM) Author: Anders Damsgaard <adc@geo.au.dk>
Date: Sat, 27 Oct 2012 00:37:45 +0200
It compiles without errors. Still need to update sphere.py to new file format
Diffstat:
M src/constants.cuh | 1 +
M src/contactmodels.cuh | 16 ++++++++--------
M src/contactsearch.cuh | 54 +++++++++++++++++--------------
D src/datatypes.cuh | 18 ------------------
M src/device.cu | 220 +++++++++++++++----------------
M src/integration.cuh | 27 +++++++++++++--------------
M src/sorting.cuh | 8 ++++----
M src/sphere.cpp | 16 +++++++++++++++-
M src/sphere.h | 5 ++++-
9 files changed, 181 insertions(+), 184 deletions(-)
---
(DIR) diff --git a/src/constants.cuh b/src/constants.cuh
t@@ -9,6 +9,7 @@
// Constant memory size: 64 kb
__constant__ unsigned int devC_nd; // Number of dimensions
__constant__ unsigned int devC_np; // Number of particles
+__constant__ unsigned int devC_nw; // Number of dynamic walls
__constant__ int devC_nc; // Max. number of contacts a particle can have
__constant__ Float devC_dt; // Computational time step length
(DIR) diff --git a/src/contactmodels.cuh b/src/contactmodels.cuh
t@@ -44,7 +44,7 @@ __device__ Float contactLinear_wall(Float3* F, Float3* T, Float* es_dot,
//Float3 f_n = -devC_params.k_n * delta * n;
// Normal force component: Elastic - viscous damping
- Float3 f_n = (-devC_params.k_n * delta - devC_params.gamma_wn * vel_n) * n;
+ Float3 f_n = (-devC_params.k_n * delta - devC_params.gamma_n * vel_n) * n;
// Make sure the viscous damping doesn't exceed the elastic component,
// i.e. the damping factor doesn't exceed the critical damping, 2*sqrt(m*k_n)
t@@ -61,7 +61,7 @@ __device__ Float contactLinear_wall(Float3* F, Float3* T, Float* es_dot,
// divide by zero (producing a NaN)
if (vel_t_length > 0.f) {
- Float f_t_visc = devC_params.gamma_wt * vel_t_length; // Tangential force by viscous model
+ Float f_t_visc = devC_params.gamma_t * vel_t_length; // Tangential force by viscous model
Float f_t_limit = devC_params.mu_s * f_n_length; // Max. friction
// If the shear force component exceeds the friction,
t@@ -382,15 +382,15 @@ __device__ void contactLinear(Float3* F, Float3* T,
// Shear friction heat production rate:
// The energy lost from the tangential spring is dissipated as heat
//*es_dot += -dot(vel_t_ab, f_t);
- *es_dot += length(delta_t0 - delta_t) * devC_params.k_t / devC_params.dt; // Seen in EsyS-Particle
- //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_params.dt;
+ *es_dot += length(delta_t0 - delta_t) * devC_params.k_t / devC_dt; // Seen in EsyS-Particle
+ //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_dt;
} else { // Static case
// No correction of f_t is required
// Add tangential displacement to total tangential displacement
- delta_t = delta_t0 + vel_t_ab * devC_params.dt;
+ delta_t = delta_t0 + vel_t_ab * devC_dt;
}
}
t@@ -555,15 +555,15 @@ __device__ void contactHertz(Float3* F, Float3* T,
// Shear friction heat production rate:
// The energy lost from the tangential spring is dissipated as heat
//*es_dot += -dot(vel_t_ab, f_t);
- *es_dot += length(delta_t0 - delta_t) * devC_params.k_t / devC_params.dt; // Seen in EsyS-Particle
- //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_params.dt;
+ *es_dot += length(delta_t0 - delta_t) * devC_params.k_t / devC_dt; // Seen in EsyS-Particle
+ //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_dt;
} else { // Static case
// No correction of f_t is required
// Add tangential displacement to total tangential displacement
- delta_t = delta_t0 + vel_t_ab * devC_params.dt;
+ delta_t = delta_t0 + vel_t_ab * devC_dt;
}
}
(DIR) diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh
t@@ -14,7 +14,7 @@ __device__ int findDistMod(int3* targetCell, Float3* distmod)
// Check whether x- and y boundaries are to be treated as periodic
// 1: x- and y boundaries periodic
// 2: x boundaries periodic
- if (devC_params.periodic == 1) {
+ if (devC_grid.periodic == 1) {
// Periodic x-boundary
if (targetCell->x < 0) {
t@@ -38,7 +38,7 @@ __device__ int findDistMod(int3* targetCell, Float3* distmod)
// Only x-boundaries are periodic
- } else if (devC_params.periodic == 2) {
+ } else if (devC_grid.periodic == 2) {
// Periodic x-boundary
if (targetCell->x < 0) {
t@@ -77,7 +77,7 @@ __device__ int findDistMod(int3* targetCell, Float3* distmod)
// Find overlaps between particle no. 'idx' and particles in cell 'gridpos'.
// Contacts are processed as soon as they are identified.
-// Used for shearmodel=1, where contact history is not needed.
+// Used for contactmodel=1, where contact history is not needed.
// Kernel executed on device, and callable from device only.
// Function is called from interact().
__device__ void findAndProcessContactsInCell(int3 targetCell,
t@@ -175,7 +175,7 @@ __device__ void findAndProcessContactsInCell(int3 targetCell,
// Find overlaps between particle no. 'idx' and particles in cell 'gridpos'
// Write the indexes of the overlaps in array contacts[].
-// Used for shearmodel=2, where bookkeeping of contact history is necessary.
+// Used for contactmodel=2, where bookkeeping of contact history is necessary.
// Kernel executed on device, and callable from device only.
// Function is called from topology().
__device__ void findContactsInCell(int3 targetCell,
t@@ -257,7 +257,7 @@ __device__ void findContactsInCell(int3 targetCell,
for (int i=0; i < devC_nc; ++i) {
__syncthreads();
cidx = dev_contacts[(unsigned int)(idx_a_orig*devC_nc+i)];
- if (cidx == devC_params.np) // Write to position of now-deleted contact
+ if (cidx == devC_np) // Write to position of now-deleted contact
cpos = i;
else if (cidx == idx_b_orig) { // Write to position of same contact
cpos = i;
t@@ -320,7 +320,7 @@ __global__ void topology(unsigned int* dev_cellStart,
{
// Thread index equals index of particle A
unsigned int idx_a = blockIdx.x * blockDim.x + threadIdx.x;
- if (idx_a < devC_params.np) {
+ if (idx_a < devC_np) {
// Fetch particle data in global read
Float4 x_a = dev_x_sorted[idx_a];
Float radius_a = x_a.w;
t@@ -356,13 +356,13 @@ __global__ void topology(unsigned int* dev_cellStart,
// For a single particle:
-// If shearmodel=1:
+// If contactmodel=1:
// Search for neighbors to particle 'idx' inside the 27 closest cells,
// and compute the resulting force and torque on it.
-// If shearmodel=2:
+// If contactmodel=2:
// Process contacts saved in dev_contacts by topology(), and compute
// the resulting force and torque on it.
-// For all shearmodels:
+// For all contactmodels:
// Collide with top- and bottom walls, save resulting force on upper wall.
// Kernel is executed on device, and is callable from host only.
// Function is called from mainGPU loop.
t@@ -371,15 +371,19 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
unsigned int* dev_cellEnd,
Float4* dev_x,
Float4* dev_x_sorted,
- Float4* dev_vel_sorted, Float4* dev_angvel_sorted,
- Float4* dev_vel, Float4* dev_angvel,
- Float4* dev_force, Float4* dev_torque,
+ Float4* dev_vel_sorted,
+ Float4* dev_angvel_sorted,
+ Float4* dev_vel,
+ Float4* dev_angvel,
+ Float4* dev_force,
+ Float4* dev_torque,
Float* dev_es_dot,
Float* dev_ev_dot,
Float* dev_es,
Float* dev_ev,
Float* dev_p,
- Float4* dev_w_nx, Float4* dev_w_mvfd,
+ Float4* dev_w_nx,
+ Float4* dev_w_mvfd,
Float* dev_w_force, //uint4* dev_bonds_sorted,
unsigned int* dev_contacts,
Float4* dev_distmod,
t@@ -388,7 +392,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
// Thread index equals index of particle A
unsigned int idx_a = blockIdx.x * blockDim.x + threadIdx.x;
- if (idx_a < devC_params.np) {
+ if (idx_a < devC_np) {
// Fetch particle data in global read
unsigned int idx_a_orig = dev_gridParticleIndex[idx_a];
t@@ -424,7 +428,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
Float3 T = MAKE_FLOAT3(0.0f, 0.0f, 0.0f);
// Apply linear elastic, frictional contact model to registered contacts
- if (devC_params.shearmodel == 2 || devC_params.shearmodel == 3) {
+ if (devC_params.contactmodel == 2 || devC_params.contactmodel == 3) {
unsigned int idx_b_orig, mempos;
Float delta_n, x_ab_length, radius_b;
Float3 x_ab;
t@@ -452,11 +456,11 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
delta_n = x_ab_length - (radius_a + radius_b);
- if (idx_b_orig != (unsigned int)devC_params.np) {
+ if (idx_b_orig != (unsigned int)devC_np) {
// Process collision if the particles are overlapping
if (delta_n < 0.0f) {
- if (devC_params.shearmodel == 2) {
+ if (devC_params.contactmodel == 2) {
contactLinear(&F, &T, &es_dot, &ev_dot, &p,
idx_a_orig,
idx_b_orig,
t@@ -468,7 +472,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
x_ab, x_ab_length,
delta_n, dev_delta_t,
mempos);
- } else if (devC_params.shearmodel == 3) {
+ } else if (devC_params.contactmodel == 3) {
contactHertz(&F, &T, &es_dot, &ev_dot, &p,
idx_a_orig,
idx_b_orig,
t@@ -484,7 +488,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
} else {
__syncthreads();
// Remove this contact (there is no particle with index=np)
- dev_contacts[mempos] = devC_params.np;
+ dev_contacts[mempos] = devC_np;
// Zero sum of shear displacement in this position
dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
}
t@@ -498,8 +502,8 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
// Find contacts and process collisions immidiately for
- // shearmodel 1 (visco-frictional).
- } else if (devC_params.shearmodel == 1) {
+ // contactmodel 1 (visco-frictional).
+ } else if (devC_params.contactmodel == 1) {
int3 gridPos;
int3 targetPos;
t@@ -553,7 +557,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
}
- if (devC_params.periodic == 0) {
+ if (devC_grid.periodic == 0) {
// Left wall
delta_w = x_a.x - radius_a - origo.x;
t@@ -591,7 +595,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
w_n, delta_w, 0.0f);
}
- } else if (devC_params.periodic == 2) {
+ } else if (devC_grid.periodic == 2) {
// Front wall
delta_w = x_a.y - radius_a - origo.y;
t@@ -622,8 +626,8 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
dev_torque[orig_idx] = MAKE_FLOAT4(T.x, T.y, T.z, 0.0f);
dev_es_dot[orig_idx] = es_dot;
dev_ev_dot[orig_idx] = ev_dot;
- dev_es[orig_idx] += es_dot * devC_params.dt;
- dev_ev[orig_idx] += ev_dot * devC_params.dt;
+ dev_es[orig_idx] += es_dot * devC_dt;
+ dev_ev[orig_idx] += ev_dot * devC_dt;
dev_p[orig_idx] = p;
dev_w_force[orig_idx] = w_force;
}
(DIR) diff --git a/src/datatypes.cuh b/src/datatypes.cuh
t@@ -1,18 +0,0 @@
-// datatypes.cuh -- Device structure templates and function prototypes
-
-// Avoiding multiple inclusions of header file
-#ifndef DATATYPES_CUH_
-#define DATATYPES_CUH_
-
-#include "datatypes.h"
-#include "vector_functions.h"
-
-unsigned int iDivUp(unsigned int a, unsigned int b);
-void checkForCudaErrors(const char* checkpoint_description);
-void checkForCudaErrors(const char* checkpoint_description, const unsigned int iteration);
-
-// Device constant memory structures
-__constant__ Params devC_params;
-__constant__ Grid devC_grid;
-
-#endif
(DIR) diff --git a/src/device.cu b/src/device.cu
t@@ -97,8 +97,7 @@ __global__ void checkConstantValues(int* dev_equal,
// Compare values between global- and constant
// memory structures
- if (dev_grid->nd != devC_grid.nd ||
- dev_grid->origo[0] != devC_grid.origo[0] ||
+ if (dev_grid->origo[0] != devC_grid.origo[0] ||
dev_grid->origo[1] != devC_grid.origo[1] ||
dev_grid->origo[2] != devC_grid.origo[2] ||
dev_grid->L[0] != devC_grid.L[0] ||
t@@ -106,35 +105,28 @@ __global__ void checkConstantValues(int* dev_equal,
dev_grid->L[2] != devC_grid.L[2] ||
dev_grid->num[0] != devC_grid.num[0] ||
dev_grid->num[1] != devC_grid.num[1] ||
- dev_grid->num[2] != devC_grid.num[2])
+ dev_grid->num[2] != devC_grid.num[2] ||
+ dev_grid->periodic != devC_grid.periodic)
*dev_equal = 1; // Not ok
- else if (dev_params->global != devC_params.global ||
- dev_params->g[0] != devC_params.g[0] ||
+
+ else if (dev_params->g[0] != devC_params.g[0] ||
dev_params->g[1] != devC_params.g[1] ||
dev_params->g[2] != devC_params.g[2] ||
- dev_params->dt != devC_params.dt ||
- dev_params->np != devC_params.np ||
- dev_params->nw != devC_params.nw ||
- dev_params->wmode[0] != devC_params.wmode[0] ||
dev_params->k_n != devC_params.k_n ||
dev_params->k_t != devC_params.k_t ||
dev_params->k_r != devC_params.k_r ||
dev_params->gamma_n != devC_params.gamma_n ||
dev_params->gamma_t != devC_params.gamma_t ||
dev_params->gamma_r != devC_params.gamma_r ||
- dev_params->gamma_wn != devC_params.gamma_wn ||
- dev_params->gamma_wt != devC_params.gamma_wt ||
- dev_params->gamma_wr != devC_params.gamma_wr ||
dev_params->mu_s != devC_params.mu_s ||
dev_params->mu_d != devC_params.mu_d ||
dev_params->mu_r != devC_params.mu_r ||
dev_params->rho != devC_params.rho ||
+ dev_params->contactmodel != devC_params.contactmodel ||
dev_params->kappa != devC_params.kappa ||
dev_params->db != devC_params.db ||
- dev_params->V_b != devC_params.V_b ||
- dev_params->periodic != devC_params.periodic ||
- dev_params->shearmodel != devC_params.shearmodel)
+ dev_params->V_b != devC_params.V_b)
*dev_equal = 2; // Not ok
}
t@@ -155,8 +147,8 @@ __host__ void DEM::checkConstantMemory()
cudaMalloc((void**)&dev_params, sizeof(Params));
// Copy structure data from host to global device memory
- cudaMemcpy(dev_grid, grid, sizeof(Grid), cudaMemcpyHostToDevice);
- cudaMemcpy(dev_params, params, sizeof(Params), cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_grid, &grid, sizeof(Grid), cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_params, ¶ms, sizeof(Params), cudaMemcpyHostToDevice);
// Compare values between global and constant memory
// structures on the device.
t@@ -197,17 +189,18 @@ __host__ void DEM::transferToConstantDeviceMemory()
cudaMemcpyToSymbol("devC_nd", &nd, sizeof(nd));
cudaMemcpyToSymbol("devC_np", &np, sizeof(np));
+ cudaMemcpyToSymbol("devC_nw", &walls.nw, sizeof(unsigned int));
cudaMemcpyToSymbol("devC_nc", &NC, sizeof(int));
cudaMemcpyToSymbol("devC_dt", &time.dt, sizeof(Float));
- cudaMemcpyToSymbol(devC_grid, grid, sizeof(Grid));
- cudaMemcpyToSymbol(devC_params, params, sizeof(Params));
+ cudaMemcpyToSymbol(devC_grid, &grid, sizeof(Grid));
+ cudaMemcpyToSymbol(devC_params, ¶ms, sizeof(Params));
checkForCudaErrors("After transferring to device constant memory");
if (verbose == 1)
cout << "Done\n";
- //checkConstantMemory();
+ checkConstantMemory();
}
t@@ -223,35 +216,35 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
std::cout << " Allocating device memory: ";
// Particle arrays
- cudaMalloc((void**)&dev_k.x, memSizeF4);
- cudaMalloc((void**)&dev_sort.x_sorted, memSizeF4);
- cudaMalloc((void**)&dev_k.vel, memSizeF4);
- cudaMalloc((void**)&dev_sort.vel_sorted, memSizeF4);
- cudaMalloc((void**)&dev_k.angvel, memSizeF4);
- cudaMalloc((void**)&dev_sort.angvel_sorted, memSizeF4);
- cudaMalloc((void**)&dev_k.acc, memSizeF4);
+ cudaMalloc((void**)&dev_k->x, memSizeF4);
+ cudaMalloc((void**)&dev_sort->x_sorted, memSizeF4);
+ cudaMalloc((void**)&dev_k->vel, memSizeF4);
+ cudaMalloc((void**)&dev_sort->vel_sorted, memSizeF4);
+ cudaMalloc((void**)&dev_k->angvel, memSizeF4);
+ cudaMalloc((void**)&dev_sort->angvel_sorted, memSizeF4);
+ cudaMalloc((void**)&dev_k->acc, memSizeF4);
k.acc = new Float4[np];
- cudaMalloc((void**)&dev_k.angacc, memSizeF4);
+ cudaMalloc((void**)&dev_k->angacc, memSizeF4);
k.angacc = new Float4[np];
- cudaMalloc((void**)&dev_k.force, memSizeF4);
- cudaMalloc((void**)&dev_k.torque, memSizeF4);
- cudaMalloc((void**)&dev_k.angpos, memSizeF4);
- cudaMalloc((void**)&dev_e.es_dot, memSizeF);
- cudaMalloc((void**)&dev_e.ev_dot, memSizeF);
- cudaMalloc((void**)&dev_e.es, memSizeF);
- cudaMalloc((void**)&dev_e.ev, memSizeF);
- cudaMalloc((void**)&dev_e.p, memSizeF);
+ cudaMalloc((void**)&dev_k->force, memSizeF4);
+ cudaMalloc((void**)&dev_k->torque, memSizeF4);
+ cudaMalloc((void**)&dev_k->angpos, memSizeF4);
+ cudaMalloc((void**)&dev_e->es_dot, memSizeF);
+ cudaMalloc((void**)&dev_e->ev_dot, memSizeF);
+ cudaMalloc((void**)&dev_e->es, memSizeF);
+ cudaMalloc((void**)&dev_e->ev, memSizeF);
+ cudaMalloc((void**)&dev_e->p, memSizeF);
// Cell-related arrays
- cudaMalloc((void**)&dev_sort.gridParticleCellID, sizeof(unsigned int)*np);
- cudaMalloc((void**)&dev_sort.gridParticleIndex, sizeof(unsigned int)*np);
- cudaMalloc((void**)&dev_sort.cellStart, sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]);
- cudaMalloc((void**)&dev_sort.cellEnd, sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]);
+ cudaMalloc((void**)&dev_sort->gridParticleCellID, sizeof(unsigned int)*np);
+ cudaMalloc((void**)&dev_sort->gridParticleIndex, sizeof(unsigned int)*np);
+ cudaMalloc((void**)&dev_sort->cellStart, sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]);
+ cudaMalloc((void**)&dev_sort->cellEnd, sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]);
// Particle contact bookkeeping arrays
- cudaMalloc((void**)&dev_k.contacts, sizeof(unsigned int)*np*NC); // Max NC contacts per particle
- cudaMalloc((void**)&dev_k.distmod, sizeof(Float4)*np*NC);
- cudaMalloc((void**)&dev_k.delta_t, sizeof(Float4)*np*NC);
+ cudaMalloc((void**)&dev_k->contacts, sizeof(unsigned int)*np*NC); // Max NC contacts per particle
+ cudaMalloc((void**)&dev_k->distmod, sizeof(Float4)*np*NC);
+ cudaMalloc((void**)&dev_k->delta_t, sizeof(Float4)*np*NC);
// Host contact bookkeeping arrays
k.contacts = new unsigned int[np*NC];
t@@ -262,9 +255,9 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
k.delta_t = new Float4[np*NC];
// Wall arrays
- cudaMalloc((void**)&dev_walls.nx, sizeof(Float4)*walls.nw);
- cudaMalloc((void**)&dev_walls.mvfd, sizeof(Float4)*walls.nw);
- cudaMalloc((void**)&dev_walls.force, sizeof(Float)*walls.nw*np);
+ cudaMalloc((void**)&dev_walls->nx, sizeof(Float4)*walls.nw);
+ cudaMalloc((void**)&dev_walls->mvfd, sizeof(Float4)*walls.nw);
+ cudaMalloc((void**)&dev_walls->force, sizeof(Float)*walls.nw*np);
// dev_w_force_partial allocated later
checkForCudaErrors("End of allocateGlobalDeviceMemory");
t@@ -306,7 +299,7 @@ __host__ void DEM::freeGlobalDeviceMemory()
cudaFree(dev_walls->nx);
cudaFree(dev_walls->mvfd);
cudaFree(dev_walls->force);
- cudaFree(dev_w_force_partial);
+ //cudaFree(dev_w_force_partial);
if (verbose == 1)
printf("Done\n");
t@@ -323,19 +316,19 @@ __host__ void DEM::transferToGlobalDeviceMemory()
cudaMemcpy(dev_e, e, sizeof(Energies), cudaMemcpyHostToDevice);
cudaMemcpy(dev_time, time, sizeof(Time), cudaMemcpyHostToDevice);
cudaMemcpy(dev_walls, walls, sizeof(Walls), cudaMemcpyHostToDevice);*/
- cudaMemcpy(dev_k, k, sizeof(k), cudaMemcpyHostToDevice);
- cudaMemcpy(dev_e, e, sizeof(e), cudaMemcpyHostToDevice);
- cudaMemcpy(dev_time, time, sizeof(time), cudaMemcpyHostToDevice);
- cudaMemcpy(dev_walls, walls, sizeof(walls), cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_k, &k, sizeof(k), cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_e, &e, sizeof(e), cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_time, &time, sizeof(time), cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_walls, &walls, sizeof(walls), cudaMemcpyHostToDevice);
checkForCudaErrors("End of transferToGlobalDeviceMemory");
if (verbose == 1)
std::cout << "Done\n";
}
-__host__ void DEM::transferToGlobalDeviceMemory()
+__host__ void DEM::transferFromGlobalDeviceMemory()
{
- cout << " Transfering data to the device: ";
+ std::cout << " Transfering data to the device: ";
// Copy structure data from host to global device memory
cudaMemcpy(&k, dev_k, sizeof(k), cudaMemcpyDeviceToHost);
t@@ -409,7 +402,7 @@ __host__ void DEM::startTime()
long iter = 0;
// Create first status.dat
- sprintf(file,"output/%s.status.dat", inputbin);
+ sprintf(file,"output/%s.status.dat", sid);
fp = fopen(file, "w");
fprintf(fp,"%2.4e %2.4e %d\n",
time.current,
t@@ -418,7 +411,7 @@ __host__ void DEM::startTime()
fclose(fp);
// Write first output data file: output0.bin, thus testing writing of bin files
- sprintf(file,"output/%s.output0.bin", inputbin);
+ sprintf(file,"output/%s.output0.bin", sid);
writebin(file);
if (verbose == 1) {
t@@ -426,7 +419,7 @@ __host__ void DEM::startTime()
<< " IMPORTANT: Do not close this terminal, doing so will \n"
<< " terminate this SPHERE process. Follow the \n"
<< " progress by executing:\n"
- << " $ ./sphere_status " << inputbin << "\n\n";
+ << " $ ./sphere_status " << sid << "\n\n";
}
t@@ -475,9 +468,9 @@ __host__ void DEM::startTime()
// in the fine, uniform and homogenous grid.
if (PROFILING == 1)
startTimer(&kernel_tic);
- calcParticleCellID<<<dimGrid, dimBlock>>>(dev_sort.gridParticleCellID,
- dev_sort.gridParticleIndex,
- dev_k.x);
+ calcParticleCellID<<<dimGrid, dimBlock>>>(dev_sort->gridParticleCellID,
+ dev_sort->gridParticleIndex,
+ dev_k->x);
// Synchronization point
cudaThreadSynchronize();
t@@ -489,9 +482,9 @@ __host__ void DEM::startTime()
// Sort particle (key, particle ID) pairs by hash key with Thrust radix sort
if (PROFILING == 1)
startTimer(&kernel_tic);
- thrust::sort_by_key(thrust::device_ptr<uint>(dev_sort.gridParticleCellID),
- thrust::device_ptr<uint>(dev_sort.gridParticleCellID + np),
- thrust::device_ptr<uint>(dev_sort.gridParticleIndex));
+ thrust::sort_by_key(thrust::device_ptr<uint>(dev_sort->gridParticleCellID),
+ thrust::device_ptr<uint>(dev_sort->gridParticleCellID + np),
+ thrust::device_ptr<uint>(dev_sort->gridParticleIndex));
cudaThreadSynchronize(); // Needed? Does thrust synchronize threads implicitly?
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, &t_thrustsort);
t@@ -501,7 +494,7 @@ __host__ void DEM::startTime()
// Zero cell array values by setting cellStart to its highest possible value,
// specified with pointer value 0xffffffff, which for a 32 bit unsigned int
// is 4294967295.
- cudaMemset(dev_sort.cellStart, 0xffffffff,
+ cudaMemset(dev_sort->cellStart, 0xffffffff,
grid.num[0]*grid.num[1]*grid.num[2]*sizeof(unsigned int));
cudaThreadSynchronize();
checkForCudaErrors("Post cudaMemset");
t@@ -510,15 +503,15 @@ __host__ void DEM::startTime()
// coherent memory access. Save ordered configurations in new arrays (*_sorted).
if (PROFILING == 1)
startTimer(&kernel_tic);
- reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_sort.cellStart,
- dev_sort.cellEnd,
- dev_sort.gridParticleCellID,
- dev_sort.gridParticleIndex,
- dev_k.x, dev_k.vel,
- dev_k.angvel,
- dev_sort.x_sorted,
- dev_sort.vel_sorted,
- dev_sort.angvel_sorted);
+ reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_sort->cellStart,
+ dev_sort->cellEnd,
+ dev_sort->gridParticleCellID,
+ dev_sort->gridParticleIndex,
+ dev_k->x, dev_k->vel,
+ dev_k->angvel,
+ dev_sort->x_sorted,
+ dev_sort->vel_sorted,
+ dev_sort->angvel_sorted);
// Synchronization point
cudaThreadSynchronize();
t@@ -533,12 +526,12 @@ __host__ void DEM::startTime()
// For each particle: Search contacts in neighbor cells
if (PROFILING == 1)
startTimer(&kernel_tic);
- topology<<<dimGrid, dimBlock>>>(dev_sort.cellStart,
- dev_sort.cellEnd,
- dev_sort.gridParticleIndex,
- dev_sort.x_sorted,
- dev_k.contacts,
- dev_k.distmod);
+ topology<<<dimGrid, dimBlock>>>(dev_sort->cellStart,
+ dev_sort->cellEnd,
+ dev_sort->gridParticleIndex,
+ dev_sort->x_sorted,
+ dev_k->contacts,
+ dev_k->distmod);
// Synchronization point
t@@ -552,26 +545,28 @@ __host__ void DEM::startTime()
// For each particle: Process collisions and compute resulting forces.
if (PROFILING == 1)
startTimer(&kernel_tic);
- interact<<<dimGrid, dimBlock>>>(dev_sort.gridParticleIndex,
- dev_sort.cellStart,
- dev_sort.cellEnd,
- dev_k.x,
- dev_sort.x_sorted,
- dev_sort.vel_sorted,
- dev_sort.angvel_sorted,
- dev_k.vel,
- dev_k.angvel,
- dev_k.force,
- dev_k.torque,
- dev_e.es_dot,
- dev_e.ev_dot,
- dev_e.es, dev_e.ev, dev_e.p,
- dev_walls.nx,
- dev_walls.mvfd,
- dev_walls.force,
- dev_k.contacts,
- dev_k.distmod,
- dev_k.delta_t);
+ interact<<<dimGrid, dimBlock>>>(dev_sort->gridParticleIndex,
+ dev_sort->cellStart,
+ dev_sort->cellEnd,
+ dev_k->x,
+ dev_sort->x_sorted,
+ dev_sort->vel_sorted,
+ dev_sort->angvel_sorted,
+ dev_k->vel,
+ dev_k->angvel,
+ dev_k->force,
+ dev_k->torque,
+ dev_e->es_dot,
+ dev_e->ev_dot,
+ dev_e->es,
+ dev_e->ev,
+ dev_e->p,
+ dev_walls->nx,
+ dev_walls->mvfd,
+ dev_walls->force,
+ dev_k->contacts,
+ dev_k->distmod,
+ dev_k->delta_t);
// Synchronization point
t@@ -583,16 +578,16 @@ __host__ void DEM::startTime()
// Update particle kinematics
if (PROFILING == 1)
startTimer(&kernel_tic);
- integrate<<<dimGrid, dimBlock>>>(dev_sort.x_sorted,
- dev_sort.vel_sorted,
- dev_sort.angvel_sorted,
- dev_k.x,
- dev_k.vel,
- dev_k.angvel,
- dev_k.force,
- dev_k.torque,
- dev_k.angpos,
- dev_sort.gridParticleIndex);
+ integrate<<<dimGrid, dimBlock>>>(dev_sort->x_sorted,
+ dev_sort->vel_sorted,
+ dev_sort->angvel_sorted,
+ dev_k->x,
+ dev_k->vel,
+ dev_k->angvel,
+ dev_k->force,
+ dev_k->torque,
+ dev_k->angpos,
+ dev_sort->gridParticleIndex);
cudaThreadSynchronize();
if (PROFILING == 1)
t@@ -601,7 +596,7 @@ __host__ void DEM::startTime()
// Summation of forces on wall
if (PROFILING == 1)
startTimer(&kernel_tic);
- summation<<<dimGrid, dimBlock>>>(dev_walls.force, dev_w_force_partial);
+ summation<<<dimGrid, dimBlock>>>(dev_walls->force, dev_w_force_partial);
// Synchronization point
cudaThreadSynchronize();
t@@ -612,8 +607,7 @@ __host__ void DEM::startTime()
// Update wall kinematics
if (PROFILING == 1)
startTimer(&kernel_tic);
- integrateWalls<<< 1, walls.nw>>>(dev_walls.nx,
- dev_walls.mvfd,
+ integrateWalls<<< 1, walls.nw>>>(dev_walls,
dev_w_force_partial,
blocksPerGrid);
t@@ -651,7 +645,7 @@ __host__ void DEM::startTime()
// Write binary output file
time.step_count += 1;
- sprintf(file,"output/%s.output%d.bin", inputbin, time.step_count);
+ sprintf(file,"output/%s.output%d.bin", sid, time.step_count);
writebin(file);
t@@ -686,7 +680,7 @@ __host__ void DEM::startTime()
}
// Update status.dat at the interval of filetime
- sprintf(file,"output/%s.status.dat", inputbin);
+ sprintf(file,"output/%s.status.dat", sid);
fp = fopen(file, "w");
fprintf(fp,"%2.4e %2.4e %d\n",
time.current,
(DIR) diff --git a/src/integration.cuh b/src/integration.cuh
t@@ -15,7 +15,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
{
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
- if (idx < devC_params.np) { // Condition prevents block size error
+ if (idx < devC_np) { // Condition prevents block size error
// Copy data to temporary arrays to avoid any potential read-after-write,
// write-after-read, or write-after-write hazards.
t@@ -35,7 +35,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
Float radius = x.w;
// Coherent read from constant memory to registers
- Float dt = devC_params.dt;
+ Float dt = devC_dt;
Float3 origo = MAKE_FLOAT3(devC_grid.origo[0], devC_grid.origo[1], devC_grid.origo[2]);
Float3 L = MAKE_FLOAT3(devC_grid.L[0], devC_grid.L[1], devC_grid.L[2]);
Float rho = devC_params.rho;
t@@ -106,7 +106,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
// Move particle across boundary if it is periodic
- if (devC_params.periodic == 1) {
+ if (devC_grid.periodic == 1) {
if (x.x < origo.x)
x.x += L.x;
if (x.x > L.x)
t@@ -115,7 +115,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
x.y += L.y;
if (x.y > L.y)
x.y -= L.y;
- } else if (devC_params.periodic == 2) {
+ } else if (devC_grid.periodic == 2) {
if (x.x < origo.x)
x.x += L.x;
if (x.x > L.x)
t@@ -142,7 +142,7 @@ __global__ void summation(Float* in, Float *out)
unsigned int cacheIdx = threadIdx.x;
Float temp = 0.0f;
- while (idx < devC_params.np) {
+ while (idx < devC_np) {
temp += in[idx];
idx += blockDim.x * gridDim.x;
}
t@@ -168,20 +168,19 @@ __global__ void summation(Float* in, Float *out)
}
// Update wall positions
-__global__ void integrateWalls(Float4* dev_w_nx,
- Float4* dev_w_mvfd,
+__global__ void integrateWalls(Walls* dev_walls,
Float* dev_w_force_partial,
unsigned int blocksPerGrid)
{
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
- if (idx < devC_params.nw) { // Condition prevents block size error
+ if (idx < devC_nw) { // Condition prevents block size error
// Copy data to temporary arrays to avoid any potential read-after-write,
// write-after-read, or write-after-write hazards.
- Float4 w_nx = dev_w_nx[idx];
- Float4 w_mvfd = dev_w_mvfd[idx];
- int wmode = devC_params.wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel
+ Float4 w_nx = dev_walls->nx[idx];
+ Float4 w_mvfd = dev_walls->mvfd[idx];
+ int wmode = dev_walls->wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel
Float acc;
if (wmode == 0) // Wall fixed: do nothing
t@@ -193,7 +192,7 @@ __global__ void integrateWalls(Float4* dev_w_nx,
w_mvfd.z += dev_w_force_partial[i];
}
- Float dt = devC_params.dt;
+ Float dt = devC_dt;
// Normal load = Deviatoric stress times wall surface area,
// directed downwards.
t@@ -216,8 +215,8 @@ __global__ void integrateWalls(Float4* dev_w_nx,
w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f;
// Store data in global memory
- dev_w_nx[idx] = w_nx;
- dev_w_mvfd[idx] = w_mvfd;
+ dev_walls->nx[idx] = w_nx;
+ dev_walls->mvfd[idx] = w_mvfd;
}
} // End of integrateWalls(...)
(DIR) diff --git a/src/sorting.cuh b/src/sorting.cuh
t@@ -25,7 +25,7 @@ __global__ void calcParticleCellID(unsigned int* dev_gridParticleCellID,
//unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
- if (idx < devC_params.np) { // Condition prevents block size error
+ if (idx < devC_np) { // Condition prevents block size error
//volatile Float4 x = dev_x[idx]; // Ensure coalesced read
Float4 x = dev_x[idx]; // Ensure coalesced read
t@@ -69,7 +69,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart,
unsigned int cellID;
// Read cellID data and store it in shared memory (shared_data)
- if (idx < devC_params.np) { // Condition prevents block size error
+ if (idx < devC_np) { // Condition prevents block size error
cellID = dev_gridParticleCellID[idx];
// Load hash data into shared memory, allowing access to neighbor particle cellID values
t@@ -86,7 +86,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart,
__syncthreads();
// Find lowest and highest particle index in each cell
- if (idx < devC_params.np) { // Condition prevents block size error
+ if (idx < devC_np) { // Condition prevents block size error
// If this particle has a different cell index to the previous particle, it's the first
// particle in the cell -> Store the index of this particle in the cell.
// The previous particle must be the last particle in the previous cell.
t@@ -97,7 +97,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart,
}
// Check wether the thread is the last one
- if (idx == (devC_params.np - 1))
+ if (idx == (devC_np - 1))
dev_cellEnd[cellID] = idx + 1;
(DIR) diff --git a/src/sphere.cpp b/src/sphere.cpp
t@@ -1,6 +1,7 @@
#include <iostream>
#include <cstdio>
#include <cstdlib>
+#include <cstring>
#include "typedefs.h"
#include "datatypes.h"
t@@ -9,7 +10,7 @@
// Constructor: Reads an input binary, and optionally checks
// and reports the values
-DEM::DEM(const char *inputbin,
+DEM::DEM(char *inputbin,
const int verbosity,
const int checkVals)
: verbose(verbosity)
t@@ -17,6 +18,19 @@ DEM::DEM(const char *inputbin,
using std::cout;
using std::cerr;
+ // Get basename from input file
+ char *name = inputbin;
+ char *sid = name;
+ while (*name) {
+ if (*name++ == '/') {
+ sid = name;
+ }
+ } // sid is now everything after the last '/' char
+ char *lastdot = strrchr(sid, '.');
+ if (lastdot != NULL)
+ *lastdot = '\0'; // Put in string-end char. at last dot
+
+
// Initialize CUDA
initializeGPU();
(DIR) diff --git a/src/sphere.h b/src/sphere.h
t@@ -10,6 +10,9 @@ class DEM {
// Values and functions only accessible from the class internally
private:
+ // Simulation ID
+ char *sid;
+
// Output level
int verbose;
t@@ -70,7 +73,7 @@ class DEM {
public:
// Constructor, some parameters with default values
- DEM(const char *inputbin,
+ DEM(char *inputbin,
const int verbosity = 1,
const int checkVals = 1);