tNow with correct sizeof() parameter - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit 80378c0db50ee07b6f13d09b0d1fcd3eb1b9e796
(DIR) parent 3ec084a03251a13fe4cc09d8f58426174f88e96b
(HTM) Author: Anders Damsgaard <adc@geo.au.dk>
Date: Mon, 22 Oct 2012 20:41:13 +0200
Now with correct sizeof() parameter
Diffstat:
M src/Makefile | 2 +-
M src/datatypes.h | 4 ++--
M src/device.cu | 278 ++++++++++++++++++++++---------
M src/main.cpp | 4 ++--
4 files changed, 207 insertions(+), 81 deletions(-)
---
(DIR) diff --git a/src/Makefile b/src/Makefile
t@@ -1,5 +1,5 @@
# Cuda paths
-CUDA_INSTALL_PATH=/usr/local/bin
+CUDA_INSTALL_PATH=/usr/local/cuda
#CUDA_BIN=$(CUDA_INSTALL_PATH)/bin
# Define the editor and optional arguments
(DIR) diff --git a/src/datatypes.h b/src/datatypes.h
t@@ -167,8 +167,8 @@ void gpuMain(Float4* host_x,
Float4* host_torque,
Float4* host_angpos,
uint4* host_bonds,
- Particles* p, Grid* grid,
- Time* time, Params* params,
+ Particles p, Grid grid,
+ Time time, Params params,
Float4* host_w_nx,
Float4* host_w_mvfd,
const char* cwd,
(DIR) diff --git a/src/device.cu b/src/device.cu
t@@ -17,6 +17,7 @@
#include "contactsearch.cuh"
#include "integration.cuh"
+#include "cuPrintf.cu"
// Wrapper function for initializing the CUDA components.
// Called from main.cpp
t@@ -83,6 +84,129 @@ __host__ void stopTimer(cudaEvent_t *kernel_tic,
*sum += *kernel_elapsed;
}
+// Check values of parameters in constant memory
+__global__ void checkConstantValues(int* dev_equal,
+ Grid* dev_grid,
+ Params* dev_params)
+{
+
+ // Values ok (0)
+ *dev_equal = 0;
+
+ cuPrintf("dev_grid.nd = %u\n", dev_grid->nd);
+ cuPrintf("devC_grid.nd = %u\n", devC_grid.nd);
+ cuPrintf("dev_grid.num.x = %u\n", dev_grid->num[0]);
+ cuPrintf("devC_grid.num.x = %u\n", devC_grid.num[1]);
+ cuPrintf("dev_params.np = %u\n", dev_params->np);
+ cuPrintf("devC_params.np = %u\n", devC_params.np);
+ cuPrintf("dev_params.mu_s = %f\n", dev_params->mu_s);
+ cuPrintf("devC_params.mu_s = %f\n", devC_params.mu_s);
+
+ if (dev_grid->nd != 3) {
+ *dev_equal = 3;
+ return;
+ } else if (devC_grid.nd != 3) {
+ *dev_equal = 4;
+ return;
+ } else if (devC_grid.num[0] != 4) {
+ *dev_equal = 6;
+ return;
+ } else if (dev_grid->num[0] != 4) {
+ *dev_equal = 5;
+ return;
+ }
+
+
+ // Compare values between global- and constant
+ // memory structures
+ if (dev_grid->nd != devC_grid.nd ||
+ dev_grid->origo[0] != devC_grid.origo[0] ||
+ dev_grid->origo[1] != devC_grid.origo[1] ||
+ dev_grid->origo[2] != devC_grid.origo[2] ||
+ dev_grid->L[0] != devC_grid.L[0] ||
+ dev_grid->L[1] != devC_grid.L[1] ||
+ dev_grid->L[2] != devC_grid.L[2] ||
+ dev_grid->num[0] != devC_grid.num[0] ||
+ dev_grid->num[1] != devC_grid.num[1] ||
+ dev_grid->num[2] != devC_grid.num[2])
+ *dev_equal = 1; // Not ok
+
+ else if (dev_params->global != devC_params.global ||
+ dev_params->g[0] != devC_params.g[0] ||
+ dev_params->g[1] != devC_params.g[1] ||
+ dev_params->g[2] != devC_params.g[2] ||
+ dev_params->dt != devC_params.dt ||
+ dev_params->np != devC_params.np ||
+ dev_params->nw != devC_params.nw ||
+ dev_params->wmode[0] != devC_params.wmode[0] ||
+ dev_params->k_n != devC_params.k_n ||
+ dev_params->k_t != devC_params.k_t ||
+ dev_params->k_r != devC_params.k_r ||
+ dev_params->gamma_n != devC_params.gamma_n ||
+ dev_params->gamma_t != devC_params.gamma_t ||
+ dev_params->gamma_r != devC_params.gamma_r ||
+ dev_params->gamma_wn != devC_params.gamma_wn ||
+ dev_params->gamma_wt != devC_params.gamma_wt ||
+ dev_params->gamma_wr != devC_params.gamma_wr ||
+ dev_params->mu_s != devC_params.mu_s ||
+ dev_params->mu_d != devC_params.mu_d ||
+ dev_params->mu_r != devC_params.mu_r ||
+ dev_params->rho != devC_params.rho ||
+ dev_params->kappa != devC_params.kappa ||
+ dev_params->db != devC_params.db ||
+ dev_params->V_b != devC_params.V_b ||
+ dev_params->periodic != devC_params.periodic ||
+ dev_params->shearmodel != devC_params.shearmodel)
+ *dev_equal = 2; // Not ok
+
+}
+
+
+// Copy the constant data components to device memory,
+// and check whether the values correspond to the
+// values in constant memory.
+__host__ void checkConstantMemory(Grid* grid, Params* params)
+{
+ cudaPrintfInit();
+
+ // Allocate space in global device memory
+ Grid* dev_grid;
+ Params* dev_params;
+ cudaMalloc((void**)&dev_grid, sizeof(Grid));
+ cudaMalloc((void**)&dev_params, sizeof(Params));
+
+ // Copy structure data from host to global device memory
+ cudaMemcpy(dev_grid, grid, sizeof(Grid), cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_params, params, sizeof(Params), cudaMemcpyHostToDevice);
+
+ // Compare values between global and constant memory
+ // structures on the device.
+ int* equal = new int; // The values are equal = 0, if not = 1
+ *equal = 0;
+ int* dev_equal;
+ cudaMalloc((void**)&dev_equal, sizeof(int));
+ checkConstantValues<<<1,1>>>(dev_equal, dev_grid, dev_params);
+ checkForCudaErrors("After constant memory check");
+
+ // Copy result to host
+ cudaMemcpy(equal, dev_equal, sizeof(int), cudaMemcpyDeviceToHost);
+
+ // Free global device memory
+ cudaFree(dev_grid);
+ cudaFree(dev_params);
+ cudaFree(dev_equal);
+
+ cudaPrintfDisplay(stdout, true);
+
+ // Are the values equal?
+ if (*equal != 0) {
+ std::cerr << "Error! The values in constant memory do not "
+ << "seem to be correct (" << *equal << ").\n";
+ exit(1);
+ } else {
+ std::cout << " Constant values ok (" << *equal << ").\n";
+ }
+}
// Copy selected constant components to constant device memory.
//extern "C"
__host__ void transferToConstantMemory(Particles* p,
t@@ -105,7 +229,7 @@ __host__ void transferToConstantMemory(Particles* p,
//cudaMemcpyToSymbol("devC_nw", ¶ms->nw, sizeof(unsigned int));
//cudaMemcpyToSymbol("devC_periodic", ¶ms->periodic, sizeof(int));
- cudaMemcpyToSymbol(devC_grid, grid, sizeof(grid));
+ cudaMemcpyToSymbol(devC_grid, grid, sizeof(Grid));
if (params->global == 1) {
// If the physical properties of the particles are global (params.global == true),
t@@ -122,7 +246,7 @@ __host__ void transferToConstantMemory(Particles* p,
params->mu_d = p->mu_d[0];
params->mu_r = p->mu_r[0];
params->rho = p->rho[0];
- cudaMemcpyToSymbol(devC_params, params, sizeof(params));
+ cudaMemcpyToSymbol(devC_params, params, sizeof(Params));
//cudaMemcpyToSymbol("devC_k_n", ¶ms->k_n, sizeof(Float));
//cudaMemcpyToSymbol("devC_k_t", ¶ms->k_t, sizeof(Float));
//cudaMemcpyToSymbol("devC_k_r", ¶ms->k_r, sizeof(Float));
t@@ -153,6 +277,8 @@ __host__ void transferToConstantMemory(Particles* p,
}
checkForCudaErrors("After transferring to device constant memory");
+ checkConstantMemory(grid, params);
+
cout << "Done\n";
}
t@@ -167,10 +293,10 @@ __host__ void gpuMain(Float4* host_x,
Float4* host_torque,
Float4* host_angpos,
uint4* host_bonds,
- Particles* p,
- Grid* grid,
- Time* time,
- Params* params,
+ Particles p,
+ Grid grid,
+ Time time,
+ Params params,
Float4* host_w_nx,
Float4* host_w_mvfd,
const char* cwd,
t@@ -180,7 +306,7 @@ __host__ void gpuMain(Float4* host_x,
using std::cout; // Namespace directive
// Copy data to constant global device memory
- transferToConstantMemory(p, grid, time, params);
+ transferToConstantMemory(&p, &grid, &time, ¶ms);
// Declare pointers for particle variables on the device
Float4* dev_x; // Particle position
t@@ -221,18 +347,18 @@ __host__ void gpuMain(Float4* host_x,
// Particle contact bookkeeping
unsigned int* dev_contacts;
- unsigned int* host_contacts = new unsigned int[p->np*NC];
+ unsigned int* host_contacts = new unsigned int[p.np*NC];
// Particle pair distance correction across periodic boundaries
Float4* dev_distmod;
- Float4* host_distmod = new Float4[p->np*NC];
+ Float4* host_distmod = new Float4[p.np*NC];
// x,y,z contains the interparticle vector, corrected if contact
// is across a periodic boundary.
Float4* dev_delta_t; // Accumulated shear distance of contact
- Float4* host_delta_t = new Float4[p->np*NC];
+ Float4* host_delta_t = new Float4[p.np*NC];
// Particle memory size
- unsigned int memSizeF = sizeof(Float) * p->np;
- unsigned int memSizeF4 = sizeof(Float4) * p->np;
+ unsigned int memSizeF = sizeof(Float) * p.np;
+ unsigned int memSizeF4 = sizeof(Float4) * p.np;
// Allocate device memory for particle variables,
// tie to previously declared pointers
t@@ -257,24 +383,24 @@ __host__ void gpuMain(Float4* host_x,
cudaMalloc((void**)&dev_es, memSizeF);
cudaMalloc((void**)&dev_ev, memSizeF);
cudaMalloc((void**)&dev_p, memSizeF);
- //cudaMalloc((void**)&dev_bonds, sizeof(uint4) * p->np);
- //cudaMalloc((void**)&dev_bonds_sorted, sizeof(uint4) * p->np);
+ //cudaMalloc((void**)&dev_bonds, sizeof(uint4) * p.np);
+ //cudaMalloc((void**)&dev_bonds_sorted, sizeof(uint4) * p.np);
// Cell-related arrays
- cudaMalloc((void**)&dev_gridParticleCellID, sizeof(unsigned int)*p->np);
- cudaMalloc((void**)&dev_gridParticleIndex, sizeof(unsigned int)*p->np);
- cudaMalloc((void**)&dev_cellStart, sizeof(unsigned int)*grid->num[0]*grid->num[1]*grid->num[2]);
- cudaMalloc((void**)&dev_cellEnd, sizeof(unsigned int)*grid->num[0]*grid->num[1]*grid->num[2]);
+ cudaMalloc((void**)&dev_gridParticleCellID, sizeof(unsigned int)*p.np);
+ cudaMalloc((void**)&dev_gridParticleIndex, sizeof(unsigned int)*p.np);
+ cudaMalloc((void**)&dev_cellStart, sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]);
+ cudaMalloc((void**)&dev_cellEnd, sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]);
// Particle contact bookkeeping arrays
- cudaMalloc((void**)&dev_contacts, sizeof(unsigned int)*p->np*NC); // Max NC contacts per particle
- cudaMalloc((void**)&dev_distmod, sizeof(Float4)*p->np*NC);
- cudaMalloc((void**)&dev_delta_t, sizeof(Float4)*p->np*NC);
+ cudaMalloc((void**)&dev_contacts, sizeof(unsigned int)*p.np*NC); // Max NC contacts per particle
+ cudaMalloc((void**)&dev_distmod, sizeof(Float4)*p.np*NC);
+ cudaMalloc((void**)&dev_delta_t, sizeof(Float4)*p.np*NC);
// Wall arrays
- cudaMalloc((void**)&dev_w_nx, sizeof(Float)*params->nw*4);
- cudaMalloc((void**)&dev_w_mvfd, sizeof(Float)*params->nw*4);
- cudaMalloc((void**)&dev_w_force, sizeof(Float)*params->nw*p->np);
+ cudaMalloc((void**)&dev_w_nx, sizeof(Float)*params.nw*4);
+ cudaMalloc((void**)&dev_w_mvfd, sizeof(Float)*params.nw*4);
+ cudaMalloc((void**)&dev_w_force, sizeof(Float)*params.nw*p.np);
checkForCudaErrors("Post device memory allocation");
cout << "Done\n";
t@@ -291,32 +417,32 @@ __host__ void gpuMain(Float4* host_x,
cudaMemcpy(dev_force, host_force, memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy(dev_torque, host_torque, memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy(dev_angpos, host_angpos, memSizeF4, cudaMemcpyHostToDevice);
- //cudaMemcpy(dev_bonds, host_bonds, sizeof(uint4) * p->np, cudaMemcpyHostToDevice);
- cudaMemcpy(dev_radius, p->radius, memSizeF, cudaMemcpyHostToDevice);
- cudaMemcpy(dev_es_dot, p->es_dot, memSizeF, cudaMemcpyHostToDevice);
- cudaMemcpy(dev_ev_dot, p->ev_dot, memSizeF, cudaMemcpyHostToDevice);
- cudaMemcpy(dev_es, p->es, memSizeF, cudaMemcpyHostToDevice);
- cudaMemcpy(dev_ev, p->ev, memSizeF, cudaMemcpyHostToDevice);
- cudaMemcpy(dev_p, p->p, memSizeF, cudaMemcpyHostToDevice);
+ //cudaMemcpy(dev_bonds, host_bonds, sizeof(uint4) * p.np, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_radius, p.radius, memSizeF, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_es_dot, p.es_dot, memSizeF, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_ev_dot, p.ev_dot, memSizeF, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_es, p.es, memSizeF, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_ev, p.ev, memSizeF, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_p, p.p, memSizeF, cudaMemcpyHostToDevice);
// Wall data (wall mass and number in constant memory)
- cudaMemcpy(dev_w_nx, host_w_nx, sizeof(Float)*params->nw*4, cudaMemcpyHostToDevice);
- cudaMemcpy(dev_w_mvfd, host_w_mvfd, sizeof(Float)*params->nw*4, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_w_nx, host_w_nx, sizeof(Float)*params.nw*4, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_w_mvfd, host_w_mvfd, sizeof(Float)*params.nw*4, cudaMemcpyHostToDevice);
// Initialize contacts lists to p.np
- unsigned int* npu = new unsigned int[p->np*NC];
- for (unsigned int i=0; i<(p->np*NC); ++i)
- npu[i] = p->np;
- cudaMemcpy(dev_contacts, npu, sizeof(unsigned int)*p->np*NC, cudaMemcpyHostToDevice);
+ unsigned int* npu = new unsigned int[p.np*NC];
+ for (unsigned int i=0; i<(p.np*NC); ++i)
+ npu[i] = p.np;
+ cudaMemcpy(dev_contacts, npu, sizeof(unsigned int)*p.np*NC, cudaMemcpyHostToDevice);
delete[] npu;
// Create array of 0.0 values on the host and transfer these to the distance
// modifier and shear displacement arrays
- Float4* zerosF4 = new Float4[p->np*NC];
- for (unsigned int i=0; i<(p->np*NC); ++i)
+ Float4* zerosF4 = new Float4[p.np*NC];
+ for (unsigned int i=0; i<(p.np*NC); ++i)
zerosF4[i] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
- cudaMemcpy(dev_distmod, zerosF4, sizeof(Float4)*p->np*NC, cudaMemcpyHostToDevice);
- cudaMemcpy(dev_delta_t, zerosF4, sizeof(Float4)*p->np*NC, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_distmod, zerosF4, sizeof(Float4)*p.np*NC, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_delta_t, zerosF4, sizeof(Float4)*p.np*NC, cudaMemcpyHostToDevice);
delete[] zerosF4;
checkForCudaErrors("Post memcopy");
t@@ -339,7 +465,7 @@ __host__ void gpuMain(Float4* host_x,
// GPU workload configuration
unsigned int threadsPerBlock = 256;
// Create enough blocks to accomodate the particles
- unsigned int blocksPerGrid = iDivUp(p->np, threadsPerBlock);
+ unsigned int blocksPerGrid = iDivUp(p.np, threadsPerBlock);
dim3 dimGrid(blocksPerGrid, 1, 1); // Blocks arranged in 1D grid
dim3 dimBlock(threadsPerBlock, 1, 1); // Threads arranged in 1D block
// Shared memory per block
t@@ -363,18 +489,18 @@ __host__ void gpuMain(Float4* host_x,
sprintf(file,"%s/output/%s.status.dat", cwd, inputbin);
fp = fopen(file, "w");
fprintf(fp,"%2.4e %2.4e %d\n",
- time->current,
- 100.0*time->current/time->total,
- time->step_count);
+ time.current,
+ 100.0*time.current/time.total,
+ time.step_count);
fclose(fp);
// Write first output data file: output0.bin, thus testing writing of bin files
sprintf(file,"%s/output/%s.output0.bin", cwd, inputbin);
- if (fwritebin(file, p, host_x, host_vel,
+ if (fwritebin(file, &p, host_x, host_vel,
host_angvel, host_force,
host_torque, host_angpos,
host_bonds,
- grid, time, params,
+ &grid, &time, ¶ms,
host_w_nx, host_w_mvfd) != 0) {
std::cerr << "\n Problem during fwritebin \n";
exit(EXIT_FAILURE);
t@@ -412,11 +538,11 @@ __host__ void gpuMain(Float4* host_x,
cudaEventCreate(&kernel_toc);
}
- cout << " Current simulation time: " << time->current << " s.";
+ cout << " Current simulation time: " << time.current << " s.";
// MAIN CALCULATION TIME LOOP
- while (time->current <= time->total) {
+ while (time.current <= time.total) {
// Increment iteration counter
++iter;
t@@ -449,7 +575,7 @@ __host__ void gpuMain(Float4* host_x,
if (PROFILING == 1)
startTimer(&kernel_tic);
thrust::sort_by_key(thrust::device_ptr<uint>(dev_gridParticleCellID),
- thrust::device_ptr<uint>(dev_gridParticleCellID + p->np),
+ thrust::device_ptr<uint>(dev_gridParticleCellID + p.np),
thrust::device_ptr<uint>(dev_gridParticleIndex));
cudaThreadSynchronize(); // Needed? Does thrust synchronize threads implicitly?
if (PROFILING == 1)
t@@ -461,7 +587,7 @@ __host__ void gpuMain(Float4* host_x,
// specified with pointer value 0xffffffff, which for a 32 bit unsigned int
// is 4294967295.
cudaMemset(dev_cellStart, 0xffffffff,
- grid->num[0]*grid->num[1]*grid->num[2]*sizeof(unsigned int));
+ grid.num[0]*grid.num[1]*grid.num[2]*sizeof(unsigned int));
cudaThreadSynchronize();
checkForCudaErrors("Post cudaMemset");
t@@ -491,7 +617,7 @@ __host__ void gpuMain(Float4* host_x,
// The contact search in topology() is only necessary for determining
// the accumulated shear distance needed in the linear elastic
// and nonlinear contact force model
- if (params->shearmodel == 2 || params->shearmodel == 3) {
+ if (params.shearmodel == 2 || params.shearmodel == 3) {
// For each particle: Search contacts in neighbor cells
if (PROFILING == 1)
startTimer(&kernel_tic);
t@@ -571,7 +697,7 @@ __host__ void gpuMain(Float4* host_x,
// Update wall kinematics
if (PROFILING == 1)
startTimer(&kernel_tic);
- integrateWalls<<< 1, params->nw>>>(dev_w_nx,
+ integrateWalls<<< 1, params.nw>>>(dev_w_nx,
dev_w_mvfd,
dev_w_force_partial,
blocksPerGrid);
t@@ -584,17 +710,17 @@ __host__ void gpuMain(Float4* host_x,
// Update timers and counters
- time->current += time->dt;
- filetimeclock += time->dt;
+ time.current += time.dt;
+ filetimeclock += time.dt;
// Report time to console
cout << "\r Current simulation time: "
- << time->current << " s. ";// << std::flush;
+ << time.current << " s. ";// << std::flush;
// Produce output binary if the time interval
// between output files has been reached
- if (filetimeclock > time->file_dt) {
+ if (filetimeclock > time.file_dt) {
// Pause the CPU thread until all CUDA calls previously issued are completed
cudaThreadSynchronize();
t@@ -610,23 +736,23 @@ __host__ void gpuMain(Float4* host_x,
cudaMemcpy(host_force, dev_force, memSizeF4, cudaMemcpyDeviceToHost);
cudaMemcpy(host_torque, dev_torque, memSizeF4, cudaMemcpyDeviceToHost);
cudaMemcpy(host_angpos, dev_angpos, memSizeF4, cudaMemcpyDeviceToHost);
- //cudaMemcpy(host_bonds, dev_bonds, sizeof(uint4) * p->np, cudaMemcpyDeviceToHost);
- cudaMemcpy(p->es_dot, dev_es_dot, memSizeF, cudaMemcpyDeviceToHost);
- cudaMemcpy(p->ev_dot, dev_ev_dot, memSizeF, cudaMemcpyDeviceToHost);
- cudaMemcpy(p->es, dev_es, memSizeF, cudaMemcpyDeviceToHost);
- cudaMemcpy(p->ev, dev_ev, memSizeF, cudaMemcpyDeviceToHost);
- cudaMemcpy(p->p, dev_p, memSizeF, cudaMemcpyDeviceToHost);
+ //cudaMemcpy(host_bonds, dev_bonds, sizeof(uint4) * p.np, cudaMemcpyDeviceToHost);
+ cudaMemcpy(p.es_dot, dev_es_dot, memSizeF, cudaMemcpyDeviceToHost);
+ cudaMemcpy(p.ev_dot, dev_ev_dot, memSizeF, cudaMemcpyDeviceToHost);
+ cudaMemcpy(p.es, dev_es, memSizeF, cudaMemcpyDeviceToHost);
+ cudaMemcpy(p.ev, dev_ev, memSizeF, cudaMemcpyDeviceToHost);
+ cudaMemcpy(p.p, dev_p, memSizeF, cudaMemcpyDeviceToHost);
// Wall data
cudaMemcpy(host_w_nx, dev_w_nx,
- sizeof(Float)*params->nw*4, cudaMemcpyDeviceToHost);
+ sizeof(Float)*params.nw*4, cudaMemcpyDeviceToHost);
cudaMemcpy(host_w_mvfd, dev_w_mvfd,
- sizeof(Float)*params->nw*4, cudaMemcpyDeviceToHost);
+ sizeof(Float)*params.nw*4, cudaMemcpyDeviceToHost);
// Contact information
if (CONTACTINFO == 1) {
- cudaMemcpy(host_contacts, dev_contacts, sizeof(unsigned int)*p->np*NC, cudaMemcpyDeviceToHost);
+ cudaMemcpy(host_contacts, dev_contacts, sizeof(unsigned int)*p.np*NC, cudaMemcpyDeviceToHost);
cudaMemcpy(host_delta_t, dev_delta_t, memSizeF4*NC, cudaMemcpyDeviceToHost);
cudaMemcpy(host_distmod, dev_distmod, memSizeF4*NC, cudaMemcpyDeviceToHost);
}
t@@ -635,14 +761,14 @@ __host__ void gpuMain(Float4* host_x,
cudaThreadSynchronize();
// Write binary output file
- time->step_count += 1;
- sprintf(file,"%s/output/%s.output%d.bin", cwd, inputbin, time->step_count);
+ time.step_count += 1;
+ sprintf(file,"%s/output/%s.output%d.bin", cwd, inputbin, time.step_count);
- if (fwritebin(file, p, host_x, host_vel,
+ if (fwritebin(file, &p, host_x, host_vel,
host_angvel, host_force,
host_torque, host_angpos,
host_bonds,
- grid, time, params,
+ &grid, &time, ¶ms,
host_w_nx, host_w_mvfd) != 0) {
cout << "\n Error during fwritebin() in main loop\n";
exit(EXIT_FAILURE);
t@@ -651,10 +777,10 @@ __host__ void gpuMain(Float4* host_x,
if (CONTACTINFO == 1) {
// Write contact information to stdout
cout << "\n\n---------------------------\n"
- << "t = " << time->current << " s.\n"
+ << "t = " << time.current << " s.\n"
<< "---------------------------\n";
- for (int n = 0; n < p->np; ++n) {
+ for (int n = 0; n < p.np; ++n) {
cout << "\n## Particle " << n << " ##\n";
cout << "- contacts:\n";
t@@ -682,9 +808,9 @@ __host__ void gpuMain(Float4* host_x,
sprintf(file,"%s/output/%s.status.dat", cwd, inputbin);
fp = fopen(file, "w");
fprintf(fp,"%2.4e %2.4e %d\n",
- time->current,
- 100.0*time->current/time->total,
- time->step_count);
+ time.current,
+ 100.0*time.current/time.total,
+ time.step_count);
fclose(fp);
filetimeclock = 0.0;
t@@ -701,7 +827,7 @@ __host__ void gpuMain(Float4* host_x,
cout << "\nSimulation ended. Statistics:\n"
<< " - Last output file number: "
- << time->step_count << "\n"
+ << time.step_count << "\n"
<< " - GPU time spent: "
<< dev_time_spent/1000.0f << " s\n"
<< " - CPU time spent: "
(DIR) diff --git a/src/main.cpp b/src/main.cpp
t@@ -456,8 +456,8 @@ int main(int argc, char *argv[])
host_torque,
host_angpos,
host_bonds,
- &p, &grid,
- &time, ¶ms,
+ p, grid,
+ time, params,
host_w_nx,
host_w_mvfd,
cwd, inputbin);