first tests with Darcy kernels OK, input to python is not - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit 2d5d74563607bc73c6ea0e679445a51bb512a2c4
(DIR) parent 4f3a852283c68ac86643c460215f160f68288e95
(HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
Date: Wed, 9 Oct 2013 14:38:04 +0200
first tests with Darcy kernels OK, input to python is not
Diffstat:
M src/device.cu | 36 ++++++++++++++++++++++---------
1 file changed, 26 insertions(+), 10 deletions(-)
---
(DIR) diff --git a/src/device.cu b/src/device.cu
@@ -897,7 +897,7 @@ __host__ void DEM::startTime()
if (params.nu > 0.0 && darcy == 1) {
#ifdef DARCY_GPU
- /*
+ //*
checkForCudaErrors("Before findPorositiesDev", iter);
// Find cell porosities
if (PROFILING == 1)
@@ -961,8 +961,11 @@ __host__ void DEM::startTime()
&t_explDarcyStepDev);
checkForCudaErrors("Post explDarcyStepDev", iter);
- // Flop flop
- swapFloatArrays(dev_d_H, dev_d_H_new);
+ // Flip flop
+ Float* tmp = dev_d_H;
+ dev_d_H = dev_d_H_new;
+ dev_d_H_new = tmp;
+
// Find the pressure gradients
if (PROFILING == 1)
@@ -989,7 +992,7 @@ __host__ void DEM::startTime()
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
&t_findDarcyVelocitiesDev);
checkForCudaErrors("Post findDarcyVelocitiesDev", iter);
- */
+ //*/
#else
// Copy device data to host memory
@@ -1182,21 +1185,34 @@ __host__ void DEM::startTime()
// Report time spent on each kernel
if (PROFILING == 1 && verbose == 1) {
- double t_sum = t_calcParticleCellID + t_thrustsort + t_reorderArrays
- + t_topology + t_interact + t_summation + t_integrateWalls;
+ double t_sum = t_calcParticleCellID + t_thrustsort + t_reorderArrays +
+ t_topology + t_interact + t_bondsLinear + t_latticeBoltzmannD3Q19 +
+ t_integrate + t_summation + t_integrateWalls + t_findPorositiesDev +
+ t_findDarcyTransmissivitiesDev + t_setDarcyGhostNodesDev +
+ t_explDarcyStepDev + t_findDarcyGradientsDev +
+ t_findDarcyVelocitiesDev;
+
cout << "\nKernel profiling statistics:\n"
- << " - calcParticleCellID:\t\t" << t_calcParticleCellID/1000.0 << " s"
+ << " - calcParticleCellID:\t\t" << t_calcParticleCellID/1000.0
+ << " s"
<< "\t(" << 100.0*t_calcParticleCellID/t_sum << " %)\n"
<< " - thrustsort:\t\t\t" << t_thrustsort/1000.0 << " s"
<< "\t(" << 100.0*t_thrustsort/t_sum << " %)\n"
<< " - reorderArrays:\t\t" << t_reorderArrays/1000.0 << " s"
- << "\t(" << 100.0*t_reorderArrays/t_sum << " %)\n"
+ << "\t(" << 100.0*t_reorderArrays/t_sum << " %)\n";
+ if (params.contactmodel == 2 || params.contactmodel == 3) {
+ cout
<< " - topology:\t\t\t" << t_topology/1000.0 << " s"
- << "\t(" << 100.0*t_topology/t_sum << " %)\n"
+ << "\t(" << 100.0*t_topology/t_sum << " %)\n";
+ }
+ cout
<< " - interact:\t\t\t" << t_interact/1000.0 << " s"
- << "\t(" << 100.0*t_interact/t_sum << " %)\n"
+ << "\t(" << 100.0*t_interact/t_sum << " %)\n";
+ if (params.nb0 > 0) {
+ cout
<< " - bondsLinear:\t\t" << t_bondsLinear/1000.0 << " s"
<< "\t(" << 100.0*t_bondsLinear/t_sum << " %)\n";
+ }
if (params.nu > 0.0 && darcy == 0) {
cout
<< " - latticeBoltzmann:\t\t" << t_latticeBoltzmannD3Q19/1000.0 <<