Expanded profiling information - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit cc37e07feeb2da74ccd3d284296f2d75a9636500
 (DIR) parent c8fda5738803686667567217cc927beb0c39a0fe
 (HTM) Author: Anders Damsgaard <adc@geo.au.dk>
       Date:   Tue, 16 Oct 2012 12:26:51 +0200
       
       Expanded profiling information
       
       Diffstat:
         M src/device.cu                       |      26 ++++++++++++++++++--------
       
       1 file changed, 18 insertions(+), 8 deletions(-)
       ---
 (DIR) diff --git a/src/device.cu b/src/device.cu
       @@ -713,15 +713,25 @@ __host__ void gpuMain(Float4* host_x,
        
          // Report time spent on each kernel
          if (PROFILING == 1) {
       +    double t_sum = t_calcParticleCellID + t_thrustsort + t_reorderArrays
       +                 + t_topology + t_interact + t_summation + t_integrateWalls;
            cout << "\nKernel profiling statistics:\n"
       -         << "  - calcParticleCellID:\t" << t_calcParticleCellID/1000.0 << " s\n"
       -         << "  - thrustsort:\t\t" << t_thrustsort/1000.0 << " s\n"
       -         << "  - reorderArrays:\t" << t_reorderArrays/1000.0 << " s\n"
       -         << "  - topology:\t\t" << t_topology/1000.0 << " s\n"
       -         << "  - interact:\t\t" << t_interact/1000.0 << " s\n"
       -         << "  - integrate:\t\t" << t_integrate/1000.0 << " s\n"
       -         << "  - summation:\t\t" << t_summation/1000.0 << " s\n"
       -         << "  - integrateWalls:\t" << t_integrateWalls/1000.0 << " s\n";
       +         << "  - calcParticleCellID:\t" << t_calcParticleCellID/1000.0 << " s"
       +         << " (" << 100.0*t_calcParticleCellID/t_sum << " %)\n"
       +         << "  - thrustsort:\t\t" << t_thrustsort/1000.0 << " s"
       +         << " (" << 100.0*t_thrustsort/t_sum << " %)\n"
       +         << "  - reorderArrays:\t" << t_reorderArrays/1000.0 << " s"
       +         << " (" << 100.0*t_reorderArrays/t_sum << " %)\n"
       +         << "  - topology:\t\t" << t_topology/1000.0 << " s"
       +         << " (" << 100.0*t_topology/t_sum << " %)\n"
       +         << "  - interact:\t\t" << t_interact/1000.0 << " s"
       +         << " (" << 100.0*t_interact/t_sum << " %)\n"
       +         << "  - integrate:\t\t" << t_integrate/1000.0 << " s"
       +         << " (" << 100.0*t_integrate/t_sum << " %)\n"
       +         << "  - summation:\t\t" << t_summation/1000.0 << " s"
       +         << " (" << 100.0*t_summation/t_sum << " %)\n"
       +         << "  - integrateWalls:\t" << t_integrateWalls/1000.0 << " s"
       +         << " (" << 100.0*t_integrateWalls/t_sum << " %)\n";
          }