first tests with Darcy kernels OK, input to python is not - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 2d5d74563607bc73c6ea0e679445a51bb512a2c4
 (DIR) parent 4f3a852283c68ac86643c460215f160f68288e95
 (HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
       Date:   Wed,  9 Oct 2013 14:38:04 +0200
       
       first tests with Darcy kernels OK, input to python is not
       
       Diffstat:
         M src/device.cu                       |      36 ++++++++++++++++++++++---------
       
       1 file changed, 26 insertions(+), 10 deletions(-)
       ---
 (DIR) diff --git a/src/device.cu b/src/device.cu
       @@ -897,7 +897,7 @@ __host__ void DEM::startTime()
                if (params.nu > 0.0 && darcy == 1) {
        
        #ifdef DARCY_GPU
       -            /*
       +            //*
                    checkForCudaErrors("Before findPorositiesDev", iter);
                    // Find cell porosities
                    if (PROFILING == 1)
       @@ -961,8 +961,11 @@ __host__ void DEM::startTime()
                                &t_explDarcyStepDev);
                    checkForCudaErrors("Post explDarcyStepDev", iter);
        
       -            // Flop flop
       -            swapFloatArrays(dev_d_H, dev_d_H_new);
       +            // Flip flop
       +            Float* tmp = dev_d_H;
       +            dev_d_H = dev_d_H_new;
       +            dev_d_H_new = tmp;
       +            
        
                    // Find the pressure gradients
                    if (PROFILING == 1)
       @@ -989,7 +992,7 @@ __host__ void DEM::startTime()
                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                &t_findDarcyVelocitiesDev);
                    checkForCudaErrors("Post findDarcyVelocitiesDev", iter);
       -            */
       +            //*/
        
        #else
                    // Copy device data to host memory
       @@ -1182,21 +1185,34 @@ __host__ void DEM::startTime()
        
            // Report time spent on each kernel
            if (PROFILING == 1 && verbose == 1) {
       -        double t_sum = t_calcParticleCellID + t_thrustsort + t_reorderArrays
       -            + t_topology + t_interact + t_summation + t_integrateWalls;
       +        double t_sum = t_calcParticleCellID + t_thrustsort + t_reorderArrays +
       +            t_topology + t_interact + t_bondsLinear + t_latticeBoltzmannD3Q19 +
       +            t_integrate + t_summation + t_integrateWalls + t_findPorositiesDev +
       +            t_findDarcyTransmissivitiesDev + t_setDarcyGhostNodesDev +
       +            t_explDarcyStepDev + t_findDarcyGradientsDev +
       +            t_findDarcyVelocitiesDev;
       +
                cout << "\nKernel profiling statistics:\n"
       -            << "  - calcParticleCellID:\t\t" << t_calcParticleCellID/1000.0 << " s"
       +            << "  - calcParticleCellID:\t\t" << t_calcParticleCellID/1000.0
       +            << " s"
                    << "\t(" << 100.0*t_calcParticleCellID/t_sum << " %)\n"
                    << "  - thrustsort:\t\t\t" << t_thrustsort/1000.0 << " s"
                    << "\t(" << 100.0*t_thrustsort/t_sum << " %)\n"
                    << "  - reorderArrays:\t\t" << t_reorderArrays/1000.0 << " s"
       -            << "\t(" << 100.0*t_reorderArrays/t_sum << " %)\n"
       +            << "\t(" << 100.0*t_reorderArrays/t_sum << " %)\n";
       +        if (params.contactmodel == 2 || params.contactmodel == 3) {
       +            cout
                    << "  - topology:\t\t\t" << t_topology/1000.0 << " s"
       -            << "\t(" << 100.0*t_topology/t_sum << " %)\n"
       +            << "\t(" << 100.0*t_topology/t_sum << " %)\n";
       +        }
       +        cout
                    << "  - interact:\t\t\t" << t_interact/1000.0 << " s"
       -            << "\t(" << 100.0*t_interact/t_sum << " %)\n"
       +            << "\t(" << 100.0*t_interact/t_sum << " %)\n";
       +        if (params.nb0 > 0) {
       +            cout
                    << "  - bondsLinear:\t\t" << t_bondsLinear/1000.0 << " s"
                    << "\t(" << 100.0*t_bondsLinear/t_sum << " %)\n";
       +        }
                if (params.nu > 0.0 && darcy == 0) {
                    cout
                    << "  - latticeBoltzmann:\t\t" << t_latticeBoltzmannD3Q19/1000.0 <<