pass new BC flags to cuda kernels - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 6b42a6886653c7e1e620ca470ffa953820ca6de0
 (DIR) parent 833df6a19e3c16b0bf1e1cbbe760841b03ec3c54
 (HTM) Author: Anders Damsgaard Christensen <adc@geo.au.dk>
       Date:   Thu, 11 Aug 2016 11:50:52 -0700
       
       pass new BC flags to cuda kernels
       
       Diffstat:
         M src/darcy.cuh                       |      12 ++++++++++++
         M src/device.cu                       |      38 ++++++++++++++++++++++++++-----
       
       2 files changed, 44 insertions(+), 6 deletions(-)
       ---
 (DIR) diff --git a/src/darcy.cuh b/src/darcy.cuh
       t@@ -204,6 +204,10 @@ __global__ void setDarcyZeros(T* __restrict__ dev_scalarfield)
            template<typename T>
        __global__ void setDarcyGhostNodes(
                T* __restrict__ dev_scalarfield,
       +        const int bc_xn,
       +        const int bc_xp,
       +        const int bc_yn,
       +        const int bc_yp,
                const int bc_bot,
                const int bc_top)
        {
       t@@ -1557,6 +1561,10 @@ __global__ void firstDarcySolution(
                const Float3* __restrict__ dev_darcy_grad_k,  // in
                const Float beta_f,                           // in
                const Float mu,                               // in
       +        const int bc_xn,                              // in
       +        const int bc_xp,                              // in
       +        const int bc_yn,                              // in
       +        const int bc_yp,                              // in
                const int bc_bot,                             // in
                const int bc_top,                             // in
                const unsigned int ndem,                      // in
       t@@ -1731,6 +1739,10 @@ __global__ void updateDarcySolution(
                const Float3* __restrict__ dev_darcy_grad_k,  // in
                const Float beta_f,                           // in
                const Float mu,                               // in
       +        const int bc_xn,                              // in
       +        const int bc_xp,                              // in
       +        const int bc_yn,                              // in
       +        const int bc_yp,                              // in
                const int bc_bot,                             // in
                const int bc_top,                             // in
                const unsigned int ndem,                      // in
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -1854,7 +1854,10 @@ __host__ void DEM::startTime()
                            if (PROFILING == 1)
                                startTimer(&kernel_tic);
                            setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_darcy_p, darcy.bc_bot, darcy.bc_top);
       +                            dev_darcy_p,
       +                            darcy.bc_xn, darcy.bc_xp,
       +                            darcy.bc_yn, darcy.bc_yp,
       +                            darcy.bc_bot, darcy.bc_top);
                            cudaThreadSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       t@@ -1872,7 +1875,10 @@ __host__ void DEM::startTime()
                                    iter);
        
                            setDarcyGhostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_darcy_grad_p, darcy.bc_bot, darcy.bc_top);
       +                            dev_darcy_grad_p,
       +                            darcy.xn, darcy.xp,
       +                            darcy.yn, darcy.yp,
       +                            darcy.bc_bot, darcy.bc_top);
                            cudaThreadSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       t@@ -2003,7 +2009,10 @@ __host__ void DEM::startTime()
                            if (PROFILING == 1)
                                startTimer(&kernel_tic);
                            setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_darcy_phi, darcy.bc_bot, darcy.bc_top);
       +                            dev_darcy_phi,
       +                            darcy.xn, darcy.xp,
       +                            darcy.yn, darcy.yp,
       +                            darcy.bc_bot, darcy.bc_top);
                            cudaThreadSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       t@@ -2014,7 +2023,10 @@ __host__ void DEM::startTime()
                            if (PROFILING == 1)
                                startTimer(&kernel_tic);
                            setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
       -                            dev_darcy_k, darcy.bc_bot, darcy.bc_top);
       +                            dev_darcy_k,
       +                            darcy.xn, darcy.xp,
       +                            darcy.yn, darcy.yp,
       +                            darcy.bc_bot, darcy.bc_top);
                            cudaThreadSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       t@@ -2109,7 +2121,10 @@ __host__ void DEM::startTime()
                                    startTimer(&kernel_tic);
                                setDarcyGhostNodes<Float>
                                    <<<dimGridFluid, dimBlockFluid>>>(
       -                                dev_darcy_p, darcy.bc_bot, darcy.bc_top);
       +                                dev_darcy_p,
       +                                darcy.xn, darcy.xp,
       +                                darcy.yn, darcy.yp,
       +                                darcy.bc_bot, darcy.bc_top);
                                cudaThreadSynchronize();
                                if (PROFILING == 1)
                                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       t@@ -2130,6 +2145,10 @@ __host__ void DEM::startTime()
                                            dev_darcy_grad_k,
                                            darcy.beta_f,
                                            darcy.mu,
       +                                    darcy.xn,
       +                                    darcy.xp,
       +                                    darcy.yn,
       +                                    darcy.yp,
                                            darcy.bc_bot,
                                            darcy.bc_top,
                                            darcy.ndem,
       t@@ -2159,6 +2178,10 @@ __host__ void DEM::startTime()
                                        dev_darcy_grad_k,
                                        darcy.beta_f,
                                        darcy.mu,
       +                                darcy.xn,
       +                                darcy.xp,
       +                                darcy.yn,
       +                                darcy.yp,
                                        darcy.bc_bot,
                                        darcy.bc_top,
                                        darcy.ndem,
       t@@ -2289,7 +2312,10 @@ __host__ void DEM::startTime()
                            if (PROFILING == 1)
                                startTimer(&kernel_tic);
                            setDarcyGhostNodes<Float> <<<dimGridFluid, dimBlockFluid>>>
       -                        (dev_darcy_p, darcy.bc_bot, darcy.bc_top);
       +                        (dev_darcy_p,
       +                         darcy.bc_xn, darcy.bc_xp,
       +                         darcy.bc_yn, darcy.bc_yp,
       +                         darcy.bc_bot, darcy.bc_top);
                            cudaThreadSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,