device.cu: transition from deprecated host synchronization call - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit b2a1cf15f12e85f9c396cb65dd3fbf997c27b738
 (DIR) parent e0731b7097b1e026f653a9f6debdfd88f0b7f281
 (HTM) Author: Anders Damsgaard <anders@adamsgaard.dk>
       Date:   Tue, 21 Feb 2023 11:23:15 +0100
       
       device.cu: transition from deprecated host synchronization call
       
       Diffstat:
         M src/device.cu                       |     174 ++++++++++++++++----------------
       
       1 file changed, 87 insertions(+), 87 deletions(-)
       ---
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -379,7 +379,7 @@ void DEM::updateGridSize()
            checkForCudaErrors("DEM::updateGridSize(): copying wall position");
        
            //printWorldSize<<<1,1>>>(dev_walls_nx);
       -    //cudaThreadSynchronize();
       +    //cudaDeviceSynchronize();
            //checkForCudaErrors("DEM::updateGridSize(): first printWorldSize");
        
            //printf("\nLz = %f\n", *Lz);
       t@@ -394,7 +394,7 @@ void DEM::updateGridSize()
            checkForCudaErrors("DEM::updateGridSize(): write to devC_grid.L[2]");
        
            //printWorldSize<<<1,1>>>(dev_walls_nx);
       -    //cudaThreadSynchronize();
       +    //cudaDeviceSynchronize();
            //checkForCudaErrors("DEM::updateGridSize(): second printWorldSize");
        
            // check value only during debugging
       t@@ -843,7 +843,7 @@ void DEM::startTime()
            FILE *fp;
        
            // Synchronization point
       -    cudaThreadSynchronize();
       +    cudaDeviceSynchronize();
            checkForCudaErrors("Start of startTime()");
        
            // Write initial data to output/<sid>.output00000.bin
       t@@ -1050,7 +1050,7 @@ void DEM::startTime()
                    // check if particle positions have finite values
        #ifdef CHECK_PARTICLES_FINITE
                    checkParticlePositions<<<dimGrid, dimBlock>>>(dev_x);
       -            cudaThreadSynchronize();
       +            cudaDeviceSynchronize();
                    checkForCudaErrorsIter("Post checkParticlePositions", iter);
        #endif
        
       t@@ -1070,7 +1070,7 @@ void DEM::startTime()
                            dev_x);
        
                    // Synchronization point
       -            cudaThreadSynchronize();
       +            cudaDeviceSynchronize();
                    if (PROFILING == 1)
                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                &t_calcParticleCellID);
       t@@ -1085,7 +1085,7 @@ void DEM::startTime()
                            thrust::device_ptr<uint>(dev_gridParticleCellID),
                            thrust::device_ptr<uint>(dev_gridParticleCellID + np),
                            thrust::device_ptr<uint>(dev_gridParticleIndex));
       -            cudaThreadSynchronize(); // Maybe Thrust synchronizes implicitly?
       +            cudaDeviceSynchronize(); // Maybe Thrust synchronizes implicitly?
                    if (PROFILING == 1)
                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                &t_thrustsort);
       t@@ -1097,7 +1097,7 @@ void DEM::startTime()
                    // for a 32 bit unsigned int is 4294967295.
                    cudaMemset(dev_cellStart, 0xffffffff,
                            grid.num[0]*grid.num[1]*grid.num[2]*sizeof(unsigned int));
       -            cudaThreadSynchronize();
       +            cudaDeviceSynchronize();
                    checkForCudaErrorsIter("Post cudaMemset", iter);
        
                    // Use sorted order to reorder particle arrays (position,
       t@@ -1116,7 +1116,7 @@ void DEM::startTime()
                            dev_angvel_sorted);
        
                    // Synchronization point
       -            cudaThreadSynchronize();
       +            cudaDeviceSynchronize();
                    if (PROFILING == 1)
                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                &t_reorderArrays);
       t@@ -1137,7 +1137,7 @@ void DEM::startTime()
                                dev_distmod);
        
                        // Synchronization point
       -                cudaThreadSynchronize();
       +                cudaDeviceSynchronize();
                        if (PROFILING == 1)
                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                    &t_topology);
       t@@ -1176,7 +1176,7 @@ void DEM::startTime()
                            dev_delta_t);
        
                    // Synchronization point
       -            cudaThreadSynchronize();
       +            cudaDeviceSynchronize();
                    //cudaPrintfDisplay(stdout, true);
                    if (PROFILING == 1)
                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       t@@ -1199,7 +1199,7 @@ void DEM::startTime()
                                dev_force,
                                dev_torque);
                        // Synchronization point
       -                cudaThreadSynchronize();
       +                cudaDeviceSynchronize();
                        //cudaPrintfDisplay(stdout, true);
                        if (PROFILING == 1)
                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       t@@ -1234,7 +1234,7 @@ void DEM::startTime()
                                    iter,
                                    np,
                                    ns.c_phi);
       -                cudaThreadSynchronize();
       +                cudaDeviceSynchronize();
                        if (PROFILING == 1)
                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                    &t_findPorositiesDev);
       t@@ -1259,7 +1259,7 @@ void DEM::startTime()
                                        dev_ns_v_x,
                                        dev_ns_v_y,
                                        dev_ns_v_z);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrors("Post interpolateCenterToFace");
                        }
        
       t@@ -1270,7 +1270,7 @@ void DEM::startTime()
                                    dev_ns_v_z,
                                    ns.bc_bot,
                                    ns.bc_top);
       -                cudaThreadSynchronize();
       +                cudaDeviceSynchronize();
                        checkForCudaErrorsIter("Post setNSghostNodesFace", iter);
        
                        findFaceDivTau<<<dimGridFluidFace, dimBlockFluidFace>>>(
       t@@ -1281,7 +1281,7 @@ void DEM::startTime()
                                dev_ns_div_tau_x,
                                dev_ns_div_tau_y,
                                dev_ns_div_tau_z);
       -                cudaThreadSynchronize();
       +                cudaDeviceSynchronize();
                        checkForCudaErrorsIter("Post findFaceDivTau", iter);
        
                        setNSghostNodesFace<Float>
       t@@ -1291,18 +1291,18 @@ void DEM::startTime()
                                    dev_ns_div_tau_z,
                                    ns.bc_bot,
                                    ns.bc_top);
       -                cudaThreadSynchronize();
       +                cudaDeviceSynchronize();
                        checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_div_tau)",
                                iter);
        
                        setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
                                dev_ns_p, ns.bc_bot, ns.bc_top);
       -                cudaThreadSynchronize();
       +                cudaDeviceSynchronize();
                        checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter);
        
                        setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
                                dev_ns_phi, ns.bc_bot, ns.bc_top);
       -                cudaThreadSynchronize();
       +                cudaDeviceSynchronize();
                        checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter);
        
        
       t@@ -1330,12 +1330,12 @@ void DEM::startTime()
                                    dev_ns_f_p,
                                    dev_ns_f_v,
                                    dev_ns_f_sum);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter("Post findInteractionForce", iter);
        
                            setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
                                    dev_ns_p, ns.bc_bot, ns.bc_top);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)",
                                    iter);
        
       t@@ -1350,13 +1350,13 @@ void DEM::startTime()
                            //dev_ns_F_pf_x,
                            //dev_ns_F_pf_y,
                            //dev_ns_F_pf_z);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter("Post applyInteractionForceToFluid",
                                    iter);
        
                            setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
                                    dev_ns_F_pf, ns.bc_bot, ns.bc_top);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter("Post setNSghostNodes(F_pf)", iter);
                        }
        #endif
       t@@ -1372,7 +1372,7 @@ void DEM::startTime()
                                    dev_ns_epsilon,
                                    dev_ns_epsilon_new,
                                    epsilon_value);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter("Post setNSepsilonTop", iter);
        
        #if defined(REPORT_EPSILON) || defined(REPORT_V_P_COMPONENTS) || defined(REPORT_V_C_COMPONENTS)
       t@@ -1391,7 +1391,7 @@ void DEM::startTime()
                                        wall0_iz,
                                        epsilon_value,
                                        dp_dz);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                checkForCudaErrorsIter("Post setNSepsilonAtTopWall",
                                        iter);
        
       t@@ -1417,7 +1417,7 @@ void DEM::startTime()
                                        dev_ns_epsilon_new,
                                        ns.beta,
                                        new_pressure);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                checkForCudaErrorsIter("Post setUpperPressureNS", iter);
        
        #ifdef REPORT_MORE_EPSILON
       t@@ -1453,7 +1453,7 @@ void DEM::startTime()
                            setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
                                    dev_ns_dphi, ns.bc_bot, ns.bc_top);
        
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_setNSghostNodesDev);
       t@@ -1471,14 +1471,14 @@ void DEM::startTime()
                                    dev_ns_v_y,
                                    dev_ns_v_z,
                                    dev_ns_v);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter(
                                    "Post interpolateFaceToCenter", iter);
        
                            // Set cell-center velocity ghost nodes
                            setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
                                    dev_ns_v, ns.bc_bot, ns.bc_top);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter("Post setNSghostNodes(v)", iter);
        
                            // Find the divergence of phi*vi*v, needed for predicting
       t@@ -1489,7 +1489,7 @@ void DEM::startTime()
                                    dev_ns_phi,
                                    dev_ns_v,
                                    dev_ns_div_phi_vi_v);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_findNSdivphiviv);
       t@@ -1498,7 +1498,7 @@ void DEM::startTime()
                            // Set cell-center ghost nodes
                            setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
                                    dev_ns_div_phi_vi_v, ns.bc_bot, ns.bc_top);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter("Post setNSghostNodes(div_phi_vi_v)",
                                    iter);
        
       t@@ -1530,7 +1530,7 @@ void DEM::startTime()
                                    dev_ns_v_p_x,
                                    dev_ns_v_p_y,
                                    dev_ns_v_p_z);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_findPredNSvelocities);
       t@@ -1542,7 +1542,7 @@ void DEM::startTime()
                                        dev_ns_v_p_y,
                                        dev_ns_v_p_z,
                                        ns.bc_bot, ns.bc_top);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter(
                                    "Post setNSghostNodesFace(dev_ns_v_p)", iter);
        
       t@@ -1551,7 +1551,7 @@ void DEM::startTime()
                                    dev_ns_v_p_y,
                                    dev_ns_v_p_z,
                                    dev_ns_v_p);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter(
                                    "Post interpolateFaceToCenter", iter);
        
       t@@ -1570,11 +1570,11 @@ void DEM::startTime()
                                    startTimer(&kernel_tic);
                                setNSepsilonInterior<<<dimGridFluid, dimBlockFluid>>>(
                                        dev_ns_epsilon, epsilon_value);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
        
                                setNSnormZero<<<dimGridFluid, dimBlockFluid>>>
                                    (dev_ns_norm);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
        
                                if (PROFILING == 1)
                                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       t@@ -1600,7 +1600,7 @@ void DEM::startTime()
                                        dev_ns_epsilon,
                                        dev_ns_epsilon_new,
                                        epsilon_value);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                if (PROFILING == 1)
                                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                            &t_setNSdirichlet);
       t@@ -1617,14 +1617,14 @@ void DEM::startTime()
                                /*setNSghostNodes<Float>
                                  <<<dimGridFluid, dimBlockFluid>>>(
                                  dev_ns_epsilon);
       -                          cudaThreadSynchronize();
       +                          cudaDeviceSynchronize();
                                  checkForCudaErrors(
                                  "Post setNSghostNodesFloat(dev_ns_epsilon)",
                                  iter);*/
                                setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
                                        dev_ns_epsilon,
                                        ns.bc_bot, ns.bc_top);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                checkForCudaErrorsIter("Post setNSghostNodesEpsilon(1)",
                                        iter);
        
       t@@ -1681,7 +1681,7 @@ void DEM::startTime()
                                        dev_ns_f1,
                                        dev_ns_f2,
                                        dev_ns_f);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                if (PROFILING == 1)
                                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                            &t_findNSforcing);
       t@@ -1692,14 +1692,14 @@ void DEM::startTime()
                                  dev_ns_f2,
                                  dev_ns_f,
                                  nijac);
       -                          cudaThreadSynchronize();
       +                          cudaDeviceSynchronize();
                                  checkForCudaErrors("Post setNSghostNodesForcing",
                                  iter);*/
        
                                setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
                                        dev_ns_epsilon,
                                        ns.bc_bot, ns.bc_top);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                checkForCudaErrorsIter("Post setNSghostNodesEpsilon(2)",
                                        iter);
        
       t@@ -1725,7 +1725,7 @@ void DEM::startTime()
                                        ns.theta,
                                        wall0_iz,
                                        dp_dz);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                if (PROFILING == 1)
                                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                            &t_jacobiIterationNS);
       t@@ -1741,7 +1741,7 @@ void DEM::startTime()
                                  wall0_iz,
                                  epsilon_value,
                                  dp_dz);
       -                          cudaThreadSynchronize();
       +                          cudaDeviceSynchronize();
                                  checkForCudaErrorsIter("Post setNSepsilonAtTopWall",
                                  iter);
                                  }*/
       t@@ -1750,7 +1750,7 @@ void DEM::startTime()
                                copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
                                        dev_ns_epsilon_new,
                                        dev_ns_epsilon);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                checkForCudaErrorsIter
                                    ("Post copyValues (epsilon_new->epsilon)", iter);
        
       t@@ -1791,7 +1791,7 @@ void DEM::startTime()
                                        <<<dimGridFluid, dimBlockFluid>>>(
                                                dev_ns_epsilon,
                                                ns.bc_bot, ns.bc_top);
       -                            cudaThreadSynchronize();
       +                            cudaDeviceSynchronize();
                                    checkForCudaErrorsIter
                                        ("Post setNSghostNodesEpsilon(4)", iter);
        
       t@@ -1802,14 +1802,14 @@ void DEM::startTime()
                                                dev_ns_epsilon,
                                                ns.gamma,
                                                ns.bc_bot, ns.bc_top);
       -                                cudaThreadSynchronize();
       +                                cudaDeviceSynchronize();
                                        checkForCudaErrorsIter("Post smoothing", iter);
        
                                        setNSghostNodes<Float>
                                            <<<dimGridFluid, dimBlockFluid>>>(
                                                    dev_ns_epsilon,
                                                    ns.bc_bot, ns.bc_top);
       -                                cudaThreadSynchronize();
       +                                cudaDeviceSynchronize();
                                        checkForCudaErrorsIter
                                            ("Post setNSghostNodesEpsilon(4)", iter);
                                    }
       t@@ -1852,7 +1852,7 @@ void DEM::startTime()
                                    dev_ns_epsilon,
                                    ns.beta,
                                    dev_ns_p);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter("Post updateNSpressure", iter);
        
                            updateNSvelocity<<<dimGridFluidFace, dimBlockFluidFace>>>(
       t@@ -1872,7 +1872,7 @@ void DEM::startTime()
                                    dev_ns_v_x,
                                    dev_ns_v_y,
                                    dev_ns_v_z);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_updateNSvelocityPressure);
       t@@ -1884,7 +1884,7 @@ void DEM::startTime()
                                        dev_ns_v_p_y,
                                        dev_ns_v_p_z,
                                        ns.bc_bot, ns.bc_top);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter(
                                    "Post setNSghostNodesFace(dev_ns_v)", iter);
        
       t@@ -1893,7 +1893,7 @@ void DEM::startTime()
                                    dev_ns_v_y,
                                    dev_ns_v_z,
                                    dev_ns_v);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter("Post interpolateFaceToCenter",
                                    iter);
                        } // end iter % ns.dem == 0
       t@@ -1921,7 +1921,7 @@ void DEM::startTime()
                                    darcy.bc_xn, darcy.bc_xp,
                                    darcy.bc_yn, darcy.bc_yp,
                                    darcy.bc_bot, darcy.bc_top);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_setDarcyGhostNodes);
       t@@ -1933,7 +1933,7 @@ void DEM::startTime()
                            findDarcyPressureGradient<<<dimGridFluid, dimBlockFluid>>>(
                                    dev_darcy_p,
                                    dev_darcy_grad_p);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter("After findDarcyPressureGradient",
                                    iter);
        
       t@@ -1942,7 +1942,7 @@ void DEM::startTime()
                                    darcy.bc_xn, darcy.bc_xp,
                                    darcy.bc_yn, darcy.bc_yp,
                                    darcy.bc_bot, darcy.bc_top);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_setDarcyGhostNodes);
       t@@ -1963,7 +1963,7 @@ void DEM::startTime()
                                    dev_darcy_phi,
                                    dev_darcy_dphi,
                                    dev_darcy_div_v_p);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_findDarcyPorosities);
       t@@ -1985,7 +1985,7 @@ void DEM::startTime()
                                    darcy.bc_top,
                                    dev_force,
                                    dev_darcy_f_p);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_findDarcyPressureForce);
       t@@ -2021,7 +2021,7 @@ void DEM::startTime()
                                    dev_darcy_dphi,
                                    dev_darcy_div_v_p,
                                    dev_darcy_vp_avg);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_findDarcyPorosities);
       t@@ -2035,7 +2035,7 @@ void DEM::startTime()
                                        dev_darcy_dphi,
                                        dev_darcy_div_v_p,
                                        dev_darcy_vp_avg);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                            }
        
                            // copy porosities to the frictionless lower Z boundary
       t@@ -2046,7 +2046,7 @@ void DEM::startTime()
                                        dev_darcy_dphi,
                                        dev_darcy_div_v_p,
                                        dev_darcy_vp_avg);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                            }
        
                            // Modulate the pressures at the upper boundary cells
       t@@ -2063,7 +2063,7 @@ void DEM::startTime()
                                        new_pressure,
                                        dev_darcy_p,
                                        wall0_iz);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                checkForCudaErrorsIter("Post setUpperPressureNS", iter);
        
                                // Modulate the pressures at the top wall
       t@@ -2072,7 +2072,7 @@ void DEM::startTime()
                                            new_pressure,
                                            wall0_iz,
                                            dev_darcy_p);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                checkForCudaErrorsIter("Post setDarcyTopWallPressure",
                                        iter);
        
       t@@ -2085,7 +2085,7 @@ void DEM::startTime()
                                startTimer(&kernel_tic);
                            findDarcyPermeabilities<<<dimGridFluid, dimBlockFluid>>>(
                                    darcy.k_c, dev_darcy_phi, dev_darcy_k);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_findDarcyPermeabilities);
       t@@ -2099,7 +2099,7 @@ void DEM::startTime()
                                    darcy.bc_xn, darcy.bc_xp,
                                    darcy.bc_yn, darcy.bc_yp,
                                    darcy.bc_bot, darcy.bc_top);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_setDarcyGhostNodes);
       t@@ -2113,7 +2113,7 @@ void DEM::startTime()
                                    darcy.bc_xn, darcy.bc_xp,
                                    darcy.bc_yn, darcy.bc_yp,
                                    darcy.bc_bot, darcy.bc_top);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_setDarcyGhostNodes);
       t@@ -2125,7 +2125,7 @@ void DEM::startTime()
                            findDarcyPermeabilityGradients
                                <<<dimGridFluid, dimBlockFluid>>>
                                (dev_darcy_k, dev_darcy_grad_k);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_findDarcyPermeabilityGradients);
       t@@ -2135,7 +2135,7 @@ void DEM::startTime()
                            if (iter == 0) {
                                setDarcyNormZero<<<dimGridFluid, dimBlockFluid>>>(
                                        dev_darcy_norm);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                checkForCudaErrorsIter("Post setDarcyNormZero", iter);
        
                                if (PROFILING == 1)
       t@@ -2143,7 +2143,7 @@ void DEM::startTime()
                                copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
                                        dev_darcy_p,
                                        dev_darcy_p_old);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                if (PROFILING == 1)
                                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                            &t_copyValues);
       t@@ -2163,7 +2163,7 @@ void DEM::startTime()
                                        darcy.bc_top_flux,
                                        dev_darcy_k,
                                        darcy.mu);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                if (PROFILING == 1)
                                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                            &t_setDarcyGhostNodes);
       t@@ -2195,7 +2195,7 @@ void DEM::startTime()
                                    copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
                                            dev_darcy_p,
                                            dev_darcy_p_old);
       -                            cudaThreadSynchronize();
       +                            cudaDeviceSynchronize();
                                    if (PROFILING == 1)
                                        stopTimer(&kernel_tic, &kernel_toc,
                                                &kernel_elapsed, &t_copyValues);
       t@@ -2211,7 +2211,7 @@ void DEM::startTime()
                                        darcy.bc_xn, darcy.bc_xp,
                                        darcy.bc_yn, darcy.bc_yp,
                                        darcy.bc_bot, darcy.bc_top);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                if (PROFILING == 1)
                                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                            &t_setDarcyGhostNodes);
       t@@ -2241,7 +2241,7 @@ void DEM::startTime()
                                            wall0_iz,
                                            dev_darcy_p_constant,
                                            dev_darcy_dp_expl);
       -                            cudaThreadSynchronize();
       +                            cudaDeviceSynchronize();
                                    if (PROFILING == 1)
                                        stopTimer(&kernel_tic, &kernel_toc,
                                                &kernel_elapsed,
       t@@ -2276,7 +2276,7 @@ void DEM::startTime()
                                        dev_darcy_p_constant,
                                        dev_darcy_p_new,
                                        dev_darcy_norm);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                if (PROFILING == 1)
                                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                            &t_updateDarcySolution);
       t@@ -2289,7 +2289,7 @@ void DEM::startTime()
                                    setDarcyTopWallFixedFlow
                                        <<<dimGridFluid, dimBlockFluid>>>
                                        (wall0_iz, dev_darcy_p);
       -                            cudaThreadSynchronize();
       +                            cudaDeviceSynchronize();
                                    if (PROFILING == 1)
                                        stopTimer(&kernel_tic, &kernel_toc,
                                                &kernel_elapsed,
       t@@ -2310,7 +2310,7 @@ void DEM::startTime()
                                                darcy.bc_top_flux,
                                                dev_darcy_k,
                                                darcy.mu);
       -                            cudaThreadSynchronize();
       +                            cudaDeviceSynchronize();
                                    if (PROFILING == 1)
                                        stopTimer(&kernel_tic, &kernel_toc,
                                                &kernel_elapsed,
       t@@ -2325,7 +2325,7 @@ void DEM::startTime()
                                copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
                                        dev_darcy_p_new,
                                        dev_darcy_p);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                if (PROFILING == 1)
                                    stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                            &t_copyValues);
       t@@ -2392,7 +2392,7 @@ void DEM::startTime()
                            if (filetimeclock + time.dt < time.file_dt) {
                                setDarcyZeros<Float> <<<dimGridFluid, dimBlockFluid>>>
                                    (dev_darcy_dphi);
       -                        cudaThreadSynchronize();
       +                        cudaDeviceSynchronize();
                                checkForCudaErrorsIter(
                                        "After setDarcyZeros(dev_darcy_dphi)", iter);
                            }
       t@@ -2404,7 +2404,7 @@ void DEM::startTime()
                                 darcy.bc_xn, darcy.bc_xp,
                                 darcy.bc_yn, darcy.bc_yp,
                                 darcy.bc_bot, darcy.bc_top);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_setDarcyGhostNodes);
       t@@ -2419,7 +2419,7 @@ void DEM::startTime()
                                    dev_darcy_k,
                                    darcy.mu,
                                    dev_darcy_v);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            if (PROFILING == 1)
                                stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                        &t_findDarcyVelocities);
       t@@ -2442,7 +2442,7 @@ void DEM::startTime()
                             dev_vel,
                             dev_force,
                             dev_walls_tau_eff_x_pp);
       -                cudaThreadSynchronize();
       +                cudaDeviceSynchronize();
                        if (PROFILING == 1)
                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                    &t_summation);
       t@@ -2453,7 +2453,7 @@ void DEM::startTime()
                            startTimer(&kernel_tic);
                        summation<<<dimGrid, dimBlock>>>(dev_walls_tau_eff_x_pp,
                                dev_walls_tau_eff_x_partial);
       -                cudaThreadSynchronize();
       +                cudaDeviceSynchronize();
                        if (PROFILING == 1)
                            stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                    &t_summation);
       t@@ -2497,7 +2497,7 @@ void DEM::startTime()
                            change_velocity_state,
                            velocity_factor,
                            blocksPerGrid);
       -            cudaThreadSynchronize();
       +            cudaDeviceSynchronize();
                    checkForCudaErrorsIter("Post integrate", iter);
                    if (PROFILING == 1)
                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
       t@@ -2513,7 +2513,7 @@ void DEM::startTime()
                        summation<<<dimGrid, dimBlock>>>(dev_walls_force_pp,
                                dev_walls_force_partial);
                    }
       -            cudaThreadSynchronize();
       +            cudaDeviceSynchronize();
                    if (PROFILING == 1)
                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                &t_summation);
       t@@ -2533,7 +2533,7 @@ void DEM::startTime()
                                time.current,
                                iter);
                    }
       -            cudaThreadSynchronize();
       +            cudaDeviceSynchronize();
                    if (PROFILING == 1)
                        stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
                                &t_integrateWalls);
       t@@ -2579,7 +2579,7 @@ void DEM::startTime()
        
                    // Pause the CPU thread until all CUDA calls previously issued are
                    // completed
       -            cudaThreadSynchronize();
       +            cudaDeviceSynchronize();
                    checkForCudaErrorsIter("Beginning of file output section", iter);
        
                    // v_x, v_y, v_z -> v
       t@@ -2589,7 +2589,7 @@ void DEM::startTime()
                                dev_ns_v_y,
                                dev_ns_v_z,
                                dev_ns_v);
       -                cudaThreadSynchronize();
       +                cudaDeviceSynchronize();
                        checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
                    }
        
       t@@ -2603,7 +2603,7 @@ void DEM::startTime()
                        if (cfd_solver == 1) {
                            setDarcyZeros<Float> <<<dimGridFluid, dimBlockFluid>>>
                                (dev_darcy_dphi);
       -                    cudaThreadSynchronize();
       +                    cudaDeviceSynchronize();
                            checkForCudaErrorsIter(
                                    "After setDarcyZeros(dev_darcy_dphi) after transfer",
                                    iter);
       t@@ -2612,7 +2612,7 @@ void DEM::startTime()
        
                    // Pause the CPU thread until all CUDA calls previously issued are
                    // completed
       -            cudaThreadSynchronize();
       +            cudaDeviceSynchronize();
        
                    // Check the numerical stability of the NS solver
                    if (fluid == 1)