implemented ndem parameter for DEM/CFD computation ratio - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
git clone git://src.adamsgaard.dk/sphere
---
commit f8ab5f09ac2e8cd7d89525a6e619d6f8c382fa1d
parent e567138fef8fd2ec15afe7a583c3274c757839d0
Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
Date: Thu, 1 May 2014 10:51:45 +0200
implemented ndem parameter for DEM/CFD computation ratio
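
The change runs the CFD solution only every ns.ndem DEM iterations, and the fluid kernels consequently integrate with an effective time step of ndem*devC_dt. A minimal host-side sketch of this sub-stepping idea, where demStep() and fluidStep() are hypothetical placeholders rather than the actual sphere kernels:

// Sketch only: demStep()/fluidStep() stand in for sphere's CUDA kernel launches.
#include <cstdio>

static void demStep(double /* dt */)        { /* advance particles by dt */ }
static void fluidStep(double /* dt_fluid */) { /* advance fluid by dt_fluid */ }

int main()
{
    const double dt = 1.0e-6;      // DEM time step (devC_dt)
    const unsigned int ndem = 10;  // DEM steps per CFD step (ns.ndem)

    for (unsigned int iter = 0; iter < 100; ++iter) {
        if (iter % ndem == 0)
            fluidStep(ndem*dt);    // fluid integrates over ndem DEM steps
        demStep(dt);
    }
    std::printf("done\n");
    return 0;
}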
Diffstat:
M src/device.cu | 839 ++++++++++++++++---------------
M src/navierstokes.cuh | 30 +++++++++++++++++-------------
2 files changed, 440 insertions(+), 429 deletions(-)
---
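In the kernels patched below, the explicit occurrences of devC_dt in the fluid terms are scaled by ndem. As a rough standalone illustration of the porosity-change update in findPorositiesVelocitiesDiametersSphericalGradient (sample values are assumptions, the formula follows the updated kernel in src/navierstokes.cuh):

// Illustration only, not sphere code; input values are made up.
#include <algorithm>
#include <cstdio>

int main()
{
    const double devC_dt = 1.0e-6;     // DEM time step
    const unsigned int ndem = 10;      // DEM/CFD computation ratio
    const double phi_0 = 0.42;         // porosity from the previous fluid step
    const double dot_epsilon_kk = 0.5; // trace of the strain-rate tensor

    // porosity change accumulated over the fluid time step ndem*devC_dt
    const double dphi =
        (1.0 - std::min(phi_0, 0.99))*dot_epsilon_kk*ndem*devC_dt;
    const double phi = phi_0 + dphi/(ndem*devC_dt);

    std::printf("dphi = %g, phi = %g\n", dphi, phi);
    return 0;
}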
diff --git a/src/device.cu b/src/device.cu
@@ -761,7 +761,6 @@ __host__ void DEM::startTime()
dev_contacts,
dev_distmod);
-
// Synchronization point
cudaThreadSynchronize();
if (PROFILING == 1)
@@ -774,7 +773,6 @@ __host__ void DEM::startTime()
" too large?", iter);
}
-
// For each particle process collisions and compute resulting forces
//cudaPrintfInit();
if (PROFILING == 1)
@@ -897,484 +895,493 @@ __host__ void DEM::startTime()
}
#endif
- // Initial guess for the top epsilon values. These may be changed in
- // setUpperPressureNS
- Float pressure = ns.p[idx(0,0,ns.nz-1)];
- Float pressure_new = pressure; // Dirichlet
- Float epsilon_value = pressure_new - ns.beta*pressure;
- setNSepsilonTop<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- dev_ns_epsilon_new,
- epsilon_value);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSepsilonTop", iter);
-
- // Modulate the pressures at the upper boundary cells
- if ((ns.p_mod_A > 1.0e-5 || ns.p_mod_A < -1.0e-5) &&
- ns.p_mod_f > 1.0e-7) {
- Float new_pressure = ns.p[idx(0,0,ns.nz-1)] // original pressure
- + ns.p_mod_A*sin(2.0*M_PI*ns.p_mod_f*time.current
- + ns.p_mod_phi);
- setUpperPressureNS<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p,
+ if ((iter % ns.ndem) == 0) {
+ // Initial guess for the top epsilon values. These may be
+ // changed in setUpperPressureNS
+ Float pressure = ns.p[idx(0,0,ns.nz-1)];
+ Float pressure_new = pressure; // Dirichlet
+ Float epsilon_value = pressure_new - ns.beta*pressure;
+ setNSepsilonTop<<<dimGridFluid, dimBlockFluid>>>(
dev_ns_epsilon,
dev_ns_epsilon_new,
- ns.beta,
- new_pressure);
+ epsilon_value);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setUpperPressureNS", iter);
-
- if (report_even_more_epsilon == 1) {
- std::cout
- << "\n@@@@@@ TIME STEP " << iter << " @@@@@@"
- << "\n###### EPSILON AFTER setUpperPressureNS ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
+ checkForCudaErrorsIter("Post setNSepsilonTop", iter);
+
+ // Modulate the pressures at the upper boundary cells
+ if ((ns.p_mod_A > 1.0e-5 || ns.p_mod_A < -1.0e-5) &&
+ ns.p_mod_f > 1.0e-7) {
+ // original pressure
+ Float new_pressure = ns.p[idx(0,0,ns.nz-1)]
+ + ns.p_mod_A*sin(2.0*M_PI*ns.p_mod_f*time.current
+ + ns.p_mod_phi);
+ setUpperPressureNS<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_p,
+ dev_ns_epsilon,
+ dev_ns_epsilon_new,
+ ns.beta,
+ new_pressure);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post setUpperPressureNS", iter);
+
+ if (report_even_more_epsilon == 1) {
+ std::cout
+ << "\n@@@@@@ TIME STEP " << iter << " @@@@@@"
+ << "\n###### EPSILON AFTER setUpperPressureNS "
+ << "######" << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
}
- }
-
- // Set the values of the ghost nodes in the grid
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- /*setNSghostNodesDev<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p,
- dev_ns_v,
- dev_ns_v_p,
- dev_ns_phi,
- dev_ns_dphi,
- dev_ns_epsilon);*/
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p, ns.bc_bot, ns.bc_top);
-
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v, ns.bc_bot, ns.bc_top);
-
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v_p, ns.bc_bot, ns.bc_top);
-
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_phi, ns.bc_bot, ns.bc_top);
-
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_dphi, ns.bc_bot, ns.bc_top);
-
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_setNSghostNodesDev);
- checkForCudaErrorsIter("Post setNSghostNodesDev", iter);
- /*std::cout << "\n###### EPSILON AFTER setNSghostNodesDev ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");*/
-
-
- // Find the fluid stress tensor, needed for predicting the fluid
- // velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- findNSstressTensor<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v,
- dev_ns_tau);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSstressTensor);
- checkForCudaErrorsIter("Post findNSstressTensor", iter);
-
- // Set stress tensor values in the ghost nodes
- setNSghostNodes_tau<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_tau,
- ns.bc_bot,
- ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodes_tau", iter);
- // Find the divergence of phi*vi*v, needed for predicting the fluid
- // velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- findNSdivphiviv<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_phi,
- dev_ns_v,
- dev_ns_div_phi_vi_v);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSdivphiviv);
- checkForCudaErrorsIter("Post findNSdivphiviv", iter);
-
- // Find the divergence of phi*tau, needed for predicting the fluid
- // velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- findNSdivphitau<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_phi,
- dev_ns_tau,
- dev_ns_div_phi_tau);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSdivphitau);
- checkForCudaErrorsIter("Post findNSdivphitau", iter);
+ // Set the values of the ghost nodes in the grid
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ /*setNSghostNodesDev<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_p,
+ dev_ns_v,
+ dev_ns_v_p,
+ dev_ns_phi,
+ dev_ns_dphi,
+ dev_ns_epsilon);*/
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_p, ns.bc_bot, ns.bc_top);
- // Predict the fluid velocities on the base of the old pressure
- // field and ignoring the incompressibility constraint
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- findPredNSvelocities<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p,
- dev_ns_v,
- dev_ns_phi,
- dev_ns_dphi,
- dev_ns_div_phi_vi_v,
- dev_ns_div_phi_tau,
- ns.bc_bot,
- ns.bc_top,
- ns.beta,
- dev_ns_fi,
- dev_ns_v_p);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findPredNSvelocities);
- checkForCudaErrorsIter("Post findPredNSvelocities", iter);
+ setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v, ns.bc_bot, ns.bc_top);
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v_p);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesFloat3(dev_ns_v_p)",
- iter);
+ setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v_p, ns.bc_bot, ns.bc_top);
- // In the first iteration of the sphere program, we'll need to
- // manually estimate the values of epsilon. In the subsequent
- // iterations, the previous values are used.
- if (iter == 0) {
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_phi, ns.bc_bot, ns.bc_top);
- // Define the first estimate of the values of epsilon.
- // The initial guess depends on the value of ns.beta.
- Float pressure = ns.p[idx(2,2,2)];
- Float pressure_new = pressure; // Guess p_current = p_new
- Float epsilon_value = pressure_new - ns.beta*pressure;
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- setNSepsilonInterior<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon, epsilon_value);
- cudaThreadSynchronize();
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_dphi, ns.bc_bot, ns.bc_top);
- setNSnormZero<<<dimGridFluid, dimBlockFluid>>>(dev_ns_norm);
cudaThreadSynchronize();
-
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_setNSepsilon);
- checkForCudaErrorsIter("Post setNSepsilonInterior", iter);
-
- if (report_even_more_epsilon == 1) {
- std::cout
- << "\n###### EPSILON AFTER setNSepsilonInterior ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
-
- // Set the epsilon values at the lower boundary
- pressure = ns.p[idx(0,0,0)];
- pressure_new = pressure; // Guess p_current = p_new
- epsilon_value = pressure_new - ns.beta*pressure;
+ &t_setNSghostNodesDev);
+ checkForCudaErrorsIter("Post setNSghostNodesDev", iter);
+ /*std::cout << "\n###### EPSILON AFTER setNSghostNodesDev #####"
+ << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");*/
+
+ // Find the fluid stress tensor, needed for predicting the fluid
+ // velocities
if (PROFILING == 1)
startTimer(&kernel_tic);
- setNSepsilonBottom<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- dev_ns_epsilon_new,
- epsilon_value);
+ findNSstressTensor<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v,
+ dev_ns_tau);
cudaThreadSynchronize();
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_setNSdirichlet);
- checkForCudaErrorsIter("Post setNSepsilonBottom", iter);
+ &t_findNSstressTensor);
+ checkForCudaErrorsIter("Post findNSstressTensor", iter);
- if (report_even_more_epsilon == 1) {
- std::cout
- << "\n###### EPSILON AFTER setNSepsilonBottom ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
-
- /*setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon);
- cudaThreadSynchronize();
- checkForCudaErrors("Post setNSghostNodesFloat(dev_ns_epsilon)",
- iter);*/
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
+ // Set stress tensor values in the ghost nodes
+ setNSghostNodes_tau<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_tau,
+ ns.bc_bot,
+ ns.bc_top);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesEpsilon(1)",
- iter);
+ checkForCudaErrorsIter("Post setNSghostNodes_tau", iter);
- if (report_even_more_epsilon == 1) {
- std::cout <<
- "\n###### EPSILON AFTER setNSghostNodes(epsilon) ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
- }
-
- // Solve the system of epsilon using a Jacobi iterative solver.
- // The average normalized residual is initialized to a large value.
- //double avg_norm_res;
- double max_norm_res;
-
- // Write a log file of the normalized residuals during the Jacobi
- // iterations
- std::ofstream reslog;
- if (write_res_log == 1)
- reslog.open("max_res_norm.dat");
-
- // transfer normalized residuals from GPU to CPU
- if (report_epsilon == 1) {
- std::cout << "\n###### BEFORE FIRST JACOBI ITERATION ######"
- << "\n@@@@@@ TIME STEP " << iter << " @@@@@@"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
-
- for (unsigned int nijac = 0; nijac<ns.maxiter; ++nijac) {
-
- // Only grad(epsilon) changes during the Jacobi iterations. The
- // remaining terms of the forcing function are only calculated
- // during the first iteration.
+ // Find the divergence of phi*vi*v, needed for predicting the
+ // fluid velocities
if (PROFILING == 1)
startTimer(&kernel_tic);
- findNSforcing<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- dev_ns_f1,
- dev_ns_f2,
- dev_ns_f,
+ findNSdivphiviv<<<dimGridFluid, dimBlockFluid>>>(
dev_ns_phi,
- dev_ns_dphi,
- dev_ns_v_p,
- nijac);
+ dev_ns_v,
+ dev_ns_div_phi_vi_v);
cudaThreadSynchronize();
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSforcing);
- checkForCudaErrorsIter("Post findNSforcing", iter);
- /*setNSghostNodesForcing<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_f1,
- dev_ns_f2,
- dev_ns_f,
- nijac);
- cudaThreadSynchronize();
- checkForCudaErrors("Post setNSghostNodesForcing", iter);*/
-
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesEpsilon(2)",
- iter);
-
- if (report_epsilon == 1) {
- std::cout << "\n###### JACOBI ITERATION "
- << nijac << " after setNSghostNodes(epsilon,2) ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
-
- /*smoothing<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post smoothing", iter);
-
- if (report_epsilon == 1) {
- std::cout << "\n###### JACOBI ITERATION "
- << nijac << " after smoothing(epsilon) ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
-
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesEpsilon(3)",
- iter);
- */
-
- /*if (report_epsilon == 1) {
- std::cout << "\n###### JACOBI ITERATION "
- << nijac
- << " after setNSghostNodesEpsilon(epsilon,3) ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }*/
+ &t_findNSdivphiviv);
+ checkForCudaErrorsIter("Post findNSdivphiviv", iter);
- // Store old values
- /*copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- dev_ns_epsilon_old);
+ // Find the divergence of phi*tau, needed for predicting the
+ // fluid velocities
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ findNSdivphitau<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_phi,
+ dev_ns_tau,
+ dev_ns_div_phi_tau);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post copyValues (epsilon->epsilon_old)",
- iter);*/
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_findNSdivphitau);
+ checkForCudaErrorsIter("Post findNSdivphitau", iter);
- // Perform a single Jacobi iteration
+ // Predict the fluid velocities on the base of the old pressure
+ // field and ignoring the incompressibility constraint
if (PROFILING == 1)
startTimer(&kernel_tic);
- jacobiIterationNS<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- dev_ns_epsilon_new,
- dev_ns_norm,
- dev_ns_f,
+ findPredNSvelocities<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_p,
+ dev_ns_v,
+ dev_ns_phi,
+ dev_ns_dphi,
+ dev_ns_div_phi_vi_v,
+ dev_ns_div_phi_tau,
ns.bc_bot,
ns.bc_top,
- ns.theta);
+ ns.beta,
+ dev_ns_fi,
+ ns.ndem,
+ dev_ns_v_p);
cudaThreadSynchronize();
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_jacobiIterationNS);
- checkForCudaErrorsIter("Post jacobiIterationNS", iter);
-
- // Flip flop: swap new and current array pointers
- /*Float* tmp = dev_ns_epsilon;
- dev_ns_epsilon = dev_ns_epsilon_new;
- dev_ns_epsilon_new = tmp;*/
+ &t_findPredNSvelocities);
+ checkForCudaErrorsIter("Post findPredNSvelocities", iter);
- // Copy new values to current values
- copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon_new,
- dev_ns_epsilon);
+ setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v_p);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post copyValues (epsilon_new->epsilon)",
+ checkForCudaErrorsIter("Post setNSghostNodesFloat3(dev_ns_v_p)",
iter);
- /*findNormalizedResiduals<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon_old,
- dev_ns_epsilon,
- dev_ns_norm,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post findNormalizedResiduals",
- iter);*/
+ // In the first iteration of the sphere program, we'll need to
+ // manually estimate the values of epsilon. In the subsequent
+ // iterations, the previous values are used.
+ if (iter == 0) {
+
+ // Define the first estimate of the values of epsilon.
+ // The initial guess depends on the value of ns.beta.
+ Float pressure = ns.p[idx(2,2,2)];
+ Float pressure_new = pressure; // Guess p_current = p_new
+ Float epsilon_value = pressure_new - ns.beta*pressure;
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ setNSepsilonInterior<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon, epsilon_value);
+ cudaThreadSynchronize();
+
+ setNSnormZero<<<dimGridFluid, dimBlockFluid>>>(dev_ns_norm);
+ cudaThreadSynchronize();
+
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_setNSepsilon);
+ checkForCudaErrorsIter("Post setNSepsilonInterior", iter);
+
+ if (report_even_more_epsilon == 1) {
+ std::cout
+ << "\n###### EPSILON AFTER setNSepsilonInterior "
+ << "######" << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
+ // Set the epsilon values at the lower boundary
+ pressure = ns.p[idx(0,0,0)];
+ pressure_new = pressure; // Guess p_current = p_new
+ epsilon_value = pressure_new - ns.beta*pressure;
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ setNSepsilonBottom<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ dev_ns_epsilon_new,
+ epsilon_value);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_setNSdirichlet);
+ checkForCudaErrorsIter("Post setNSepsilonBottom", iter);
+
+ if (report_even_more_epsilon == 1) {
+ std::cout
+ << "\n###### EPSILON AFTER setNSepsilonBottom "
+ << "######" << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
+
+ /*setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon);
+ cudaThreadSynchronize();
+ checkForCudaErrors("Post setNSghostNodesFloat(dev_ns_epsilon)",
+ iter);*/
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post setNSghostNodesEpsilon(1)",
+ iter);
+
+ if (report_even_more_epsilon == 1) {
+ std::cout <<
+ "\n###### EPSILON AFTER setNSghostNodes(epsilon) "
+ << "######" << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
+ }
+
+ // Solve the system of epsilon using a Jacobi iterative solver.
+ // The average normalized residual is initialized to a large
+ // value.
+ //double avg_norm_res;
+ double max_norm_res;
+
+ // Write a log file of the normalized residuals during the Jacobi
+ // iterations
+ std::ofstream reslog;
+ if (write_res_log == 1)
+ reslog.open("max_res_norm.dat");
+
+ // transfer normalized residuals from GPU to CPU
if (report_epsilon == 1) {
- std::cout << "\n###### JACOBI ITERATION "
- << nijac << " after jacobiIterationNS ######"
+ std::cout << "\n###### BEFORE FIRST JACOBI ITERATION ######"
+ << "\n@@@@@@ TIME STEP " << iter << " @@@@@@"
<< std::endl;
transferNSepsilonFromGlobalDeviceMemory();
printNSarray(stdout, ns.epsilon, "epsilon");
}
- if (nijac % nijacnorm == 0) {
-
- // Read the normalized residuals from the device
- transferNSnormFromGlobalDeviceMemory();
-
- // Write the normalized residuals to the terminal
- //printNSarray(stdout, ns.norm, "norm");
-
- // Find the maximum value of the normalized residuals
- max_norm_res = maxNormResNS();
-
- // Write the Jacobi iteration number and maximum value of
- // the normalized residual to the log file
- if (write_res_log == 1)
- reslog << nijac << '\t' << max_norm_res << std::endl;
- }
+ for (unsigned int nijac = 0; nijac<ns.maxiter; ++nijac) {
+
+ // Only grad(epsilon) changes during the Jacobi iterations.
+ // The remaining terms of the forcing function are only
+ // calculated during the first iteration.
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ findNSforcing<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ dev_ns_f1,
+ dev_ns_f2,
+ dev_ns_f,
+ dev_ns_phi,
+ dev_ns_dphi,
+ dev_ns_v_p,
+ nijac,
+ ns.ndem);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_findNSforcing);
+ checkForCudaErrorsIter("Post findNSforcing", iter);
+ /*setNSghostNodesForcing<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_f1,
+ dev_ns_f2,
+ dev_ns_f,
+ nijac);
+ cudaThreadSynchronize();
+ checkForCudaErrors("Post setNSghostNodesForcing", iter);*/
+
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post setNSghostNodesEpsilon(2)",
+ iter);
- if (max_norm_res < ns.tolerance) {
-
- if (write_conv_log == 1 && iter % conv_log_interval == 0)
- convlog << iter << '\t' << nijac << std::endl;
-
- // Apply smoothing if requested
- if (ns.gamma > 0.0) {
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesEpsilon(4)",
- iter);
-
- smoothing<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.gamma,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post smoothing", iter);
-
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesEpsilon(4)",
- iter);
+ if (report_epsilon == 1) {
+ std::cout << "\n###### JACOBI ITERATION "
+ << nijac
+ << " after setNSghostNodes(epsilon,2) ######"
+ << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
}
+ /*smoothing<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post smoothing", iter);
+
+ if (report_epsilon == 1) {
+ std::cout << "\n###### JACOBI ITERATION "
+ << nijac << " after smoothing(epsilon) ######"
+ << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
+
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post setNSghostNodesEpsilon(3)",
+ iter);
+ */
+
+ /*if (report_epsilon == 1) {
+ std::cout << "\n###### JACOBI ITERATION "
+ << nijac
+ << " after setNSghostNodesEpsilon(epsilon,3) ######"
+ << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }*/
+
+ // Store old values
+ /*copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ dev_ns_epsilon_old);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter
+ ("Post copyValues (epsilon->epsilon_old)", iter);*/
+
+ // Perform a single Jacobi iteration
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ jacobiIterationNS<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ dev_ns_epsilon_new,
+ dev_ns_norm,
+ dev_ns_f,
+ ns.bc_bot,
+ ns.bc_top,
+ ns.theta);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_jacobiIterationNS);
+ checkForCudaErrorsIter("Post jacobiIterationNS", iter);
+
+ // Flip flop: swap new and current array pointers
+ /*Float* tmp = dev_ns_epsilon;
+ dev_ns_epsilon = dev_ns_epsilon_new;
+ dev_ns_epsilon_new = tmp;*/
+
+ // Copy new values to current values
+ copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon_new,
+ dev_ns_epsilon);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter
+ ("Post copyValues (epsilon_new->epsilon)", iter);
+
+ /*findNormalizedResiduals<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon_old,
+ dev_ns_epsilon,
+ dev_ns_norm,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post findNormalizedResiduals",
+ iter);*/
+
if (report_epsilon == 1) {
std::cout << "\n###### JACOBI ITERATION "
- << nijac << " after smoothing ######"
+ << nijac << " after jacobiIterationNS ######"
<< std::endl;
transferNSepsilonFromGlobalDeviceMemory();
printNSarray(stdout, ns.epsilon, "epsilon");
}
- break; // solution has converged, exit Jacobi iter. loop
- }
+ if (nijac % nijacnorm == 0) {
- if (nijac >= ns.maxiter-1) {
+ // Read the normalized residuals from the device
+ transferNSnormFromGlobalDeviceMemory();
- if (write_conv_log == 1)
- convlog << iter << '\t' << nijac << std::endl;
+ // Write the normalized residuals to the terminal
+ //printNSarray(stdout, ns.norm, "norm");
- std::cerr << "\nIteration " << iter << ", time "
- << iter*time.dt << " s: "
- "Error, the epsilon solution in the fluid "
- "calculations did not converge. Try increasing the "
- "value of 'ns.maxiter' (" << ns.maxiter
- << ") or increase 'ns.tolerance' ("
- << ns.tolerance << ")." << std::endl;
- }
- //break; // end after Jacobi first iteration
- } // end Jacobi iteration loop
+ // Find the maximum value of the normalized residuals
+ max_norm_res = maxNormResNS();
- if (write_res_log == 1)
- reslog.close();
+ // Write the Jacobi iteration number and maximum value
+ // of the normalized residual to the log file
+ if (write_res_log == 1)
+ reslog << nijac << '\t' << max_norm_res << std::endl;
+ }
- // Find the new pressures and velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- updateNSvelocityPressure<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p,
- dev_ns_v,
- dev_ns_v_p,
- dev_ns_phi,
- dev_ns_epsilon,
- ns.beta,
- ns.bc_bot,
- ns.bc_top);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_updateNSvelocityPressure);
- checkForCudaErrorsIter("Post updateNSvelocityPressure", iter);
- }
+ if (max_norm_res < ns.tolerance) {
+
+ if (write_conv_log == 1 && iter % conv_log_interval == 0)
+ convlog << iter << '\t' << nijac << std::endl;
+
+ // Apply smoothing if requested
+ if (ns.gamma > 0.0) {
+ setNSghostNodes<Float>
+ <<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter
+ ("Post setNSghostNodesEpsilon(4)", iter);
+
+ smoothing<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.gamma,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post smoothing", iter);
+
+ setNSghostNodes<Float>
+ <<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter
+ ("Post setNSghostNodesEpsilon(4)", iter);
+ }
+
+ if (report_epsilon == 1) {
+ std::cout << "\n###### JACOBI ITERATION "
+ << nijac << " after smoothing ######"
+ << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
+
+ break; // solution has converged, exit Jacobi loop
+ }
- /*std::cout << "\n###### ITERATION "
- << iter << " ######" << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");*/
- //transferNSepsilonNewFromGlobalDeviceMemory();
- //printNSarray(stdout, ns.epsilon_new, "epsilon_new");
+ if (nijac >= ns.maxiter-1) {
+
+ if (write_conv_log == 1)
+ convlog << iter << '\t' << nijac << std::endl;
+
+ std::cerr << "\nIteration " << iter << ", time "
+ << iter*time.dt << " s: "
+ "Error, the epsilon solution in the fluid "
+ "calculations did not converge. Try increasing the "
+ "value of 'ns.maxiter' (" << ns.maxiter
+ << ") or increase 'ns.tolerance' ("
+ << ns.tolerance << ")." << std::endl;
+ }
+ //break; // end after Jacobi first iteration
+ } // end Jacobi iteration loop
+
+ if (write_res_log == 1)
+ reslog.close();
+
+ // Find the new pressures and velocities
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ updateNSvelocityPressure<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_p,
+ dev_ns_v,
+ dev_ns_v_p,
+ dev_ns_phi,
+ dev_ns_epsilon,
+ ns.beta,
+ ns.bc_bot,
+ ns.bc_top,
+ ns.ndem);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_updateNSvelocityPressure);
+ checkForCudaErrorsIter("Post updateNSvelocityPressure", iter);
+ }
+
+ /*std::cout << "\n###### ITERATION "
+ << iter << " ######" << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");*/
+ //transferNSepsilonNewFromGlobalDeviceMemory();
+ //printNSarray(stdout, ns.epsilon_new, "epsilon_new");
+ }
if (np > 0) {
// Update particle kinematics
diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh
@@ -1059,6 +1059,7 @@ __global__ void findPorositiesVelocitiesDiametersSphericalGradient(
Float3* dev_ns_vp_avg,
Float* dev_ns_d_avg,
const unsigned int iteration,
+ const unsigned int ndem,
const unsigned int np)
{
// 3D thread index
@@ -1207,8 +1208,8 @@ __global__ void findPorositiesVelocitiesDiametersSphericalGradient(
dot_epsilon_ii.x + dot_epsilon_ii.y + dot_epsilon_ii.z;
const Float dphi =
- (1.0 - fmin(phi_0,0.99))*dot_epsilon_kk*devC_dt;
- phi = phi_0 + dphi/devC_dt;
+ (1.0 - fmin(phi_0,0.99))*dot_epsilon_kk*ndem*devC_dt;
+ phi = phi_0 + dphi/(ndem*devC_dt);
//if (dot_epsilon_kk != 0.0)
//printf("%d,%d,%d\tdot_epsilon_kk = %f\tdphi = %f\tphi = %f\n",
@@ -1911,6 +1912,7 @@ __global__ void findPredNSvelocities(
int bc_top, // in
Float beta, // in
Float3* dev_ns_fi, // in
+ unsigned int ndem, // in
Float3* dev_ns_v_p) // out
{
// 3D thread index
@@ -1959,18 +1961,18 @@ __global__ void findPredNSvelocities(
Float3 pressure_term = MAKE_FLOAT3(0.0, 0.0, 0.0);
if (beta > 0.0) {
grad_p = gradient(dev_ns_p, x, y, z, dx, dy, dz);
- pressure_term = -beta/devC_params.rho_f*grad_p*devC_dt/phi;
+ pressure_term = -beta/devC_params.rho_f*grad_p*ndem*devC_dt/phi;
}
// Calculate the predicted velocity
Float3 v_p = v
+ pressure_term
- + 1.0/devC_params.rho_f*div_phi_tau*devC_dt/phi
+ + 1.0/devC_params.rho_f*div_phi_tau*ndem*devC_dt/phi
+ MAKE_FLOAT3(devC_params.g[0], devC_params.g[1], devC_params.g[2])
- *devC_dt
- - devC_dt/(devC_params.rho_f*phi)*f_i
+ *ndem*devC_dt
+ - ndem*devC_dt/(devC_params.rho_f*phi)*f_i
- v*dphi/phi
- - div_phi_vi_v*devC_dt/phi // advection term
+ - div_phi_vi_v*ndem*devC_dt/phi // advection term
;
// Report velocity components to stdout for debugging
@@ -2017,7 +2019,8 @@ __global__ void findNSforcing(
Float* dev_ns_phi,
Float* dev_ns_dphi,
Float3* dev_ns_v_p,
- unsigned int nijac)
+ unsigned int nijac,
+ unsigned int ndem)
{
// 3D thread index
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -2066,9 +2069,9 @@ __global__ void findNSforcing(
+ dot(grad_phi, v_p)*devC_params.rho_f/(devC_dt*phi)
+ dphi*devC_params.rho_f/(devC_dt*devC_dt*phi);
f2 = grad_phi/phi;*/
- f1 = div_v_p*devC_params.rho_f*phi/devC_dt
- + dot(grad_phi, v_p)*devC_params.rho_f/devC_dt
- + dphi*devC_params.rho_f/(devC_dt*devC_dt);
+ f1 = div_v_p*devC_params.rho_f*phi/(ndem*devC_dt)
+ + dot(grad_phi, v_p)*devC_params.rho_f/(ndem*devC_dt)
+ + dphi*devC_params.rho_f/(ndem*devC_dt*devC_dt);
f2 = grad_phi/phi;
// Report values terms in the forcing function for debugging
@@ -2392,7 +2395,8 @@ __global__ void updateNSvelocityPressure(
Float* dev_ns_epsilon,
Float beta,
int bc_bot,
- int bc_top)
+ int bc_top,
+ unsigned int ndem)
{
// 3D thread index
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -2431,7 +2435,7 @@ __global__ void updateNSvelocityPressure(
// Find new velocity
//Float3 v = v_p - devC_dt/devC_params.rho_f*grad_epsilon;
- Float3 v = v_p - devC_dt/(devC_params.rho_f*phi)*grad_epsilon;
+ Float3 v = v_p - ndem*devC_dt/(devC_params.rho_f*phi)*grad_epsilon;
// Print values for debugging
/* if (z == 0) {