device.cu: estimate number of cudaCoresPerSM for CC version 8 - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
git clone git://src.adamsgaard.dk/sphere
---
commit 04f7aa6ff1ec5eb78167e8b40fb399ed3f63a093
parent 401f418676345ca4b80e2725106c40fc37f28e31
Author: Anders Damsgaard <anders@adamsgaard.dk>
Date: Thu, 9 Feb 2023 12:43:46 +0100
device.cu: estimate number of cudaCoresPerSM for CC version 8
Diffstat:
M src/device.cu | 104 ++++++++++++++++---------------
1 file changed, 53 insertions(+), 51 deletions(-)
---
diff --git a/src/device.cu b/src/device.cu
@@ -54,6 +54,8 @@ int cudaCoresPerSM(int major, int minor)
return 128;
else if (major == 7)
return 32;
+ else if (major == 8)
+ return 64;
else
printf("Error in cudaCoresPerSM Device compute capability value "
"(%d.%d) not recognized.", major, minor);
@@ -112,11 +114,11 @@ void DEM::initializeGPU(void)
if (verbose == 1) {
cout << " CUDA device ID: " << d << "\n";
- cout << " - Name: " << prop.name << ", compute capability: "
+ cout << " - Name: " << prop.name << ", compute capability: "
<< prop.major << "." << prop.minor << ".\n";
- cout << " - CUDA Driver version: " << cudaDriverVersion/1000
- << "." << cudaDriverVersion%100
- << ", runtime version " << cudaRuntimeVersion/1000 << "."
+ cout << " - CUDA Driver version: " << cudaDriverVersion/1000
+ << "." << cudaDriverVersion%100
+ << ", runtime version " << cudaRuntimeVersion/1000 << "."
<< cudaRuntimeVersion%100 << std::endl;
}
}
@@ -138,11 +140,11 @@ void DEM::initializeGPU(void)
if (verbose == 1) {
cout << " CUDA device ID: " << device << "\n";
- cout << " - Name: " << prop.name << ", compute capability: "
+ cout << " - Name: " << prop.name << ", compute capability: "
<< prop.major << "." << prop.minor << ".\n";
- cout << " - CUDA Driver version: " << cudaDriverVersion/1000
- << "." << cudaDriverVersion%100
- << ", runtime version " << cudaRuntimeVersion/1000 << "."
+ cout << " - CUDA Driver version: " << cudaDriverVersion/1000
+ << "." << cudaDriverVersion%100
+ << ", runtime version " << cudaRuntimeVersion/1000 << "."
<< cudaRuntimeVersion%100
<< "\n - " << ncudacores << " CUDA cores" << std::endl;
}
@@ -284,7 +286,7 @@ __global__ void checkParticlePositions(
// Copy the constant data components to device memory,
-// and check whether the values correspond to the
+// and check whether the values correspond to the
// values in constant memory.
void DEM::checkConstantMemory()
{
@@ -386,7 +388,7 @@ void DEM::updateGridSize()
grid.L[2] = *Lz;
// Write value to devC_grid.L[2]
- //cudaMemcpyToSymbol(devC_grid.L[2], &Lz, sizeof(Float));
+ //cudaMemcpyToSymbol(devC_grid.L[2], &Lz, sizeof(Float));
cudaMemcpyToSymbol(devC_grid, &grid, sizeof(Grid));
checkForCudaErrors("DEM::updateGridSize(): write to devC_grid.L[2]");
@@ -857,15 +859,15 @@ void DEM::startTime()
tic = clock();
//// GPU workload configuration
- unsigned int threadsPerBlock = 256;
- //unsigned int threadsPerBlock = 512;
+ unsigned int threadsPerBlock = 256;
+ //unsigned int threadsPerBlock = 512;
// Create enough blocks to accomodate the particles
- unsigned int blocksPerGrid = iDivUp(np, threadsPerBlock);
+ unsigned int blocksPerGrid = iDivUp(np, threadsPerBlock);
dim3 dimGrid(blocksPerGrid, 1, 1); // Blocks arranged in 1D grid
dim3 dimBlock(threadsPerBlock, 1, 1); // Threads arranged in 1D block
- unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock);
+ unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock);
dim3 dimGridBonds(blocksPerGridBonds, 1, 1); // Blocks arranged in 1D grid
// Use 3D block and grid layout for cell-centered fluid calculations
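The workload configuration above launches one thread per particle in 1D blocks of 256 threads; iDivUp rounds the block count up so the final, partially filled block still covers all np particles. Its definition is not part of this diff, but it is presumably the usual ceiling-division helper, sketched here under that assumption:

    // Assumed definition of iDivUp: ceiling of a/b for positive integers.
    // Example: iDivUp(1000, 256) == 4, i.e. 4*256 = 1024 threads for
    // np = 1000 particles; the surplus threads are expected to exit early
    // inside the kernels.
    unsigned int iDivUp(unsigned int a, unsigned int b)
    {
        return (a + b - 1) / b;
    }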
@@ -930,9 +932,9 @@ void DEM::startTime()
//sprintf(file,"output/%s.status.dat", sid);
outfile = "output/" + sid + ".status.dat";
fp = fopen(outfile.c_str(), "w");
- fprintf(fp,"%2.4e %2.4e %d\n",
- time.current,
- 100.0*time.current/time.total,
+ fprintf(fp,"%2.4e %2.4e %d\n",
+ time.current,
+ 100.0*time.current/time.total,
time.step_count);
fclose(fp);
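The status file is rewritten with the format string "%2.4e %2.4e %d\n": the current simulation time in seconds, the percentage of the total simulation time completed, and the output step count. A hypothetical line produced by this format would read:

    1.2340e+00 2.4680e+01 12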
@@ -1052,19 +1054,19 @@ void DEM::startTime()
checkForCudaErrorsIter("Post checkParticlePositions", iter);
#endif
- // If the grid is adaptive, readjust the grid height to equal the
+ // If the grid is adaptive, readjust the grid height to equal the
// positions of the dynamic walls
if (grid.adaptive == 1 && walls.nw > 0) {
updateGridSize();
}
- // For each particle:
- // Compute hash key (cell index) from position
+ // For each particle:
+ // Compute hash key (cell index) from position
// in the fine, uniform and homogenous grid.
if (PROFILING == 1)
startTimer(&kernel_tic);
calcParticleCellID<<<dimGrid, dimBlock>>>(dev_gridParticleCellID,
- dev_gridParticleIndex,
+ dev_gridParticleIndex,
dev_x);
// Synchronization point
@@ -1093,7 +1095,7 @@ void DEM::startTime()
// Zero cell array values by setting cellStart to its highest
// possible value, specified with pointer value 0xffffffff, which
// for a 32 bit unsigned int is 4294967295.
- cudaMemset(dev_cellStart, 0xffffffff,
+ cudaMemset(dev_cellStart, 0xffffffff,
grid.num[0]*grid.num[1]*grid.num[2]*sizeof(unsigned int));
cudaThreadSynchronize();
checkForCudaErrorsIter("Post cudaMemset", iter);
@@ -1103,14 +1105,14 @@ void DEM::startTime()
// configurations in new arrays (*_sorted).
if (PROFILING == 1)
startTimer(&kernel_tic);
- reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_cellStart,
+ reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_cellStart,
dev_cellEnd,
- dev_gridParticleCellID,
+ dev_gridParticleCellID,
dev_gridParticleIndex,
- dev_x, dev_vel,
+ dev_x, dev_vel,
dev_angvel,
- dev_x_sorted,
- dev_vel_sorted,
+ dev_x_sorted,
+ dev_vel_sorted,
dev_angvel_sorted);
// Synchronization point
@@ -1127,10 +1129,10 @@ void DEM::startTime()
// For each particle: Search contacts in neighbor cells
if (PROFILING == 1)
startTimer(&kernel_tic);
- topology<<<dimGrid, dimBlock>>>(dev_cellStart,
+ topology<<<dimGrid, dimBlock>>>(dev_cellStart,
dev_cellEnd,
dev_gridParticleIndex,
- dev_x_sorted,
+ dev_x_sorted,
dev_contacts,
dev_distmod);
@@ -1159,10 +1161,10 @@ void DEM::startTime()
dev_angvel_sorted,
dev_vel,
dev_angvel,
- dev_force,
- dev_torque,
+ dev_force,
+ dev_torque,
dev_es_dot,
- dev_ev_dot,
+ dev_ev_dot,
dev_es,
dev_ev,
dev_p,
@@ -1828,7 +1830,7 @@ void DEM::startTime()
if (write_conv_log == 1)
convlog << iter+1 << '\t' << nijac << std::endl;
- std::cerr << "\nIteration " << iter << ", time "
+ std::cerr << "\nIteration " << iter << ", time "
<< iter*time.dt << " s: "
"Error, the epsilon solution in the fluid "
"calculations did not converge. Try increasing "
@@ -1898,7 +1900,7 @@ void DEM::startTime()
} // end cfd_solver == 0
// Darcy solution
- else if (cfd_solver == 1) {
+ else if (cfd_solver == 1) {
#if defined(REPORT_EPSILON) || defined(REPORT_FORCING_TERMS)
std::cout << "\n\n@@@@@@ TIME STEP " << iter << " @@@"
@@ -2027,7 +2029,7 @@ void DEM::startTime()
// copy porosities to the frictionless Y boundaries
if (grid.periodic == 2) {
- copyDarcyPorositiesToEdges<<<dimGridFluid,
+ copyDarcyPorositiesToEdges<<<dimGridFluid,
dimBlockFluid>>>(
dev_darcy_phi,
dev_darcy_dphi,
@@ -2038,7 +2040,7 @@ void DEM::startTime()
// copy porosities to the frictionless lower Z boundary
if (grid.periodic == 2) {
- copyDarcyPorositiesToBottom<<<dimGridFluid,
+ copyDarcyPorositiesToBottom<<<dimGridFluid,
dimBlockFluid>>>(
dev_darcy_phi,
dev_darcy_dphi,
@@ -2369,7 +2371,7 @@ void DEM::startTime()
if (write_conv_log == 1)
convlog << iter+1 << '\t' << nijac << std::endl;
- std::cerr << "\nIteration " << iter << ", time "
+ std::cerr << "\nIteration " << iter << ", time "
<< iter*time.dt << " s: "
"Error, the pressure solution in the fluid "
"calculations did not converge. Try increasing "
@@ -2471,14 +2473,14 @@ void DEM::startTime()
// Update particle kinematics
if (PROFILING == 1)
startTimer(&kernel_tic);
- integrate<<<dimGrid, dimBlock>>>(dev_x_sorted,
- dev_vel_sorted,
+ integrate<<<dimGrid, dimBlock>>>(dev_x_sorted,
+ dev_vel_sorted,
dev_angvel_sorted,
- dev_x,
- dev_vel,
+ dev_x,
+ dev_vel,
dev_angvel,
dev_force,
- dev_torque,
+ dev_torque,
dev_angpos,
dev_acc,
dev_angacc,
@@ -2571,7 +2573,7 @@ void DEM::startTime()
}
- // Produce output binary if the time interval
+ // Produce output binary if the time interval
// between output files has been reached
if (filetimeclock >= time.file_dt) {
@@ -2646,19 +2648,19 @@ void DEM::startTime()
cout << "\n## Particle " << n << " ##\n";
cout << "- contacts:\n";
- for (int nc = 0; nc < NC; ++nc)
+ for (int nc = 0; nc < NC; ++nc)
cout << "[" << nc << "]=" << k.contacts[nc+NC*n] <<
'\n';
cout << "\n- delta_t:\n";
- for (int nc = 0; nc < NC; ++nc)
+ for (int nc = 0; nc < NC; ++nc)
cout << k.delta_t[nc+NC*n].x << '\t'
<< k.delta_t[nc+NC*n].y << '\t'
<< k.delta_t[nc+NC*n].z << '\t'
<< k.delta_t[nc+NC*n].w << '\n';
cout << "\n- distmod:\n";
- for (int nc = 0; nc < NC; ++nc)
+ for (int nc = 0; nc < NC; ++nc)
cout << k.distmod[nc+NC*n].x << '\t'
<< k.distmod[nc+NC*n].y << '\t'
<< k.distmod[nc+NC*n].z << '\t'
@@ -2667,11 +2669,11 @@ void DEM::startTime()
cout << '\n';
}
- // Update status.dat at the interval of filetime
+ // Update status.dat at the interval of filetime
outfile = "output/" + sid + ".status.dat";
fp = fopen(outfile.c_str(), "w");
- fprintf(fp,"%2.4e %2.4e %d\n",
- time.current,
+ fprintf(fp,"%2.4e %2.4e %d\n",
+ time.current,
100.0*time.current/time.total,
time.step_count);
fclose(fp);
@@ -2697,7 +2699,7 @@ void DEM::startTime()
if (verbose == 1) {
cout << "\nSimulation ended. Statistics:\n"
- << " - Last output file number: "
+ << " - Last output file number: "
<< time.step_count << "\n"
<< " - GPU time spent: "
<< dev_time_spent/1000.0f << " s\n"
@@ -2705,7 +2707,7 @@ void DEM::startTime()
<< time_spent << " s\n"
<< " - Mean duration of iteration:\n"
<< " " << dev_time_spent/((double)iter*1000.0f) << " s"
- << std::endl;
+ << std::endl;
}
cudaEventDestroy(dev_tic);