tfixed segfault tau due to missing allocation of dev_ns_v_p - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit 3ff1e9afe86667d4638dfd5b9776bb7d821c1b72
(DIR) parent 3470d966f37438cb7ae3861d7b062c92d314dc0b
(HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
Date: Thu, 5 Jun 2014 08:39:21 +0200
fixed segfault tau due to missing allocation of dev_ns_v_p
Diffstat:
M src/navierstokes.cuh | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
---
(DIR) diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh
t@@ -69,7 +69,7 @@ void DEM::initNSmemDev(void)
cudaMalloc((void**)&dev_ns_v_x, memSizeFvel);// velocity in stag. grid
cudaMalloc((void**)&dev_ns_v_y, memSizeFvel);// velocity in stag. grid
cudaMalloc((void**)&dev_ns_v_z, memSizeFvel);// velocity in stag. grid
- //cudaMalloc((void**)&dev_ns_v_p, memSizeF*3); // predicted cell velocity
+ cudaMalloc((void**)&dev_ns_v_p, memSizeF*3); // predicted cell velocity
cudaMalloc((void**)&dev_ns_v_p_x, memSizeFvel); // pred. vel. in stag. grid
cudaMalloc((void**)&dev_ns_v_p_y, memSizeFvel); // pred. vel. in stag. grid
cudaMalloc((void**)&dev_ns_v_p_z, memSizeFvel); // pred. vel. in stag. grid
t@@ -106,7 +106,7 @@ void DEM::freeNSmemDev()
cudaFree(dev_ns_v_x);
cudaFree(dev_ns_v_y);
cudaFree(dev_ns_v_z);
- //cudaFree(dev_ns_v_p);
+ cudaFree(dev_ns_v_p);
cudaFree(dev_ns_v_p_x);
cudaFree(dev_ns_v_p_y);
cudaFree(dev_ns_v_p_z);
t@@ -3017,11 +3017,14 @@ __global__ void applyInteractionForceToFluid(
}
}
+ const Float3 F_pf = fi/(dx*dy*dz);
+
__syncthreads();
#ifdef CHECK_NS_FINITE
checkFiniteFloat3("fi", x, y, z, fi/(dx*dy*dz));
#endif
- dev_ns_fi[cellidx] = fi/(dx*dy*dz);
+ //printf("F_pf [%d,%d,%d] = %f,%f,%f\n", x,y,z, F_pf.x, F_pf.y, F_pf.z);
+ dev_ns_fi[cellidx] = F_pf;
}
}
t@@ -3076,8 +3079,14 @@ __global__ void interpolateFaceToCenter(
const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
// Check that we are not outside the fluid grid
- if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
+ //if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
+ if (x < nx && y < ny && z < nz) {
__syncthreads();
const Float x_n = dev_in_x[vidx(x,y,z)];
t@@ -3093,13 +3102,15 @@ __global__ void interpolateFaceToCenter(
(z_n + z_p)/2.0);
__syncthreads();
- //printf("[%d,%d,%d] = %f\n", x,y,z, val);
+ //printf("[%d,%d,%d] = %f, %f, %f\n", x,y,z, val.x, val.y, val.z);
dev_out[idx(x,y,z)] = val;
}
}
// Launch per cell face node. Set velocity ghost nodes beforehand.
// Find div(tau) at all cell faces.
+// Warning: The grid-corner values will be invalid, along with the non-normal
+// components of the ghost nodes
__global__ void findFaceDivTau(
Float* dev_ns_v_x,
Float* dev_ns_v_y,
t@@ -3171,8 +3182,8 @@ __global__ void findFaceDivTau(
(v_z_zp - 2.0*v_z + v_z_zn)/(dz*dz));
__syncthreads();
- printf("div_tau [%d,%d,%d] = %f, %f, %f\n", x,y,z,
- div_tau_x, div_tau_y, div_tau_z);
+ //printf("div_tau [%d,%d,%d] = %f, %f, %f\n", x,y,z,
+ //div_tau_x, div_tau_y, div_tau_z);
dev_ns_div_tau_x[faceidx] = div_tau_x;
dev_ns_div_tau_y[faceidx] = div_tau_y;
dev_ns_div_tau_z[faceidx] = div_tau_z;