tfunction overloading doesnt seem to work with device kernels - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit 47aa1cb121030dda5698b6c732d38e8cd8f718a9
(DIR) parent f831d67e3a3dcd24b6a27d638279b7d6e36a10a7
(HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
Date: Thu, 3 Apr 2014 13:05:37 +0200
function overloading doesnt seem to work with device kernels
Diffstat:
M src/navierstokes.cuh | 99 +++++++++++++++++--------------
1 file changed, 55 insertions(+), 44 deletions(-)
---
(DIR) diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh
t@@ -23,30 +23,36 @@ __inline__ __device__ Float hmean(Float a, Float b) {
return (2.0*a*b)/(a+b);
}
-// Overloaded helper function for checking whether a value is NaN or Inf
-__device__ void checkFinite(
+// Helper functions for checking whether a value is NaN or Inf
+__device__ int checkFiniteFloat(
const char* desc,
const unsigned int x,
const unsigned int y,
const unsigned int z,
- const Float3 v)
+ const Float s)
{
__syncthreads();
- if (!isfinite(v.x) || !isfinite(v.y) || !isfinite(v.z))
- printf("\n[%d,%d,%d]: Error: %s = %f, %f, %f\n",
- x, y, z, desc, v.x, v.y, v.z);
+ if (!isfinite(s)) {
+ printf("\n[%d,%d,%d]: Error: %s = %f, %f, %f\n", x, y, z, desc, s);
+ return 1;
+ }
+ return 0;
}
-__device__ void checkFinite(
+__device__ int checkFiniteFloat3(
const char* desc,
const unsigned int x,
const unsigned int y,
const unsigned int z,
- const Float s)
+ const Float3 v)
{
__syncthreads();
- if (!isfinite(s))
- printf("\n[%d,%d,%d]: Error: %s = %f, %f, %f\n", x, y, z, desc, s);
+ if (!isfinite(v.x) || !isfinite(v.y) || !isfinite(v.z)) {
+ printf("\n[%d,%d,%d]: Error: %s = %f, %f, %f\n",
+ x, y, z, desc, v.x, v.y, v.z);
+ return 1;
+ }
+ return 0;
}
// Initialize memory
t@@ -1006,10 +1012,10 @@ __global__ void findPorositiesVelocitiesDiametersSpherical(
dev_ns_d_avg[cellidx] = d_avg;
#ifdef CHECK_NS_FINITE
- checkFinite("phi", x, y, z, phi);
- checkFinite("dphi", x, y, z, dphi);
- checkFinite("v_avg", x, y, z, v_avg);
- checkFinite("d_avg", x, y, z, d_avg);
+ (void)checkFiniteFloat("phi", x, y, z, phi);
+ (void)checkFiniteFloat("dphi", x, y, z, dphi);
+ (void)checkFiniteFloat3("v_avg", x, y, z, v_avg);
+ (void)checkFiniteFloat("d_avg", x, y, z, d_avg);
#endif
} else {
t@@ -1058,8 +1064,8 @@ __global__ void setUpperPressureNS(
dev_ns_p[cellidx] = new_pressure;
#ifdef CHECK_NS_FINITE
- checkFinite("epsilon", x, y, z, epsilon);
- checkFinite("new_pressure", x, y, z, new_pressure);
+ (void)checkFiniteFloat("epsilon", x, y, z, epsilon);
+ (void)checkFiniteFloat("new_pressure", x, y, z, new_pressure);
#endif
}
}
t@@ -1216,7 +1222,7 @@ __global__ void findNSgradientsDev(
dev_vectorfield[cellidx] = grad;
#ifdef CHECK_NS_FINITE
- checkFinite("grad", x, y, z, grad);
+ (void)checkFiniteFloat3("grad", x, y, z, grad);
#endif
}
}
t@@ -1260,12 +1266,12 @@ __global__ void findvvOuterProdNS(
dev_ns_v_prod[cellidx6+5] = v.z*v.z;
#ifdef CHECK_NS_FINITE
- checkFinite("v_prod[0]", x, y, z, v.x*v.x);
- checkFinite("v_prod[1]", x, y, z, v.x*v.y);
- checkFinite("v_prod[2]", x, y, z, v.x*v.z);
- checkFinite("v_prod[3]", x, y, z, v.y*v.y);
- checkFinite("v_prod[4]", x, y, z, v.y*v.z);
- checkFinite("v_prod[5]", x, y, z, v.z*v.z);
+ (void)checkFiniteFloat("v_prod[0]", x, y, z, v.x*v.x);
+ (void)checkFiniteFloat("v_prod[1]", x, y, z, v.x*v.y);
+ (void)checkFiniteFloat("v_prod[2]", x, y, z, v.x*v.z);
+ (void)checkFiniteFloat("v_prod[3]", x, y, z, v.y*v.y);
+ (void)checkFiniteFloat("v_prod[4]", x, y, z, v.y*v.z);
+ (void)checkFiniteFloat("v_prod[5]", x, y, z, v.z*v.z);
#endif
}
}
t@@ -1353,12 +1359,12 @@ __global__ void findNSstressTensor(
dev_ns_tau[cellidx6+5] = tau_zz;
#ifdef CHECK_NS_FINITE
- checkFinite("tau_xx", x, y, z, tau_xx);
- checkFinite("tau_xy", x, y, z, tau_xy);
- checkFinite("tau_xz", x, y, z, tau_xz);
- checkFinite("tau_yy", x, y, z, tau_yy);
- checkFinite("tau_yz", x, y, z, tau_yz);
- checkFinite("tau_zz", x, y, z, tau_zz);
+ (void)checkFiniteFloat("tau_xx", x, y, z, tau_xx);
+ (void)checkFiniteFloat("tau_xy", x, y, z, tau_xy);
+ (void)checkFiniteFloat("tau_xz", x, y, z, tau_xz);
+ (void)checkFiniteFloat("tau_yy", x, y, z, tau_yy);
+ (void)checkFiniteFloat("tau_yz", x, y, z, tau_yz);
+ (void)checkFiniteFloat("tau_zz", x, y, z, tau_zz);
#endif
}
}
t@@ -1427,7 +1433,7 @@ __global__ void findNSdivphiviv(
dev_ns_div_phi_vi_v[cellidx] = div_phi_vi_v;
#ifdef CHECK_NS_FINITE
- checkFinite("div_phi_vi_v", x, y, z, div_phi_vi_v);
+ (void)checkFiniteFloat3("div_phi_vi_v", x, y, z, div_phi_vi_v);
#endif
}
}
t@@ -1531,7 +1537,7 @@ __global__ void findNSdivphitau(
dev_ns_div_phi_tau[cellidx] = div_phi_tau;
#ifdef CHECK_NS_FINITE
- checkFinite("div_phi_tau", x, y, z, div_phi_tau);
+ (void)checkFiniteFloat3("div_phi_tau", x, y, z, div_phi_tau);
#endif
}
}
t@@ -1618,7 +1624,7 @@ __global__ void findNSdivphivv(
dev_ns_div_phi_v_v[cellidx] = div;
#ifdef CHECK_NS_FINITE
- checkFinite("div_phi_v_v", x, y, z, div);
+ (void)checkFiniteFloat3("div_phi_v_v", x, y, z, div);
#endif
}
}
t@@ -1723,7 +1729,7 @@ __global__ void findPredNSvelocities(
dev_ns_v_p[cellidx] = v_p;
#ifdef CHECK_NS_FINITE
- checkFinite("v_p", x, y, z, v_p);
+ (void)checkFiniteFloat3("v_p", x, y, z, v_p);
#endif
}
}
t@@ -1858,7 +1864,7 @@ __global__ void findNSforcing(
dev_ns_f[cellidx] = f;
#ifdef CHECK_NS_FINITE
- checkFinite("f", x, y, z, f);
+ (void)checkFiniteFloat("f", x, y, z, f);
#endif
}
}
t@@ -1921,7 +1927,7 @@ __global__ void smoothing(
e_yn, e_yp, e_zn, e_zp);*/
#ifdef CHECK_NS_FINITE
- checkFinite("e_smooth", x, y, z, e_smooth);
+ (void)checkFiniteFloat("e_smooth", x, y, z, e_smooth);
#endif
}
}
t@@ -2018,8 +2024,13 @@ __global__ void jacobiIterationNS(
dev_ns_norm[cellidx] = res_norm;
#ifdef CHECK_NS_FINITE
- checkFinite("e_relax", x, y, z, e_relax);
- checkFinite("res_norm", x, y, z, res_norm);
+ (void)checkFiniteFloat("e_new", x, y, z, e_new);
+ (void)checkFiniteFloat("e_relax", x, y, z, e_relax);
+ //(void)checkFiniteFloat("res_norm", x, y, z, res_norm);
+ if (checkFiniteFloat("res_norm", x, y, z, res_norm)) {
+ printf("[%d,%d,%d]\t e = %f\tf = %f\te_new = %f\tres_norm = %f\n",
+ x,y,z, e, f, e_new, res_norm);
+ }
#endif
}
}
t@@ -2104,7 +2115,7 @@ __global__ void findNormalizedResiduals(
dev_ns_norm[cellidx] = res_norm;
#ifdef CHECK_NS_FINITE
- checkFinite("res_norm", x, y, z, res_norm);
+ checkFiniteFloat("res_norm", x, y, z, res_norm);
#endif
}
}
t@@ -2180,8 +2191,8 @@ __global__ void updateNSvelocityPressure(
dev_ns_v[cellidx] = v;
#ifdef CHECK_NS_FINITE
- checkFinite("p", x, y, z, p);
- checkFinite("v", x, y, z, v);
+ checkFiniteFloat("p", x, y, z, p);
+ checkFiniteFloat3("v", x, y, z, v);
#endif
}
}
t@@ -2250,8 +2261,8 @@ __global__ void findAvgParticleVelocityDiameter(
dev_ns_d_avg[cellidx] = d_avg;
#ifdef CHECK_NS_FINITE
- checkFinite("v_avg", x, y, z, v_avg);
- checkFinite("d_avg", x, y, z, d_avg);
+ checkFiniteFloat3("v_avg", x, y, z, v_avg);
+ checkFiniteFloat("d_avg", x, y, z, d_avg);
#endif
}
}
t@@ -2341,7 +2352,7 @@ __global__ void findInteractionForce(
dev_ns_fi[cellidx] = fi;
#ifdef CHECK_NS_FINITE
- checkFinite("fi", x, y, z, fi);
+ checkFiniteFloat3("fi", x, y, z, fi);
#endif
}
}
t@@ -2408,7 +2419,7 @@ __global__ void applyParticleInteractionForce(
//x,y,z, fd.x, fd.y, fd.z, origidx);
#ifdef CHECK_NS_FINITE
- checkFinite("fd", x, y, z, fd);
+ checkFiniteFloat3("fd", x, y, z, fd);
#endif
}
}