tnavierstokes.cuh - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
tnavierstokes.cuh (128731B)
---
1 // navierstokes.cuh
2 // CUDA implementation of porous flow
3
4 #include <iostream>
5 #include <cuda.h>
6 //#include <cutil_math.h>
7 #include <helper_math.h>
8
9 #include "vector_arithmetic.h" // for arbitrary precision vectors
10 #include "sphere.h"
11 #include "datatypes.h"
12 #include "utility.h"
13 #include "constants.cuh"
14 #include "debug.h"
15
16 // Arithmetic mean of two numbers
17 __inline__ __device__ Float amean(Float a, Float b) {
18 return (a+b)*0.5;
19 }
20
21 // Harmonic mean of two numbers
22 __inline__ __device__ Float hmean(Float a, Float b) {
23 return (2.0*a*b)/(a+b);
24 }
25
26 // Helper functions for checking whether a value is NaN or Inf
27 __device__ int checkFiniteFloat(
28 const char* desc,
29 const unsigned int x,
30 const unsigned int y,
31 const unsigned int z,
32 const Float s)
33 {
34 __syncthreads();
35 if (!isfinite(s)) {
36 printf("\n[%d,%d,%d]: Error: %s = %f\n", x, y, z, desc, s);
37 return 1;
38 }
39 return 0;
40 }
41
42 __device__ int checkFiniteFloat3(
43 const char* desc,
44 const unsigned int x,
45 const unsigned int y,
46 const unsigned int z,
47 const Float3 v)
48 {
49 __syncthreads();
50 if (!isfinite(v.x) || !isfinite(v.y) || !isfinite(v.z)) {
51 printf("\n[%d,%d,%d]: Error: %s = %f, %f, %f\n",
52 x, y, z, desc, v.x, v.y, v.z);
53 return 1;
54 }
55 return 0;
56 }
57
58 // Initialize memory
59 void DEM::initNSmemDev(void)
60 {
61 // size of scalar field
62 unsigned int memSizeF = sizeof(Float)*NScells();
63
64 // size of cell-face arrays in staggered grid discretization
65 unsigned int memSizeFface = sizeof(Float)*NScellsVelocity();
66
67 cudaMalloc((void**)&dev_ns_p, memSizeF); // hydraulic pressure
68 cudaMalloc((void**)&dev_ns_v, memSizeF*3); // cell hydraulic velocity
69 cudaMalloc((void**)&dev_ns_v_x, memSizeFface);// velocity in stag. grid
70 cudaMalloc((void**)&dev_ns_v_y, memSizeFface);// velocity in stag. grid
71 cudaMalloc((void**)&dev_ns_v_z, memSizeFface);// velocity in stag. grid
72 cudaMalloc((void**)&dev_ns_v_p, memSizeF*3); // predicted cell velocity
73 cudaMalloc((void**)&dev_ns_v_p_x, memSizeFface); // pred. vel. in stag. grid
74 cudaMalloc((void**)&dev_ns_v_p_y, memSizeFface); // pred. vel. in stag. grid
75 cudaMalloc((void**)&dev_ns_v_p_z, memSizeFface); // pred. vel. in stag. grid
76 cudaMalloc((void**)&dev_ns_vp_avg, memSizeF*3); // avg. particle velocity
77 cudaMalloc((void**)&dev_ns_d_avg, memSizeF); // avg. particle diameter
78 cudaMalloc((void**)&dev_ns_F_pf, memSizeF*3); // interaction force
79 //cudaMalloc((void**)&dev_ns_F_pf_x, memSizeFface); // interaction force
80 //cudaMalloc((void**)&dev_ns_F_pf_y, memSizeFface); // interaction force
81 //cudaMalloc((void**)&dev_ns_F_pf_z, memSizeFface); // interaction force
82 cudaMalloc((void**)&dev_ns_phi, memSizeF); // cell porosity
83 cudaMalloc((void**)&dev_ns_dphi, memSizeF); // cell porosity change
84 //cudaMalloc((void**)&dev_ns_div_phi_v_v, memSizeF*3); // div(phi v v)
85 cudaMalloc((void**)&dev_ns_epsilon, memSizeF); // pressure difference
86 cudaMalloc((void**)&dev_ns_epsilon_new, memSizeF); // new pressure diff.
87 cudaMalloc((void**)&dev_ns_epsilon_old, memSizeF); // old pressure diff.
88 cudaMalloc((void**)&dev_ns_norm, memSizeF); // normalized residual
89 cudaMalloc((void**)&dev_ns_f, memSizeF); // forcing function value
90 cudaMalloc((void**)&dev_ns_f1, memSizeF); // constant addition in forcing
91 cudaMalloc((void**)&dev_ns_f2, memSizeF*3); // constant slope in forcing
92 //cudaMalloc((void**)&dev_ns_tau, memSizeF*6); // stress tensor
93 //cudaMalloc((void**)&dev_ns_div_tau, memSizeF*3); // div(tau), cell center
94 cudaMalloc((void**)&dev_ns_div_tau_x, memSizeFface); // div(tau), cell face
95 cudaMalloc((void**)&dev_ns_div_tau_y, memSizeFface); // div(tau), cell face
96 cudaMalloc((void**)&dev_ns_div_tau_z, memSizeFface); // div(tau), cell face
97 cudaMalloc((void**)&dev_ns_div_phi_vi_v, memSizeF*3); // div(phi*vi*v)
98 //cudaMalloc((void**)&dev_ns_div_phi_tau, memSizeF*3); // div(phi*tau)
99 cudaMalloc((void**)&dev_ns_f_pf, sizeof(Float3)*np); // particle fluid force
100 cudaMalloc((void**)&dev_ns_f_d, sizeof(Float4)*np); // drag force
101 cudaMalloc((void**)&dev_ns_f_p, sizeof(Float4)*np); // pressure force
102 cudaMalloc((void**)&dev_ns_f_v, sizeof(Float4)*np); // viscous force
103 cudaMalloc((void**)&dev_ns_f_sum, sizeof(Float4)*np); // sum of int. forces
104
105 checkForCudaErrors("End of initNSmemDev");
106 }
107
108 // Free memory
109 void DEM::freeNSmemDev()
110 {
111 cudaFree(dev_ns_p);
112 cudaFree(dev_ns_v);
113 cudaFree(dev_ns_v_x);
114 cudaFree(dev_ns_v_y);
115 cudaFree(dev_ns_v_z);
116 cudaFree(dev_ns_v_p);
117 cudaFree(dev_ns_v_p_x);
118 cudaFree(dev_ns_v_p_y);
119 cudaFree(dev_ns_v_p_z);
120 cudaFree(dev_ns_vp_avg);
121 cudaFree(dev_ns_d_avg);
122 cudaFree(dev_ns_F_pf);
123 //cudaFree(dev_ns_F_pf_x);
124 //cudaFree(dev_ns_F_pf_y);
125 //cudaFree(dev_ns_F_pf_z);
126 cudaFree(dev_ns_phi);
127 cudaFree(dev_ns_dphi);
128 //cudaFree(dev_ns_div_phi_v_v);
129 cudaFree(dev_ns_epsilon);
130 cudaFree(dev_ns_epsilon_new);
131 cudaFree(dev_ns_epsilon_old);
132 cudaFree(dev_ns_norm);
133 cudaFree(dev_ns_f);
134 cudaFree(dev_ns_f1);
135 cudaFree(dev_ns_f2);
136 //cudaFree(dev_ns_tau);
137 cudaFree(dev_ns_div_phi_vi_v);
138 //cudaFree(dev_ns_div_phi_tau);
139 //cudaFree(dev_ns_div_tau);
140 cudaFree(dev_ns_div_tau_x);
141 cudaFree(dev_ns_div_tau_y);
142 cudaFree(dev_ns_div_tau_z);
143 cudaFree(dev_ns_f_pf);
144 cudaFree(dev_ns_f_d);
145 cudaFree(dev_ns_f_p);
146 cudaFree(dev_ns_f_v);
147 cudaFree(dev_ns_f_sum);
148 }
149
150 // Transfer to device
151 void DEM::transferNStoGlobalDeviceMemory(int statusmsg)
152 {
153 checkForCudaErrors("Before attempting cudaMemcpy in "
154 "transferNStoGlobalDeviceMemory");
155
156 //if (verbose == 1 && statusmsg == 1)
157 //std::cout << " Transfering fluid data to the device: ";
158
159 // memory size for a scalar field
160 unsigned int memSizeF = sizeof(Float)*NScells();
161
162 //writeNSarray(ns.p, "ns.p.txt");
163
164 cudaMemcpy(dev_ns_p, ns.p, memSizeF, cudaMemcpyHostToDevice);
165 checkForCudaErrors("transferNStoGlobalDeviceMemory after first cudaMemcpy");
166 cudaMemcpy(dev_ns_v, ns.v, memSizeF*3, cudaMemcpyHostToDevice);
167 //cudaMemcpy(dev_ns_v_p, ns.v_p, memSizeF*3, cudaMemcpyHostToDevice);
168 cudaMemcpy(dev_ns_phi, ns.phi, memSizeF, cudaMemcpyHostToDevice);
169 cudaMemcpy(dev_ns_dphi, ns.dphi, memSizeF, cudaMemcpyHostToDevice);
170
171 checkForCudaErrors("End of transferNStoGlobalDeviceMemory");
172 //if (verbose == 1 && statusmsg == 1)
173 //std::cout << "Done" << std::endl;
174 }
175
176 // Transfer from device
177 void DEM::transferNSfromGlobalDeviceMemory(int statusmsg)
178 {
179 if (verbose == 1 && statusmsg == 1)
180 std::cout << " Transfering fluid data from the device: ";
181
182 // memory size for a scalar field
183 unsigned int memSizeF = sizeof(Float)*NScells();
184
185 cudaMemcpy(ns.p, dev_ns_p, memSizeF, cudaMemcpyDeviceToHost);
186 checkForCudaErrors("In transferNSfromGlobalDeviceMemory, dev_ns_p", 0);
187 cudaMemcpy(ns.v, dev_ns_v, memSizeF*3, cudaMemcpyDeviceToHost);
188 cudaMemcpy(ns.v_x, dev_ns_v_x, memSizeF, cudaMemcpyDeviceToHost);
189 cudaMemcpy(ns.v_y, dev_ns_v_y, memSizeF, cudaMemcpyDeviceToHost);
190 cudaMemcpy(ns.v_z, dev_ns_v_z, memSizeF, cudaMemcpyDeviceToHost);
191 //cudaMemcpy(ns.v_p, dev_ns_v_p, memSizeF*3, cudaMemcpyDeviceToHost);
192 cudaMemcpy(ns.phi, dev_ns_phi, memSizeF, cudaMemcpyDeviceToHost);
193 cudaMemcpy(ns.dphi, dev_ns_dphi, memSizeF, cudaMemcpyDeviceToHost);
194 cudaMemcpy(ns.norm, dev_ns_norm, memSizeF, cudaMemcpyDeviceToHost);
195 cudaMemcpy(ns.f_d, dev_ns_f_d, sizeof(Float4)*np, cudaMemcpyDeviceToHost);
196 cudaMemcpy(ns.f_p, dev_ns_f_p, sizeof(Float4)*np, cudaMemcpyDeviceToHost);
197 cudaMemcpy(ns.f_v, dev_ns_f_v, sizeof(Float4)*np, cudaMemcpyDeviceToHost);
198 cudaMemcpy(ns.f_sum, dev_ns_f_sum, sizeof(Float4)*np,
199 cudaMemcpyDeviceToHost);
200
201 checkForCudaErrors("End of transferNSfromGlobalDeviceMemory", 0);
202 if (verbose == 1 && statusmsg == 1)
203 std::cout << "Done" << std::endl;
204 }
205
206 // Transfer the normalized residuals from device to host
207 void DEM::transferNSnormFromGlobalDeviceMemory()
208 {
209 cudaMemcpy(ns.norm, dev_ns_norm, sizeof(Float)*NScells(),
210 cudaMemcpyDeviceToHost);
211 checkForCudaErrors("End of transferNSnormFromGlobalDeviceMemory");
212 }
213
214 // Transfer the pressure change from device to host
215 void DEM::transferNSepsilonFromGlobalDeviceMemory()
216 {
217 cudaMemcpy(ns.epsilon, dev_ns_epsilon, sizeof(Float)*NScells(),
218 cudaMemcpyDeviceToHost);
219 checkForCudaErrors("End of transferNSepsilonFromGlobalDeviceMemory");
220 }
221
222 // Transfer the pressure change from device to host
223 void DEM::transferNSepsilonNewFromGlobalDeviceMemory()
224 {
225 cudaMemcpy(ns.epsilon_new, dev_ns_epsilon_new, sizeof(Float)*NScells(),
226 cudaMemcpyDeviceToHost);
227 checkForCudaErrors("End of transferNSepsilonFromGlobalDeviceMemory");
228 }
229
230 // Get linear index from 3D grid position
231 __inline__ __device__ unsigned int idx(
232 const int x, const int y, const int z)
233 {
234 // without ghost nodes
235 //return x + dev_grid.num[0]*y + dev_grid.num[0]*dev_grid.num[1]*z;
236
237 // with ghost nodes
238 // the ghost nodes are placed at x,y,z = -1 and WIDTH
239 return (x+1) + (devC_grid.num[0]+2)*(y+1) +
240 (devC_grid.num[0]+2)*(devC_grid.num[1]+2)*(z+1);
241 }
242
243 // Get linear index of velocity node from 3D grid position in staggered grid
244 __inline__ __device__ unsigned int vidx(
245 const int x, const int y, const int z)
246 {
247 // without ghost nodes
248 //return x + (devC_grid.num[0]+1)*y
249 //+ (devC_grid.num[0]+1)*(devC_grid.num[1]+1)*z;
250
251 // with ghost nodes
252 // the ghost nodes are placed at x,y,z = -1 and WIDTH+1
253 return (x+1) + (devC_grid.num[0]+3)*(y+1)
254 + (devC_grid.num[0]+3)*(devC_grid.num[1]+3)*(z+1);
255 }
256
257 // Find averaged cell velocities from cell-face velocities. This function works
258 // for both normal and predicted velocities. Launch for every cell in the
259 // dev_ns_v or dev_ns_v_p array. This function does not set the averaged
260 // velocity values in the ghost node cells.
261 __global__ void findNSavgVel(
262 Float3* __restrict__ dev_ns_v, // out
263 const Float* __restrict__ dev_ns_v_x, // in
264 const Float* __restrict__ dev_ns_v_y, // in
265 const Float* __restrict__ dev_ns_v_z) // in
266 {
267
268 // 3D thread index
269 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
270 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
271 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
272
273 // check that we are not outside the fluid grid
274 if (x<devC_grid.num[0] && y<devC_grid.num[1] && z<devC_grid.num[2]-1) {
275 const unsigned int cellidx = idx(x,y,z);
276
277 // Read cell-face velocities
278 __syncthreads();
279 const Float v_xn = dev_ns_v_x[vidx(x,y,z)];
280 const Float v_xp = dev_ns_v_x[vidx(x+1,y,z)];
281 const Float v_yn = dev_ns_v_y[vidx(x,y,z)];
282 const Float v_yp = dev_ns_v_y[vidx(x,y+1,z)];
283 const Float v_zn = dev_ns_v_z[vidx(x,y,z)];
284 const Float v_zp = dev_ns_v_z[vidx(x,y,z+1)];
285
286 // Find average velocity using arithmetic means
287 const Float3 v_bar = MAKE_FLOAT3(
288 amean(v_xn, v_xp),
289 amean(v_yn, v_yp),
290 amean(v_zn, v_zp));
291
292 // Save value
293 __syncthreads();
294 dev_ns_v[idx(x,y,z)] = v_bar;
295 }
296 }
297
298 // Find cell-face velocities from averaged velocities. This function works for
299 // both normal and predicted velocities. Launch for every cell in the dev_ns_v
300 // or dev_ns_v_p array. Make sure that the averaged velocity ghost nodes are set
301 // beforehand.
302 __global__ void findNScellFaceVel(
303 const Float3* __restrict__ dev_ns_v, // in
304 Float* __restrict__ dev_ns_v_x, // out
305 Float* __restrict__ dev_ns_v_y, // out
306 Float* __restrict__ dev_ns_v_z) // out
307 {
308
309 // 3D thread index
310 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
311 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
312 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
313
314 // Grid dimensions
315 const unsigned int nx = devC_grid.num[0];
316 const unsigned int ny = devC_grid.num[1];
317 const unsigned int nz = devC_grid.num[2];
318
319 // check that we are not outside the fluid grid
320 if (x < nx && y < ny && x < nz) {
321 const unsigned int cellidx = idx(x,y,z);
322
323 // Read the averaged velocity from this cell as well as the required
324 // components from the neighbor cells
325 __syncthreads();
326 const Float3 v = dev_ns_v[idx(x,y,z)];
327 const Float v_xn = dev_ns_v[idx(x-1,y,z)].x;
328 const Float v_xp = dev_ns_v[idx(x+1,y,z)].x;
329 const Float v_yn = dev_ns_v[idx(x,y-1,z)].y;
330 const Float v_yp = dev_ns_v[idx(x,y+1,z)].y;
331 const Float v_zn = dev_ns_v[idx(x,y,z-1)].z;
332 const Float v_zp = dev_ns_v[idx(x,y,z+1)].z;
333
334 // Find cell-face velocities and save them right away
335 __syncthreads();
336
337 // Values at the faces closest to the coordinate system origo
338 dev_ns_v_x[vidx(x,y,z)] = amean(v_xn, v.x);
339 dev_ns_v_y[vidx(x,y,z)] = amean(v_yn, v.y);
340 dev_ns_v_z[vidx(x,y,z)] = amean(v_zn, v.z);
341
342 // Values at the cell faces furthest from the coordinate system origo.
343 // These values should only be written at the corresponding boundaries
344 // in order to avoid write conflicts.
345 if (x == nx-1)
346 dev_ns_v_x[vidx(x+1,y,z)] = amean(v.x, v_xp);
347 if (y == ny-1)
348 dev_ns_v_x[vidx(x+1,y,z)] = amean(v.y, v_yp);
349 if (z == nz-1)
350 dev_ns_v_x[vidx(x+1,y,z)] = amean(v.z, v_zp);
351 }
352 }
353
354
355 // Set the initial guess of the values of epsilon.
356 __global__ void setNSepsilonInterior(
357 Float* __restrict__ dev_ns_epsilon,
358 const Float value)
359 {
360 // 3D thread index
361 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
362 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
363 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
364
365 // check that we are not outside the fluid grid
366 if (x < devC_grid.num[0] && y < devC_grid.num[1] &&
367 z > 0 && z < devC_grid.num[2]-1) {
368 __syncthreads();
369 const unsigned int cellidx = idx(x,y,z);
370 dev_ns_epsilon[cellidx] = value;
371 }
372 }
373
374 // The normalized residuals are given an initial value of 0, since the values at
375 // the Dirichlet boundaries aren't written during the iterations.
376 __global__ void setNSnormZero(Float* __restrict__ dev_ns_norm)
377 {
378 // 3D thread index
379 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
380 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
381 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
382
383 // check that we are not outside the fluid grid
384 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
385 __syncthreads();
386 const unsigned int cellidx = idx(x,y,z);
387 dev_ns_norm[idx(x,y,z)] = 0.0;
388 }
389 }
390
391
392 // Set the constant values of epsilon at the lower boundary. Since the
393 // Dirichlet boundary values aren't transfered during array swapping, the values
394 // also need to be written to the new array of epsilons. A value of 0 equals
395 // the Dirichlet boundary condition: the new value should be identical to the
396 // old value, i.e. the temporal gradient is 0
397 __global__ void setNSepsilonBottom(
398 Float* __restrict__ dev_ns_epsilon,
399 Float* __restrict__ dev_ns_epsilon_new,
400 const Float value)
401 {
402 // 3D thread index
403 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
404 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
405 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
406
407 // check that we are not outside the fluid grid, and at the z boundaries
408 //if (x < devC_grid.num[0] && y < devC_grid.num[1] &&
409 // (z == devC_grid.num[2]-1 || z == 0)) {
410 // check that we are not outside the fluid grid, and at the lower z boundary
411 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z == 0) {
412
413 __syncthreads();
414 const unsigned int cellidx = idx(x,y,z);
415 dev_ns_epsilon[cellidx] = value;
416 dev_ns_epsilon_new[cellidx] = value;
417 }
418 }
419
420 // Set the constant values of epsilon at the upper boundary. Since the
421 // Dirichlet boundary values aren't transfered during array swapping, the values
422 // also need to be written to the new array of epsilons.
423 __global__ void setNSepsilonTop(
424 Float* __restrict__ dev_ns_epsilon,
425 Float* __restrict__ dev_ns_epsilon_new,
426 const Float value)
427 {
428 // 3D thread index
429 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
430 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
431 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
432
433 // check that we are not outside the fluid grid, and at the upper z boundary
434 if (x < devC_grid.num[0] && y < devC_grid.num[1] &&
435 z == devC_grid.num[2]-1) {
436
437 __syncthreads();
438 const unsigned int cellidx = idx(x,y,z);
439 dev_ns_epsilon[cellidx] = value;
440 dev_ns_epsilon_new[cellidx] = value;
441 }
442 }
443
444 // Set the constant values of epsilon and grad_z(epsilon) at the upper wall, if
445 // it is dynamic (Dirichlet+Neumann). Since the Dirichlet boundary values aren't
446 // transfered during array swapping, the values also need to be written to the
447 // new array of epsilons.
448 __global__ void setNSepsilonAtTopWall(
449 Float* __restrict__ dev_ns_epsilon,
450 Float* __restrict__ dev_ns_epsilon_new,
451 const unsigned int iz,
452 const Float value,
453 const Float dp_dz)
454 {
455 // 3D thread index
456 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
457 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
458 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
459
460 const unsigned int cellidx = idx(x,y,z);
461
462 // cells containing the wall (Dirichlet BC)
463 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2] &&
464 z == iz) {
465 __syncthreads();
466 dev_ns_epsilon[cellidx] = value;
467 dev_ns_epsilon_new[cellidx] = value;
468 }
469
470 // cells above the wall (Neumann BC)
471 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2] &&
472 z == iz+1) {
473
474 // Pressure value in order to obtain hydrostatic pressure distribution
475 // for Neumann BC. The pressure should equal the value at the top wall
476 // minus the contribution due to the fluid weight.
477 // p_iz+1 = p_iz - rho_f*g*dz
478 const Float p = value - dp_dz;
479
480 __syncthreads();
481 dev_ns_epsilon[cellidx] = p;
482 dev_ns_epsilon_new[cellidx] = p;
483 }
484 }
485
486 __device__ void copyNSvalsDev(
487 const unsigned int read,
488 const unsigned int write,
489 Float* __restrict__ dev_ns_p,
490 Float3* __restrict__ dev_ns_v,
491 Float3* __restrict__ dev_ns_v_p,
492 Float* __restrict__ dev_ns_phi,
493 Float* __restrict__ dev_ns_dphi,
494 Float* __restrict__ dev_ns_epsilon)
495 {
496 // Coalesced read
497 const Float p = dev_ns_p[read];
498 const Float3 v = dev_ns_v[read];
499 const Float3 v_p = dev_ns_v_p[read];
500 const Float phi = dev_ns_phi[read];
501 const Float dphi = dev_ns_dphi[read];
502 const Float epsilon = dev_ns_epsilon[read];
503
504 // Coalesced write
505 __syncthreads();
506 dev_ns_p[write] = p;
507 dev_ns_v[write] = v;
508 dev_ns_v_p[write] = v_p;
509 dev_ns_phi[write] = phi;
510 dev_ns_dphi[write] = dphi;
511 dev_ns_epsilon[write] = epsilon;
512 }
513
514
515 // Update ghost nodes from their parent cell values. The edge (diagonal) cells
516 // are not written since they are not read. Launch this kernel for all cells in
517 // the grid
518 __global__ void setNSghostNodesDev(
519 Float* __restrict__ dev_ns_p,
520 Float3* __restrict__ dev_ns_v,
521 Float3* __restrict__ dev_ns_v_p,
522 Float* __restrict__ dev_ns_phi,
523 Float* __restrict__ dev_ns_dphi,
524 Float* __restrict__ dev_ns_epsilon)
525 {
526 // 3D thread index
527 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
528 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
529 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
530
531 // Grid dimensions
532 const unsigned int nx = devC_grid.num[0];
533 const unsigned int ny = devC_grid.num[1];
534 const unsigned int nz = devC_grid.num[2];
535
536 // 1D thread index
537 const unsigned int cellidx = idx(x,y,z);
538
539 // 1D position of ghost node
540 unsigned int writeidx;
541
542 // check that we are not outside the fluid grid
543 if (x < nx && y < ny && z < nz) {
544
545 if (x == 0) {
546 writeidx = idx(nx,y,z);
547 copyNSvalsDev(cellidx, writeidx,
548 dev_ns_p,
549 dev_ns_v, dev_ns_v_p,
550 dev_ns_phi, dev_ns_dphi,
551 dev_ns_epsilon);
552 }
553 if (x == nx-1) {
554 writeidx = idx(-1,y,z);
555 copyNSvalsDev(cellidx, writeidx,
556 dev_ns_p,
557 dev_ns_v, dev_ns_v_p,
558 dev_ns_phi, dev_ns_dphi,
559 dev_ns_epsilon);
560 }
561
562 if (y == 0) {
563 writeidx = idx(x,ny,z);
564 copyNSvalsDev(cellidx, writeidx,
565 dev_ns_p,
566 dev_ns_v, dev_ns_v_p,
567 dev_ns_phi, dev_ns_dphi,
568 dev_ns_epsilon);
569 }
570 if (y == ny-1) {
571 writeidx = idx(x,-1,z);
572 copyNSvalsDev(cellidx, writeidx,
573 dev_ns_p,
574 dev_ns_v, dev_ns_v_p,
575 dev_ns_phi, dev_ns_dphi,
576 dev_ns_epsilon);
577 }
578
579 // Z boundaries fixed
580 if (z == 0) {
581 writeidx = idx(x,y,-1);
582 copyNSvalsDev(cellidx, writeidx,
583 dev_ns_p,
584 dev_ns_v, dev_ns_v_p,
585 dev_ns_phi, dev_ns_dphi,
586 dev_ns_epsilon);
587 }
588 if (z == nz-1) {
589 writeidx = idx(x,y,nz);
590 copyNSvalsDev(cellidx, writeidx,
591 dev_ns_p,
592 dev_ns_v, dev_ns_v_p,
593 dev_ns_phi, dev_ns_dphi,
594 dev_ns_epsilon);
595 }
596
597 // Z boundaries periodic
598 /*if (z == 0) {
599 writeidx = idx(x,y,nz);
600 copyNSvalsDev(cellidx, writeidx,
601 dev_ns_p,
602 dev_ns_v, dev_ns_v_p,
603 dev_ns_phi, dev_ns_dphi,
604 dev_ns_epsilon);
605 }
606 if (z == nz-1) {
607 writeidx = idx(x,y,-1);
608 copyNSvalsDev(cellidx, writeidx,
609 dev_ns_p,
610 dev_ns_v, dev_ns_v_p,
611 dev_ns_phi, dev_ns_dphi,
612 dev_ns_epsilon);
613 }*/
614 }
615 }
616
617 // Update a field in the ghost nodes from their parent cell values. The edge
618 // (diagonal) cells are not written since they are not read. Launch this kernel
619 // for all cells in the grid usind setNSghostNodes<datatype><<<.. , ..>>>( .. );
620 template<typename T>
621 __global__ void setNSghostNodes(T* __restrict__ dev_scalarfield)
622 {
623 // 3D thread index
624 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
625 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
626 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
627
628 // Grid dimensions
629 const unsigned int nx = devC_grid.num[0];
630 const unsigned int ny = devC_grid.num[1];
631 const unsigned int nz = devC_grid.num[2];
632
633 // check that we are not outside the fluid grid
634 if (x < nx && y < ny && z < nz) {
635
636 const T val = dev_scalarfield[idx(x,y,z)];
637
638 if (x == 0)
639 dev_scalarfield[idx(nx,y,z)] = val;
640 if (x == nx-1)
641 dev_scalarfield[idx(-1,y,z)] = val;
642
643 if (y == 0)
644 dev_scalarfield[idx(x,ny,z)] = val;
645 if (y == ny-1)
646 dev_scalarfield[idx(x,-1,z)] = val;
647
648 if (z == 0)
649 dev_scalarfield[idx(x,y,-1)] = val; // Dirichlet
650 //dev_scalarfield[idx(x,y,nz)] = val; // Periodic -z
651 if (z == nz-1)
652 dev_scalarfield[idx(x,y,nz)] = val; // Dirichlet
653 //dev_scalarfield[idx(x,y,-1)] = val; // Periodic +z
654 }
655 }
656
657 // Update a field in the ghost nodes from their parent cell values. The edge
658 // (diagonal) cells are not written since they are not read.
659 template<typename T>
660 __global__ void setNSghostNodes(
661 T* __restrict__ dev_scalarfield,
662 const int bc_bot,
663 const int bc_top)
664 {
665 // 3D thread index
666 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
667 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
668 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
669
670 // Grid dimensions
671 const unsigned int nx = devC_grid.num[0];
672 const unsigned int ny = devC_grid.num[1];
673 const unsigned int nz = devC_grid.num[2];
674
675 // check that we are not outside the fluid grid
676 if (x < nx && y < ny && z < nz) {
677
678 const T val = dev_scalarfield[idx(x,y,z)];
679
680 // x
681 if (x == 0)
682 dev_scalarfield[idx(nx,y,z)] = val;
683 if (x == nx-1)
684 dev_scalarfield[idx(-1,y,z)] = val;
685
686 // y
687 if (y == 0)
688 dev_scalarfield[idx(x,ny,z)] = val;
689 if (y == ny-1)
690 dev_scalarfield[idx(x,-1,z)] = val;
691
692 // z
693 if (z == 0 && bc_bot == 0)
694 dev_scalarfield[idx(x,y,-1)] = val; // Dirichlet
695 //if (z == 1 && bc_bot == 1)
696 if (z == 0 && bc_bot == 1)
697 dev_scalarfield[idx(x,y,-1)] = val; // Neumann
698 if (z == 0 && bc_bot == 2)
699 dev_scalarfield[idx(x,y,nz)] = val; // Periodic -z
700
701 if (z == nz-1 && bc_top == 0)
702 dev_scalarfield[idx(x,y,nz)] = val; // Dirichlet
703 if (z == nz-2 && bc_top == 1)
704 dev_scalarfield[idx(x,y,nz)] = val; // Neumann
705 if (z == nz-1 && bc_top == 2)
706 dev_scalarfield[idx(x,y,-1)] = val; // Periodic +z
707 }
708 }
709
710 // Update a field in the ghost nodes from their parent cell values. The edge
711 // (diagonal) cells are not written since they are not read.
712 // Launch per face.
713 // According to Griebel et al. 1998 "Numerical Simulation in Fluid Dynamics"
714 template<typename T>
715 __global__ void setNSghostNodesFace(
716 T* __restrict__ dev_scalarfield_x,
717 T* __restrict__ dev_scalarfield_y,
718 T* __restrict__ dev_scalarfield_z,
719 const int bc_bot,
720 const int bc_top)
721 {
722 // 3D thread index
723 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
724 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
725 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
726
727 // Grid dimensions
728 const unsigned int nx = devC_grid.num[0];
729 const unsigned int ny = devC_grid.num[1];
730 const unsigned int nz = devC_grid.num[2];
731
732 // check that we are not outside the fluid grid
733 //if (x <= nx && y <= ny && z <= nz) {
734 if (x < nx && y < ny && z < nz) {
735
736 const T val_x = dev_scalarfield_x[vidx(x,y,z)];
737 const T val_y = dev_scalarfield_y[vidx(x,y,z)];
738 const T val_z = dev_scalarfield_z[vidx(x,y,z)];
739
740 // x (periodic)
741 if (x == 0) {
742 dev_scalarfield_x[vidx(nx,y,z)] = val_x;
743 dev_scalarfield_y[vidx(nx,y,z)] = val_y;
744 dev_scalarfield_z[vidx(nx,y,z)] = val_z;
745 }
746 if (x == 1) {
747 dev_scalarfield_x[vidx(nx+1,y,z)] = val_x;
748 }
749 if (x == nx-1) {
750 dev_scalarfield_x[vidx(-1,y,z)] = val_x;
751 dev_scalarfield_y[vidx(-1,y,z)] = val_y;
752 dev_scalarfield_z[vidx(-1,y,z)] = val_z;
753 }
754
755 // z ghost nodes at x = -1 and z = nz,
756 // equal to the ghost node at x = nx-1 and z = nz
757 if (z == nz-1 && x == nx-1 && bc_top == 0) // Dirichlet +z
758 dev_scalarfield_z[vidx(-1,y,nz)] = val_z;
759
760 if (z == nz-1 && x == nx-1 && (bc_top == 1 || bc_top == 2)) //Neumann +z
761 dev_scalarfield_z[vidx(-1,y,nz)] = 0.0;
762
763 if (z == 0 && x == nx-1 && bc_top == 3) // Periodic +z
764 dev_scalarfield_z[vidx(-1,y,nz)] = val_z;
765
766 // z ghost nodes at y = -1 and z = nz,
767 // equal to the ghost node at y = ny-1 and z = nz
768 if (z == nz-1 && y == ny-1 && bc_top == 0) // Dirichlet +z
769 dev_scalarfield_z[vidx(x,-1,nz)] = val_z;
770
771 if (z == nz-1 && y == ny-1 && (bc_top == 1 || bc_top == 2)) //Neumann +z
772 dev_scalarfield_z[vidx(x,-1,nz)] = 0.0;
773
774 if (z == 0 && y == ny-1 && bc_top == 3) // Periodic +z
775 dev_scalarfield_z[vidx(x,-1,nz)] = val_z;
776
777
778 // x ghost nodes at x = nx and z = -1,
779 // equal to the ghost nodes at x = 0 and z = -1
780 // Dirichlet, Neumann free slip or periodic -z
781 if (z == 0 && x == 0 && (bc_bot == 0 || bc_bot == 1 || bc_bot == 3))
782 dev_scalarfield_x[vidx(nx,y,-1)] = val_x;
783
784 if (z == 0 && x == 0 && bc_bot == 2) // Neumann no slip -z
785 dev_scalarfield_x[vidx(nx,y,-1)] = -val_x;
786
787 // y ghost nodes at y = ny and z = -1,
788 // equal to the ghost node at x = 0 and z = -1
789 // Dirichlet, Neumann free slip or periodic -z
790 if (z == 0 && y == 0 && (bc_bot == 0 || bc_bot == 1 || bc_bot == 3))
791 dev_scalarfield_y[vidx(x,ny,-1)] = val_y;
792
793 if (z == 0 && y == 0 && bc_bot == 2) // Neumann no slip -z
794 dev_scalarfield_y[vidx(x,ny,-1)] = -val_y;
795
796
797 // z ghost nodes at x = nx and z = nz
798 // equal to the ghost node at x = 0 and z = nz
799 if (z == nz-1 && x == 0 && (bc_top == 0 || bc_top == 3)) // D. or p. +z
800 dev_scalarfield_z[vidx(nx,y,nz)] = val_z;
801
802 if (z == nz-1 && x == 0 && (bc_top == 1 || bc_top == 2)) // N. +z
803 dev_scalarfield_z[vidx(nx,y,nz)] = 0.0;
804
805 // z ghost nodes at y = ny and z = nz
806 // equal to the ghost node at y = 0 and z = nz
807 if (z == nz-1 && y == 0 && (bc_top == 0 || bc_top == 3)) // D. or p. +z
808 dev_scalarfield_z[vidx(x,ny,nz)] = val_z;
809
810 if (z == nz-1 && y == 0 && (bc_top == 1 || bc_top == 2)) // N. +z
811 dev_scalarfield_z[vidx(x,ny,nz)] = 0.0;
812
813
814 // x ghost nodes at x = nx and z = nz,
815 // equal to the ghost nodes at x = 0 and z = nz
816 // Dirichlet, Neumann free slip or periodic +z
817 if (z == nz-1 && x == 0 && (bc_bot == 0 || bc_bot == 1 || bc_bot == 3))
818 dev_scalarfield_x[vidx(nx,y,nz)] = val_x;
819
820 if (z == nz-1 && x == 0 && bc_bot == 2) // Neumann no slip -z
821 dev_scalarfield_x[vidx(nx,y,nz)] = -val_x;
822
823 // y ghost nodes at y = ny and z = nz,
824 // equal to the ghost nodes at y = 0 and z = nz
825 // Dirichlet, Neumann free slip or periodic +z
826 if (z == nz-1 && y == 0 && (bc_bot == 0 || bc_bot == 1 || bc_bot == 3))
827 dev_scalarfield_y[vidx(x,ny,nz)] = val_y;
828
829 if (z == nz-1 && y == 0 && bc_bot == 2) // Neumann no slip -z
830 dev_scalarfield_y[vidx(x,ny,nz)] = -val_y;
831
832
833 // y (periodic)
834 if (y == 0) {
835 dev_scalarfield_x[vidx(x,ny,z)] = val_x;
836 dev_scalarfield_y[vidx(x,ny,z)] = val_y;
837 dev_scalarfield_z[vidx(x,ny,z)] = val_z;
838 }
839 if (y == 1) {
840 dev_scalarfield_y[vidx(x,ny+1,z)] = val_y;
841 }
842 if (y == ny-1) {
843 dev_scalarfield_x[vidx(x,-1,z)] = val_x;
844 dev_scalarfield_y[vidx(x,-1,z)] = val_y;
845 dev_scalarfield_z[vidx(x,-1,z)] = val_z;
846 }
847
848 // z
849 if (z == 0 && bc_bot == 0) {
850 dev_scalarfield_x[vidx(x,y,-1)] = val_y; // Dirichlet -z
851 dev_scalarfield_y[vidx(x,y,-1)] = val_x; // Dirichlet -z
852 dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Dirichlet -z
853 }
854 if (z == 0 && bc_bot == 1) {
855 //dev_scalarfield_x[vidx(x,y,-1)] = val_x; // Neumann free slip -z
856 //dev_scalarfield_y[vidx(x,y,-1)] = val_y; // Neumann free slip -z
857 //dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Neumann free slip -z
858 dev_scalarfield_x[vidx(x,y,-1)] = val_x; // Neumann free slip -z
859 dev_scalarfield_y[vidx(x,y,-1)] = val_y; // Neumann free slip -z
860 dev_scalarfield_z[vidx(x,y,-1)] = 0.0; // Neumann free slip -z
861 }
862 if (z == 0 && bc_bot == 2) {
863 //dev_scalarfield_x[vidx(x,y,-1)] = val_x; // Neumann no slip -z
864 //dev_scalarfield_y[vidx(x,y,-1)] = val_y; // Neumann no slip -z
865 //dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Neumann no slip -z
866 dev_scalarfield_x[vidx(x,y,-1)] = -val_x; // Neumann no slip -z
867 dev_scalarfield_y[vidx(x,y,-1)] = -val_y; // Neumann no slip -z
868 dev_scalarfield_z[vidx(x,y,-1)] = 0.0; // Neumann no slip -z
869 }
870 if (z == 0 && bc_bot == 3) {
871 dev_scalarfield_x[vidx(x,y,nz)] = val_x; // Periodic -z
872 dev_scalarfield_y[vidx(x,y,nz)] = val_y; // Periodic -z
873 dev_scalarfield_z[vidx(x,y,nz)] = val_z; // Periodic -z
874 }
875 if (z == 1 && bc_bot == 3) {
876 dev_scalarfield_z[vidx(x,y,nz+1)] = val_z; // Periodic -z
877 }
878
879 if (z == nz-1 && bc_top == 0) {
880 dev_scalarfield_z[vidx(x,y,nz)] = val_z; // Dirichlet +z
881 }
882 if (z == nz-1 && bc_top == 1) {
883 //dev_scalarfield_x[vidx(x,y,nz)] = val_x; // Neumann free slip +z
884 //dev_scalarfield_y[vidx(x,y,nz)] = val_y; // Neumann free slip +z
885 //dev_scalarfield_z[vidx(x,y,nz)] = val_z; // Neumann free slip +z
886 //dev_scalarfield_z[vidx(x,y,nz+1)] = val_z; // Neumann free slip +z
887 dev_scalarfield_x[vidx(x,y,nz)] = val_x; // Neumann free slip +z
888 dev_scalarfield_y[vidx(x,y,nz)] = val_y; // Neumann free slip +z
889 dev_scalarfield_z[vidx(x,y,nz)] = 0.0; // Neumann free slip +z
890 dev_scalarfield_z[vidx(x,y,nz+1)] = 0.0; // Neumann free slip +z
891 }
892 if (z == nz-1 && bc_top == 2) {
893 //dev_scalarfield_x[vidx(x,y,nz)] = val_x; // Neumann no slip +z
894 //dev_scalarfield_y[vidx(x,y,nz)] = val_y; // Neumann no slip +z
895 //dev_scalarfield_z[vidx(x,y,nz)] = val_z; // Neumann no slip +z
896 //dev_scalarfield_z[vidx(x,y,nz+1)] = val_z; // Neumann no slip +z
897 dev_scalarfield_x[vidx(x,y,nz)] = -val_x; // Neumann no slip +z
898 dev_scalarfield_y[vidx(x,y,nz)] = -val_y; // Neumann no slip +z
899 dev_scalarfield_z[vidx(x,y,nz)] = 0.0; // Neumann no slip +z
900 dev_scalarfield_z[vidx(x,y,nz+1)] = 0.0; // Neumann no slip +z
901 }
902 if (z == nz-1 && bc_top == 3) {
903 dev_scalarfield_x[vidx(x,y,-1)] = val_x; // Periodic +z
904 dev_scalarfield_y[vidx(x,y,-1)] = val_y; // Periodic +z
905 dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Periodic +z
906 }
907 }
908 }
909
910 // Update the tensor field for the ghost nodes from their parent cell values.
911 // The edge (diagonal) cells are not written since they are not read. Launch
912 // this kernel for all cells in the grid.
913 __global__ void setNSghostNodes_tau(
914 Float* __restrict__ dev_ns_tau,
915 const int bc_bot,
916 const int bc_top)
917 {
918 // 3D thread index
919 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
920 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
921 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
922
923 // Grid dimensions
924 const unsigned int nx = devC_grid.num[0];
925 const unsigned int ny = devC_grid.num[1];
926 const unsigned int nz = devC_grid.num[2];
927
928 // check that we are not outside the fluid grid
929 if (x < nx && y < ny && z < nz) {
930
931 // Linear index of length-6 vector field entry
932 unsigned int cellidx6 = idx(x,y,z)*6;
933
934 // Read parent values
935 __syncthreads();
936 const Float tau_xx = dev_ns_tau[cellidx6];
937 const Float tau_xy = dev_ns_tau[cellidx6+1];
938 const Float tau_xz = dev_ns_tau[cellidx6+2];
939 const Float tau_yy = dev_ns_tau[cellidx6+3];
940 const Float tau_yz = dev_ns_tau[cellidx6+4];
941 const Float tau_zz = dev_ns_tau[cellidx6+5];
942
943 // x
944 if (x == 0) {
945 cellidx6 = idx(nx,y,z)*6;
946 dev_ns_tau[cellidx6] = tau_xx;
947 dev_ns_tau[cellidx6+1] = tau_xy;
948 dev_ns_tau[cellidx6+2] = tau_xz;
949 dev_ns_tau[cellidx6+3] = tau_yy;
950 dev_ns_tau[cellidx6+4] = tau_yz;
951 dev_ns_tau[cellidx6+5] = tau_zz;
952 }
953 if (x == nx-1) {
954 cellidx6 = idx(-1,y,z)*6;
955 dev_ns_tau[cellidx6] = tau_xx;
956 dev_ns_tau[cellidx6+1] = tau_xy;
957 dev_ns_tau[cellidx6+2] = tau_xz;
958 dev_ns_tau[cellidx6+3] = tau_yy;
959 dev_ns_tau[cellidx6+4] = tau_yz;
960 dev_ns_tau[cellidx6+5] = tau_zz;
961 }
962
963 // y
964 if (y == 0) {
965 cellidx6 = idx(x,ny,z)*6;
966 dev_ns_tau[cellidx6] = tau_xx;
967 dev_ns_tau[cellidx6+1] = tau_xy;
968 dev_ns_tau[cellidx6+2] = tau_xz;
969 dev_ns_tau[cellidx6+3] = tau_yy;
970 dev_ns_tau[cellidx6+4] = tau_yz;
971 dev_ns_tau[cellidx6+5] = tau_zz;
972 }
973 if (y == ny-1) {
974 cellidx6 = idx(x,-1,z)*6;
975 dev_ns_tau[cellidx6] = tau_xx;
976 dev_ns_tau[cellidx6+1] = tau_xy;
977 dev_ns_tau[cellidx6+2] = tau_xz;
978 dev_ns_tau[cellidx6+3] = tau_yy;
979 dev_ns_tau[cellidx6+4] = tau_yz;
980 dev_ns_tau[cellidx6+5] = tau_zz;
981 }
982
983 // z
984 if (z == 0 && bc_bot == 0) { // Dirichlet
985 cellidx6 = idx(x,y,-1)*6;
986 dev_ns_tau[cellidx6] = tau_xx;
987 dev_ns_tau[cellidx6+1] = tau_xy;
988 dev_ns_tau[cellidx6+2] = tau_xz;
989 dev_ns_tau[cellidx6+3] = tau_yy;
990 dev_ns_tau[cellidx6+4] = tau_yz;
991 dev_ns_tau[cellidx6+5] = tau_zz;
992 }
993 if (z == 1 && bc_bot == 1) { // Neumann
994 cellidx6 = idx(x,y,-1)*6;
995 dev_ns_tau[cellidx6] = tau_xx;
996 dev_ns_tau[cellidx6+1] = tau_xy;
997 dev_ns_tau[cellidx6+2] = tau_xz;
998 dev_ns_tau[cellidx6+3] = tau_yy;
999 dev_ns_tau[cellidx6+4] = tau_yz;
1000 dev_ns_tau[cellidx6+5] = tau_zz;
1001 }
1002 if (z == 0 && bc_bot == 2) { // Periodic
1003 cellidx6 = idx(x,y,nz)*6;
1004 dev_ns_tau[cellidx6] = tau_xx;
1005 dev_ns_tau[cellidx6+1] = tau_xy;
1006 dev_ns_tau[cellidx6+2] = tau_xz;
1007 dev_ns_tau[cellidx6+3] = tau_yy;
1008 dev_ns_tau[cellidx6+4] = tau_yz;
1009 dev_ns_tau[cellidx6+5] = tau_zz;
1010 }
1011
1012 if (z == nz-1 && bc_top == 0) { // Dirichlet
1013 cellidx6 = idx(x,y,nz)*6;
1014 dev_ns_tau[cellidx6] = tau_xx;
1015 dev_ns_tau[cellidx6+1] = tau_xy;
1016 dev_ns_tau[cellidx6+2] = tau_xz;
1017 dev_ns_tau[cellidx6+3] = tau_yy;
1018 dev_ns_tau[cellidx6+4] = tau_yz;
1019 dev_ns_tau[cellidx6+5] = tau_zz;
1020 }
1021 if (z == nz-2 && bc_top == 1) { // Neumann
1022 cellidx6 = idx(x,y,nz)*6;
1023 dev_ns_tau[cellidx6] = tau_xx;
1024 dev_ns_tau[cellidx6+1] = tau_xy;
1025 dev_ns_tau[cellidx6+2] = tau_xz;
1026 dev_ns_tau[cellidx6+3] = tau_yy;
1027 dev_ns_tau[cellidx6+4] = tau_yz;
1028 dev_ns_tau[cellidx6+5] = tau_zz;
1029 }
1030 if (z == nz-1 && bc_top == 2) { // Periodic
1031 cellidx6 = idx(x,y,-1)*6;
1032 dev_ns_tau[cellidx6] = tau_xx;
1033 dev_ns_tau[cellidx6+1] = tau_xy;
1034 dev_ns_tau[cellidx6+2] = tau_xz;
1035 dev_ns_tau[cellidx6+3] = tau_yy;
1036 dev_ns_tau[cellidx6+4] = tau_yz;
1037 dev_ns_tau[cellidx6+5] = tau_zz;
1038 }
1039 }
1040 }
1041
1042 // Update a the forcing values in the ghost nodes from their parent cell values.
1043 // The edge (diagonal) cells are not written since they are not read. Launch
1044 // this kernel for all cells in the grid.
1045 /*
1046 __global__ void setNSghostNodesForcing(
1047 Float* dev_ns_f1,
1048 Float3* dev_ns_f2,
1049 Float* dev_ns_f,
1050 unsigned int nijac)
1051
1052 {
1053 // 3D thread index
1054 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1055 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1056 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1057
1058 // Grid dimensions
1059 const unsigned int nx = devC_grid.num[0];
1060 const unsigned int ny = devC_grid.num[1];
1061 const unsigned int nz = devC_grid.num[2];
1062
1063 // 1D thread index
1064 unsigned int cellidx = idx(x,y,z);
1065
1066 // check that we are not outside the fluid grid
1067 if (x < nx && y < ny && z < nz) {
1068
1069 __syncthreads();
1070 const Float f = dev_ns_f[cellidx];
1071 Float f1;
1072 Float3 f2;
1073
1074 if (nijac == 0) {
1075 __syncthreads();
1076 f1 = dev_ns_f1[cellidx];
1077 f2 = dev_ns_f2[cellidx];
1078 }
1079
1080 if (x == 0) {
1081 cellidx = idx(nx,y,z);
1082 dev_ns_f[cellidx] = f;
1083 if (nijac == 0) {
1084 dev_ns_f1[cellidx] = f1;
1085 dev_ns_f2[cellidx] = f2;
1086 }
1087 }
1088 if (x == nx-1) {
1089 cellidx = idx(-1,y,z);
1090 dev_ns_f[cellidx] = f;
1091 if (nijac == 0) {
1092 dev_ns_f1[cellidx] = f1;
1093 dev_ns_f2[cellidx] = f2;
1094 }
1095 }
1096
1097 if (y == 0) {
1098 cellidx = idx(x,ny,z);
1099 dev_ns_f[cellidx] = f;
1100 if (nijac == 0) {
1101 dev_ns_f1[cellidx] = f1;
1102 dev_ns_f2[cellidx] = f2;
1103 }
1104 }
1105 if (y == ny-1) {
1106 cellidx = idx(x,-1,z);
1107 dev_ns_f[cellidx] = f;
1108 if (nijac == 0) {
1109 dev_ns_f1[cellidx] = f1;
1110 dev_ns_f2[cellidx] = f2;
1111 }
1112 }
1113
1114 if (z == 0) {
1115 cellidx = idx(x,y,nz);
1116 dev_ns_f[cellidx] = f;
1117 if (nijac == 0) {
1118 dev_ns_f1[cellidx] = f1;
1119 dev_ns_f2[cellidx] = f2;
1120 }
1121 }
1122 if (z == nz-1) {
1123 cellidx = idx(x,y,-1);
1124 dev_ns_f[cellidx] = f;
1125 if (nijac == 0) {
1126 dev_ns_f1[cellidx] = f1;
1127 dev_ns_f2[cellidx] = f2;
1128 }
1129 }
1130 }
1131 }
1132 */
1133
1134 // Find the porosity in each cell on the base of a sphere, centered at the cell
1135 // center.
1136 __global__ void findPorositiesVelocitiesDiametersSpherical(
1137 const unsigned int* __restrict__ dev_cellStart,
1138 const unsigned int* __restrict__ dev_cellEnd,
1139 const Float4* __restrict__ dev_x_sorted,
1140 const Float4* __restrict__ dev_vel_sorted,
1141 Float* __restrict__ dev_ns_phi,
1142 Float* __restrict__ dev_ns_dphi,
1143 Float3* __restrict__ dev_ns_vp_avg,
1144 Float* __restrict__ dev_ns_d_avg,
1145 const unsigned int iteration,
1146 const unsigned int np,
1147 const Float c_phi)
1148 {
1149 // 3D thread index
1150 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1151 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1152 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1153
1154 // Grid dimensions
1155 const unsigned int nx = devC_grid.num[0];
1156 const unsigned int ny = devC_grid.num[1];
1157 const unsigned int nz = devC_grid.num[2];
1158
1159 // Cell dimensions
1160 const Float dx = devC_grid.L[0]/nx;
1161 const Float dy = devC_grid.L[1]/ny;
1162 const Float dz = devC_grid.L[2]/nz;
1163
1164 // Cell sphere radius
1165 //const Float R = fmin(dx, fmin(dy,dz)) * 0.5; // diameter = cell width
1166 const Float R = fmin(dx, fmin(dy,dz)); // diameter = 2*cell width
1167 const Float cell_volume = 4.0/3.0*M_PI*R*R*R;
1168
1169 Float void_volume = cell_volume;
1170 Float4 xr; // particle pos. and radius
1171
1172 // check that we are not outside the fluid grid
1173 if (x < nx && y < ny && z < nz) {
1174
1175 if (np > 0) {
1176
1177 // Cell sphere center position
1178 const Float3 X = MAKE_FLOAT3(
1179 x*dx + 0.5*dx,
1180 y*dy + 0.5*dy,
1181 z*dz + 0.5*dz);
1182
1183 Float d, r;
1184 Float phi = 1.00;
1185 Float4 v;
1186 unsigned int n = 0;
1187
1188 Float3 v_avg = MAKE_FLOAT3(0.0, 0.0, 0.0);
1189 Float d_avg = 0.0;
1190
1191 // Read old porosity
1192 __syncthreads();
1193 Float phi_0 = dev_ns_phi[idx(x,y,z)];
1194
1195 // The cell 3d index
1196 const int3 gridPos = make_int3((int)x,(int)y,(int)z);
1197
1198 // The neighbor cell 3d index
1199 int3 targetCell;
1200
1201 // The distance modifier for particles across periodic boundaries
1202 Float3 dist, distmod;
1203
1204 unsigned int cellID, startIdx, endIdx, i;
1205
1206 // Iterate over 27 neighbor cells, R = cell width
1207 /*for (int z_dim=-1; z_dim<2; ++z_dim) { // z-axis
1208 for (int y_dim=-1; y_dim<2; ++y_dim) { // y-axis
1209 for (int x_dim=-1; x_dim<2; ++x_dim) { // x-axis*/
1210
1211 // Iterate over 27 neighbor cells, R = 2*cell width
1212 for (int z_dim=-2; z_dim<3; ++z_dim) { // z-axis
1213 //for (int z_dim=-1; z_dim<2; ++z_dim) { // z-axis
1214 for (int y_dim=-2; y_dim<3; ++y_dim) { // y-axis
1215 for (int x_dim=-2; x_dim<3; ++x_dim) { // x-axis
1216
1217 // Index of neighbor cell this iteration is looking at
1218 targetCell = gridPos + make_int3(x_dim, y_dim, z_dim);
1219
1220 // Get distance modifier for interparticle
1221 // vector, if it crosses a periodic boundary
1222 distmod = MAKE_FLOAT3(0.0, 0.0, 0.0);
1223 if (findDistMod(&targetCell, &distmod) != -1) {
1224
1225 // Calculate linear cell ID
1226 cellID = targetCell.x
1227 + targetCell.y * devC_grid.num[0]
1228 + (devC_grid.num[0] * devC_grid.num[1])
1229 * targetCell.z;
1230
1231 // Lowest particle index in cell
1232 __syncthreads();
1233 startIdx = dev_cellStart[cellID];
1234
1235 // Make sure cell is not empty
1236 if (startIdx != 0xffffffff) {
1237
1238 // Highest particle index in cell
1239 __syncthreads();
1240 endIdx = dev_cellEnd[cellID];
1241
1242 // Iterate over cell particles
1243 for (i=startIdx; i<endIdx; ++i) {
1244
1245 // Read particle position and radius
1246 __syncthreads();
1247 xr = dev_x_sorted[i];
1248 v = dev_vel_sorted[i];
1249 r = xr.w;
1250
1251 // Find center distance
1252 dist = MAKE_FLOAT3(
1253 X.x - xr.x,
1254 X.y - xr.y,
1255 X.z - xr.z);
1256 dist += distmod;
1257 d = length(dist);
1258
1259 // Lens shaped intersection
1260 if ((R - r) < d && d < (R + r)) {
1261 void_volume -=
1262 1.0/(12.0*d) * (
1263 M_PI*(R + r - d)*(R + r - d)
1264 *(d*d + 2.0*d*r - 3.0*r*r
1265 + 2.0*d*R + 6.0*r*R
1266 - 3.0*R*R) );
1267 v_avg += MAKE_FLOAT3(v.x, v.y, v.z);
1268 d_avg += 2.0*r;
1269 n++;
1270 }
1271
1272 // Particle fully contained in cell sphere
1273 if (d <= R - r) {
1274 void_volume -= 4.0/3.0*M_PI*r*r*r;
1275 v_avg += MAKE_FLOAT3(v.x, v.y, v.z);
1276 d_avg += 2.0*r;
1277 n++;
1278 }
1279 }
1280 }
1281 }
1282 }
1283 }
1284 }
1285
1286 if (phi < 0.999) {
1287 v_avg /= n;
1288 d_avg /= n;
1289 }
1290
1291 // Make sure that the porosity is in the interval [0.0;1.0]
1292 phi = fmin(1.00, fmax(0.00, void_volume/cell_volume));
1293 //phi = void_volume/cell_volume;
1294
1295 Float dphi = phi - phi_0;
1296 if (iteration == 0)
1297 dphi = 0.0;
1298
1299 // report values to stdout for debugging
1300 //printf("%d,%d,%d\tphi = %f dphi = %f v_avg = %f,%f,%f d_avg = %f\n",
1301 // x,y,z, phi, dphi, v_avg.x, v_avg.y, v_avg.z, d_avg);
1302
1303 // Save porosity, porosity change, average velocity and average diameter
1304 __syncthreads();
1305 //phi = 0.5; dphi = 0.0; // disable porosity effects
1306 const unsigned int cellidx = idx(x,y,z);
1307 dev_ns_phi[cellidx] = phi*c_phi;
1308 dev_ns_dphi[cellidx] = dphi*c_phi;
1309 dev_ns_vp_avg[cellidx] = v_avg;
1310 dev_ns_d_avg[cellidx] = d_avg;
1311
1312 #ifdef CHECK_FLUID_FINITE
1313 (void)checkFiniteFloat("phi", x, y, z, phi);
1314 (void)checkFiniteFloat("dphi", x, y, z, dphi);
1315 (void)checkFiniteFloat3("v_avg", x, y, z, v_avg);
1316 (void)checkFiniteFloat("d_avg", x, y, z, d_avg);
1317 #endif
1318 } else {
1319
1320 __syncthreads();
1321 const unsigned int cellidx = idx(x,y,z);
1322
1323 //Float phi = 0.5;
1324 //Float dphi = 0.0;
1325 //if (iteration == 20 && x == nx/2 && y == ny/2 && z == nz/2) {
1326 //phi = 0.4;
1327 //dphi = 0.1;
1328 //}
1329 //dev_ns_phi[cellidx] = phi;
1330 //dev_ns_dphi[cellidx] = dphi;
1331 dev_ns_phi[cellidx] = 1.0;
1332 dev_ns_dphi[cellidx] = 0.0;
1333
1334 dev_ns_vp_avg[cellidx] = MAKE_FLOAT3(0.0, 0.0, 0.0);
1335 dev_ns_d_avg[cellidx] = 0.0;
1336 }
1337 }
1338 }
1339
1340 // Find the porosity in each cell on the base of a sphere, centered at the cell
1341 // center.
1342 __global__ void findPorositiesVelocitiesDiametersSphericalGradient(
1343 const unsigned int* __restrict__ dev_cellStart,
1344 const unsigned int* __restrict__ dev_cellEnd,
1345 const Float4* __restrict__ dev_x_sorted,
1346 const Float4* __restrict__ dev_vel_sorted,
1347 Float* __restrict__ dev_ns_phi,
1348 Float* __restrict__ dev_ns_dphi,
1349 Float3* __restrict__ dev_ns_vp_avg,
1350 Float* __restrict__ dev_ns_d_avg,
1351 const unsigned int iteration,
1352 const unsigned int ndem,
1353 const unsigned int np)
1354 {
1355 // 3D thread index
1356 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1357 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1358 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1359
1360 // Grid dimensions
1361 const unsigned int nx = devC_grid.num[0];
1362 const unsigned int ny = devC_grid.num[1];
1363 const unsigned int nz = devC_grid.num[2];
1364
1365 // Cell dimensions
1366 const Float dx = devC_grid.L[0]/nx;
1367 const Float dy = devC_grid.L[1]/ny;
1368 const Float dz = devC_grid.L[2]/nz;
1369
1370 // Cell sphere radius
1371 const Float R = fmin(dx, fmin(dy,dz)); // diameter = 2*cell width
1372
1373 Float4 xr; // particle pos. and radius
1374
1375 // check that we are not outside the fluid grid
1376 if (x < nx && y < ny && z < nz) {
1377
1378 if (np > 0) {
1379
1380 // Cell sphere center position
1381 const Float3 X = MAKE_FLOAT3(
1382 x*dx + 0.5*dx,
1383 y*dy + 0.5*dy,
1384 z*dz + 0.5*dz);
1385
1386 Float d, r;
1387 Float phi = 1.00;
1388 Float4 v;
1389 unsigned int n = 0;
1390
1391 Float3 v_avg = MAKE_FLOAT3(0.0, 0.0, 0.0);
1392 Float d_avg = 0.0;
1393
1394 // Read old porosity
1395 __syncthreads();
1396 Float phi_0 = dev_ns_phi[idx(x,y,z)];
1397
1398 // The cell 3d index
1399 const int3 gridPos = make_int3((int)x,(int)y,(int)z);
1400
1401 // The neighbor cell 3d index
1402 int3 targetCell;
1403
1404 // The distance modifier for particles across periodic boundaries
1405 Float3 distmod;
1406
1407 unsigned int cellID, startIdx, endIdx, i;
1408
1409 // Diagonal strain rate tensor components
1410 Float3 dot_epsilon_ii = MAKE_FLOAT3(0.0, 0.0, 0.0);
1411
1412 // Vector pointing from cell center to particle center
1413 Float3 x_p;
1414
1415 // Normal vector pointing from cell center towards particle center
1416 Float3 n_p;
1417
1418 // Normalized sphere-particle distance
1419 Float q;
1420
1421 // Kernel function derivative value
1422 Float dw_q;
1423
1424 // Iterate over 27 neighbor cells, R = 2*cell width
1425 for (int z_dim=-2; z_dim<3; ++z_dim) { // z-axis
1426 for (int y_dim=-2; y_dim<3; ++y_dim) { // y-axis
1427 for (int x_dim=-2; x_dim<3; ++x_dim) { // x-axis
1428
1429 // Index of neighbor cell this iteration is looking at
1430 targetCell = gridPos + make_int3(x_dim, y_dim, z_dim);
1431
1432 // Get distance modifier for interparticle
1433 // vector, if it crosses a periodic boundary
1434 distmod = MAKE_FLOAT3(0.0, 0.0, 0.0);
1435 if (findDistMod(&targetCell, &distmod) != -1) {
1436
1437 // Calculate linear cell ID
1438 cellID = targetCell.x
1439 + targetCell.y * devC_grid.num[0]
1440 + (devC_grid.num[0] * devC_grid.num[1])
1441 * targetCell.z;
1442
1443 // Lowest particle index in cell
1444 startIdx = dev_cellStart[cellID];
1445
1446 // Make sure cell is not empty
1447 if (startIdx != 0xffffffff) {
1448
1449 // Highest particle index in cell
1450 endIdx = dev_cellEnd[cellID];
1451
1452 // Iterate over cell particles
1453 for (i=startIdx; i<endIdx; ++i) {
1454
1455 // Read particle position and radius
1456 __syncthreads();
1457 xr = dev_x_sorted[i];
1458 v = dev_vel_sorted[i];
1459 r = xr.w;
1460
1461 // Find center distance and normal vector
1462 x_p = MAKE_FLOAT3(
1463 xr.x - X.x,
1464 xr.y - X.y,
1465 xr.z - X.z);
1466 d = length(x_p);
1467 n_p = x_p/d;
1468 q = d/R;
1469
1470
1471 dw_q = 0.0;
1472 if (0.0 < q && q < 1.0) {
1473 // kernel for 2d disc approximation
1474 //dw_q = -1.0;
1475
1476 // kernel for 3d sphere approximation
1477 dw_q = -1.5*pow(-q + 1.0, 0.5)
1478 *pow(q + 1.0, 0.5)
1479 + 0.5*pow(-q + 1.0, 1.5)
1480 *pow(q + 1.0, -0.5);
1481 }
1482
1483 v_avg += MAKE_FLOAT3(v.x, v.y, v.z);
1484 d_avg += 2.0*r;
1485 dot_epsilon_ii +=
1486 dw_q*MAKE_FLOAT3(v.x, v.y, v.z)*n_p;
1487 n++;
1488
1489 }
1490 }
1491 }
1492 }
1493 }
1494 }
1495
1496 dot_epsilon_ii /= R;
1497 const Float dot_epsilon_kk =
1498 dot_epsilon_ii.x + dot_epsilon_ii.y + dot_epsilon_ii.z;
1499
1500 const Float dphi =
1501 (1.0 - fmin(phi_0,0.99))*dot_epsilon_kk*ndem*devC_dt;
1502 phi = phi_0 + dphi/(ndem*devC_dt);
1503
1504 //if (dot_epsilon_kk != 0.0)
1505 //printf("%d,%d,%d\tdot_epsilon_kk = %f\tdphi = %f\tphi = %f\n",
1506 //x,y,z, dot_epsilon_kk, dphi, phi);
1507
1508 // Make sure that the porosity is in the interval [0.0;1.0]
1509 phi = fmin(1.00, fmax(0.00, phi));
1510
1511 if (phi < 0.999) {
1512 v_avg /= n;
1513 d_avg /= n;
1514 }
1515
1516 // report values to stdout for debugging
1517 //printf("%d,%d,%d\tphi = %f dphi = %f v_avg = %f,%f,%f d_avg = %f\n",
1518 // x,y,z, phi, dphi, v_avg.x, v_avg.y, v_avg.z, d_avg);
1519
1520 // Save porosity, porosity change, average velocity and average diameter
1521 __syncthreads();
1522 const unsigned int cellidx = idx(x,y,z);
1523 //phi = 0.5; dphi = 0.0; // disable porosity effects const unsigned int cellidx = idx(x,y,z);
1524 dev_ns_phi[cellidx] = phi;
1525 dev_ns_dphi[cellidx] = dphi;
1526 dev_ns_vp_avg[cellidx] = v_avg;
1527 dev_ns_d_avg[cellidx] = d_avg;
1528
1529 #ifdef CHECK_FLUID_FINITE
1530 (void)checkFiniteFloat("phi", x, y, z, phi);
1531 (void)checkFiniteFloat("dphi", x, y, z, dphi);
1532 (void)checkFiniteFloat3("v_avg", x, y, z, v_avg);
1533 (void)checkFiniteFloat("d_avg", x, y, z, d_avg);
1534 #endif
1535 } else {
1536 // np=0: there are no particles
1537
1538 __syncthreads();
1539 const unsigned int cellidx = idx(x,y,z);
1540
1541 dev_ns_dphi[cellidx] = 0.0;
1542
1543 dev_ns_vp_avg[cellidx] = MAKE_FLOAT3(0.0, 0.0, 0.0);
1544 dev_ns_d_avg[cellidx] = 0.0;
1545 }
1546 }
1547 }
1548
1549 // Modulate the hydraulic pressure at the upper boundary
1550 __global__ void setUpperPressureNS(
1551 Float* __restrict__ dev_ns_p,
1552 Float* __restrict__ dev_ns_epsilon,
1553 Float* __restrict__ dev_ns_epsilon_new,
1554 const Float beta,
1555 const Float new_pressure)
1556 {
1557 // 3D thread index
1558 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1559 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1560 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1561
1562 // check that the thread is located at the top boundary
1563 if (x < devC_grid.num[0] &&
1564 y < devC_grid.num[1] &&
1565 z == devC_grid.num[2]-1) {
1566
1567 const unsigned int cellidx = idx(x,y,z);
1568
1569 // Read the current pressure
1570 const Float pressure = dev_ns_p[cellidx];
1571
1572 // Determine the new epsilon boundary condition
1573 const Float epsilon = new_pressure - beta*pressure;
1574
1575 // Write the new pressure and epsilon values to the top boundary cells
1576 __syncthreads();
1577 dev_ns_epsilon[cellidx] = epsilon;
1578 dev_ns_epsilon_new[cellidx] = epsilon;
1579 dev_ns_p[cellidx] = new_pressure;
1580
1581 #ifdef CHECK_FLUID_FINITE
1582 (void)checkFiniteFloat("epsilon", x, y, z, epsilon);
1583 (void)checkFiniteFloat("new_pressure", x, y, z, new_pressure);
1584 #endif
1585 }
1586 }
1587
1588 // Find the gradient in a cell in a homogeneous, cubic 3D scalar field using
1589 // finite central differences
1590 __device__ Float3 gradient(
1591 const Float* __restrict__ dev_scalarfield,
1592 const unsigned int x,
1593 const unsigned int y,
1594 const unsigned int z,
1595 const Float dx,
1596 const Float dy,
1597 const Float dz)
1598 {
1599 // Read 6 neighbor cells
1600 __syncthreads();
1601 //const Float p = dev_scalarfield[idx(x,y,z)];
1602 const Float xn = dev_scalarfield[idx(x-1,y,z)];
1603 const Float xp = dev_scalarfield[idx(x+1,y,z)];
1604 const Float yn = dev_scalarfield[idx(x,y-1,z)];
1605 const Float yp = dev_scalarfield[idx(x,y+1,z)];
1606 const Float zn = dev_scalarfield[idx(x,y,z-1)];
1607 const Float zp = dev_scalarfield[idx(x,y,z+1)];
1608
1609 //__syncthreads();
1610 //if (p != 0.0)
1611 //printf("p[%d,%d,%d] =\t%f\n", x,y,z, p);
1612
1613 // Calculate central-difference gradients
1614 return MAKE_FLOAT3(
1615 (xp - xn)/(2.0*dx),
1616 (yp - yn)/(2.0*dy),
1617 (zp - zn)/(2.0*dz));
1618 }
1619
1620 // Find the divergence in a cell in a homogeneous, cubic, 3D vector field
1621 __device__ Float divergence(
1622 const Float* __restrict__ dev_vectorfield_x,
1623 const Float* __restrict__ dev_vectorfield_y,
1624 const Float* __restrict__ dev_vectorfield_z,
1625 const unsigned int x,
1626 const unsigned int y,
1627 const unsigned int z,
1628 const Float dx,
1629 const Float dy,
1630 const Float dz)
1631 {
1632 // Read 6 cell-face values
1633 __syncthreads();
1634 const Float xn = dev_vectorfield_x[vidx(x,y,z)];
1635 const Float xp = dev_vectorfield_x[vidx(x+1,y,z)];
1636 const Float yn = dev_vectorfield_y[vidx(x,y,z)];
1637 const Float yp = dev_vectorfield_y[vidx(x,y+1,z)];
1638 const Float zn = dev_vectorfield_z[vidx(x,y,z)];
1639 const Float zp = dev_vectorfield_z[vidx(x,y,z+1)];
1640
1641 // Calculate the central difference gradrients and the divergence
1642 return
1643 (xp - xn)/dx +
1644 (yp - yn)/dy +
1645 (zp - zn)/dz;
1646 }
1647
1648 // Find the divergence of a tensor field
1649 __device__ Float3 divergence_tensor(
1650 const Float* __restrict__ dev_tensorfield,
1651 const unsigned int x,
1652 const unsigned int y,
1653 const unsigned int z,
1654 const Float dx,
1655 const Float dy,
1656 const Float dz)
1657 {
1658 __syncthreads();
1659
1660 // Read the tensor in the 6 neighbor cells
1661 const Float t_xx_xp = dev_tensorfield[idx(x+1,y,z)*6];
1662 const Float t_xy_xp = dev_tensorfield[idx(x+1,y,z)*6+1];
1663 const Float t_xz_xp = dev_tensorfield[idx(x+1,y,z)*6+2];
1664 const Float t_yy_xp = dev_tensorfield[idx(x+1,y,z)*6+3];
1665 const Float t_yz_xp = dev_tensorfield[idx(x+1,y,z)*6+4];
1666 const Float t_zz_xp = dev_tensorfield[idx(x+1,y,z)*6+5];
1667
1668 const Float t_xx_xn = dev_tensorfield[idx(x-1,y,z)*6];
1669 const Float t_xy_xn = dev_tensorfield[idx(x-1,y,z)*6+1];
1670 const Float t_xz_xn = dev_tensorfield[idx(x-1,y,z)*6+2];
1671 const Float t_yy_xn = dev_tensorfield[idx(x-1,y,z)*6+3];
1672 const Float t_yz_xn = dev_tensorfield[idx(x-1,y,z)*6+4];
1673 const Float t_zz_xn = dev_tensorfield[idx(x-1,y,z)*6+5];
1674
1675 const Float t_xx_yp = dev_tensorfield[idx(x,y+1,z)*6];
1676 const Float t_xy_yp = dev_tensorfield[idx(x,y+1,z)*6+1];
1677 const Float t_xz_yp = dev_tensorfield[idx(x,y+1,z)*6+2];
1678 const Float t_yy_yp = dev_tensorfield[idx(x,y+1,z)*6+3];
1679 const Float t_yz_yp = dev_tensorfield[idx(x,y+1,z)*6+4];
1680 const Float t_zz_yp = dev_tensorfield[idx(x,y+1,z)*6+5];
1681
1682 const Float t_xx_yn = dev_tensorfield[idx(x,y-1,z)*6];
1683 const Float t_xy_yn = dev_tensorfield[idx(x,y-1,z)*6+1];
1684 const Float t_xz_yn = dev_tensorfield[idx(x,y-1,z)*6+2];
1685 const Float t_yy_yn = dev_tensorfield[idx(x,y-1,z)*6+3];
1686 const Float t_yz_yn = dev_tensorfield[idx(x,y-1,z)*6+4];
1687 const Float t_zz_yn = dev_tensorfield[idx(x,y-1,z)*6+5];
1688
1689 const Float t_xx_zp = dev_tensorfield[idx(x,y,z+1)*6];
1690 const Float t_xy_zp = dev_tensorfield[idx(x,y,z+1)*6+1];
1691 const Float t_xz_zp = dev_tensorfield[idx(x,y,z+1)*6+2];
1692 const Float t_yy_zp = dev_tensorfield[idx(x,y,z+1)*6+3];
1693 const Float t_yz_zp = dev_tensorfield[idx(x,y,z+1)*6+4];
1694 const Float t_zz_zp = dev_tensorfield[idx(x,y,z+1)*6+5];
1695
1696 const Float t_xx_zn = dev_tensorfield[idx(x,y,z-1)*6];
1697 const Float t_xy_zn = dev_tensorfield[idx(x,y,z-1)*6+1];
1698 const Float t_xz_zn = dev_tensorfield[idx(x,y,z-1)*6+2];
1699 const Float t_yy_zn = dev_tensorfield[idx(x,y,z-1)*6+3];
1700 const Float t_yz_zn = dev_tensorfield[idx(x,y,z-1)*6+4];
1701 const Float t_zz_zn = dev_tensorfield[idx(x,y,z-1)*6+5];
1702
1703 // Calculate div(phi*tau)
1704 const Float3 div_tensor = MAKE_FLOAT3(
1705 // x
1706 (t_xx_xp - t_xx_xn)/dx +
1707 (t_xy_yp - t_xy_yn)/dy +
1708 (t_xz_zp - t_xz_zn)/dz,
1709 // y
1710 (t_xy_xp - t_xy_xn)/dx +
1711 (t_yy_yp - t_yy_yn)/dy +
1712 (t_yz_zp - t_yz_zn)/dz,
1713 // z
1714 (t_xz_xp - t_xz_xn)/dx +
1715 (t_yz_yp - t_yz_yn)/dy +
1716 (t_zz_zp - t_zz_zn)/dz);
1717
1718 #ifdef CHECK_FLUID_FINITE
1719 (void)checkFiniteFloat3("div_tensor", x, y, z, div_tensor);
1720 #endif
1721 return div_tensor;
1722 }
1723
1724
1725 // Find the spatial gradient in e.g. pressures per cell
1726 // using first order central differences
1727 __global__ void findNSgradientsDev(
1728 const Float* __restrict__ dev_scalarfield, // in
1729 Float3* __restrict__ dev_vectorfield) // out
1730 {
1731 // 3D thread index
1732 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1733 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1734 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1735
1736 // Grid dimensions
1737 const unsigned int nx = devC_grid.num[0];
1738 const unsigned int ny = devC_grid.num[1];
1739 const unsigned int nz = devC_grid.num[2];
1740
1741 // Grid sizes
1742 const Float dx = devC_grid.L[0]/nx;
1743 const Float dy = devC_grid.L[1]/ny;
1744 const Float dz = devC_grid.L[2]/nz;
1745
1746 // 1D thread index
1747 const unsigned int cellidx = idx(x,y,z);
1748
1749 // Check that we are not outside the fluid grid
1750 if (x < nx && y < ny && z < nz) {
1751
1752 const Float3 grad = gradient(dev_scalarfield, x, y, z, dx, dy, dz);
1753
1754 // Write gradient
1755 __syncthreads();
1756 dev_vectorfield[cellidx] = grad;
1757
1758 #ifdef CHECK_FLUID_FINITE
1759 (void)checkFiniteFloat3("grad", x, y, z, grad);
1760 #endif
1761 }
1762 }
1763
1764 // Find the outer product of v v
1765 __global__ void findvvOuterProdNS(
1766 const Float3* __restrict__ dev_ns_v, // in
1767 Float* __restrict__ dev_ns_v_prod) // out
1768 {
1769 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1770 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1771 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1772
1773 // 1D thread index
1774 const unsigned int cellidx6 = idx(x,y,z)*6;
1775
1776 // Check that we are not outside the fluid grid
1777 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
1778
1779 __syncthreads();
1780 const Float3 v = dev_ns_v[idx(x,y,z)];
1781
1782 // The outer product (v v) looks like:
1783 // [[ v_x^2 v_x*v_y v_x*v_z ]
1784 // [ v_y*v_x v_y^2 v_y*v_z ]
1785 // [ v_z*v_x v_z*v_y v_z^2 ]]
1786
1787 // The tensor is symmetrical: value i,j = j,i.
1788 // Only the upper triangle is saved, with the cells given a linear index
1789 // enumerated as:
1790 // [[ 0 1 2 ]
1791 // [ 3 4 ]
1792 // [ 5 ]]
1793
1794 __syncthreads();
1795 dev_ns_v_prod[cellidx6] = v.x*v.x;
1796 dev_ns_v_prod[cellidx6+1] = v.x*v.y;
1797 dev_ns_v_prod[cellidx6+2] = v.x*v.z;
1798 dev_ns_v_prod[cellidx6+3] = v.y*v.y;
1799 dev_ns_v_prod[cellidx6+4] = v.y*v.z;
1800 dev_ns_v_prod[cellidx6+5] = v.z*v.z;
1801
1802 #ifdef CHECK_FLUID_FINITE
1803 (void)checkFiniteFloat("v_prod[0]", x, y, z, v.x*v.x);
1804 (void)checkFiniteFloat("v_prod[1]", x, y, z, v.x*v.y);
1805 (void)checkFiniteFloat("v_prod[2]", x, y, z, v.x*v.z);
1806 (void)checkFiniteFloat("v_prod[3]", x, y, z, v.y*v.y);
1807 (void)checkFiniteFloat("v_prod[4]", x, y, z, v.y*v.z);
1808 (void)checkFiniteFloat("v_prod[5]", x, y, z, v.z*v.z);
1809 #endif
1810 }
1811 }
1812
1813
1814 // Find the fluid stress tensor. It is symmetrical, and can thus be saved in 6
1815 // values in 3D.
1816 __global__ void findNSstressTensor(
1817 const Float3* __restrict__ dev_ns_v, // in
1818 const Float mu, // in
1819 Float* __restrict__ dev_ns_tau) // out
1820 {
1821 // 3D thread index
1822 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1823 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1824 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1825
1826 // Grid dimensions
1827 const unsigned int nx = devC_grid.num[0];
1828 const unsigned int ny = devC_grid.num[1];
1829 const unsigned int nz = devC_grid.num[2];
1830
1831 // Cell sizes
1832 const Float dx = devC_grid.L[0]/nx;
1833 const Float dy = devC_grid.L[1]/ny;
1834 const Float dz = devC_grid.L[2]/nz;
1835
1836 // 1D thread index
1837 const unsigned int cellidx6 = idx(x,y,z)*6;
1838
1839 // Check that we are not outside the fluid grid
1840 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
1841
1842 // The fluid stress tensor (tau) looks like
1843 // [[ tau_xx tau_xy tau_xz ]
1844 // [ tau_yx tau_xy tau_yz ]
1845 // [ tau_zx tau_zy tau_zz ]]
1846
1847 // The tensor is symmetrical: value i,j = j,i.
1848 // Only the upper triangle is saved, with the cells given a linear index
1849 // enumerated as:
1850 // [[ 0 1 2 ]
1851 // [ 3 4 ]
1852 // [ 5 ]]
1853
1854 // Read neighbor values for central differences
1855 __syncthreads();
1856 const Float3 xp = dev_ns_v[idx(x+1,y,z)];
1857 const Float3 xn = dev_ns_v[idx(x-1,y,z)];
1858 const Float3 yp = dev_ns_v[idx(x,y+1,z)];
1859 const Float3 yn = dev_ns_v[idx(x,y-1,z)];
1860 const Float3 zp = dev_ns_v[idx(x,y,z+1)];
1861 const Float3 zn = dev_ns_v[idx(x,y,z-1)];
1862
1863 // The diagonal stress tensor components
1864 const Float tau_xx = 2.0*mu*(xp.x - xn.x)/(2.0*dx);
1865 const Float tau_yy = 2.0*mu*(yp.y - yn.y)/(2.0*dy);
1866 const Float tau_zz = 2.0*mu*(zp.z - zn.z)/(2.0*dz);
1867
1868 // The off-diagonal stress tensor components
1869 const Float tau_xy =
1870 mu*((yp.x - yn.x)/(2.0*dy) + (xp.y - xn.y)/(2.0*dx));
1871 const Float tau_xz =
1872 mu*((zp.x - zn.x)/(2.0*dz) + (xp.z - xn.z)/(2.0*dx));
1873 const Float tau_yz =
1874 mu*((zp.y - zn.y)/(2.0*dz) + (yp.z - yn.z)/(2.0*dy));
1875
1876 /*
1877 if (x == 0 && y == 0 && z == 0)
1878 printf("mu = %f\n", mu);
1879 if (tau_xz > 1.0e-6)
1880 printf("%d,%d,%d\ttau_xx = %f\n", x,y,z, tau_xx);
1881 if (tau_yz > 1.0e-6)
1882 printf("%d,%d,%d\ttau_yy = %f\n", x,y,z, tau_yy);
1883 if (tau_zz > 1.0e-6)
1884 printf("%d,%d,%d\ttau_zz = %f\n", x,y,z, tau_zz);
1885 */
1886
1887 // Store values in global memory
1888 __syncthreads();
1889 dev_ns_tau[cellidx6] = tau_xx;
1890 dev_ns_tau[cellidx6+1] = tau_xy;
1891 dev_ns_tau[cellidx6+2] = tau_xz;
1892 dev_ns_tau[cellidx6+3] = tau_yy;
1893 dev_ns_tau[cellidx6+4] = tau_yz;
1894 dev_ns_tau[cellidx6+5] = tau_zz;
1895
1896 #ifdef CHECK_FLUID_FINITE
1897 (void)checkFiniteFloat("tau_xx", x, y, z, tau_xx);
1898 (void)checkFiniteFloat("tau_xy", x, y, z, tau_xy);
1899 (void)checkFiniteFloat("tau_xz", x, y, z, tau_xz);
1900 (void)checkFiniteFloat("tau_yy", x, y, z, tau_yy);
1901 (void)checkFiniteFloat("tau_yz", x, y, z, tau_yz);
1902 (void)checkFiniteFloat("tau_zz", x, y, z, tau_zz);
1903 #endif
1904 }
1905 }
1906
1907
1908 // Find the divergence of phi*v*v
1909 __global__ void findNSdivphiviv(
1910 const Float* __restrict__ dev_ns_phi, // in
1911 const Float3* __restrict__ dev_ns_v, // in
1912 Float3* __restrict__ dev_ns_div_phi_vi_v) // out
1913 {
1914 // 3D thread index
1915 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1916 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1917 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1918
1919 // Grid dimensions
1920 const unsigned int nx = devC_grid.num[0];
1921 const unsigned int ny = devC_grid.num[1];
1922 const unsigned int nz = devC_grid.num[2];
1923
1924 // Cell sizes
1925 const Float dx = devC_grid.L[0]/nx;
1926 const Float dy = devC_grid.L[1]/ny;
1927 const Float dz = devC_grid.L[2]/nz;
1928
1929 // 1D thread index
1930 const unsigned int cellidx = idx(x,y,z);
1931
1932 // Check that we are not outside the fluid grid
1933 if (x < nx && y < ny && z < nz) {
1934
1935 // Read porosity and velocity in the 6 neighbor cells
1936 __syncthreads();
1937 const Float phi_xn = dev_ns_phi[idx(x-1,y,z)];
1938 const Float phi = dev_ns_phi[idx(x,y,z)];
1939 const Float phi_xp = dev_ns_phi[idx(x+1,y,z)];
1940 const Float phi_yn = dev_ns_phi[idx(x,y-1,z)];
1941 const Float phi_yp = dev_ns_phi[idx(x,y+1,z)];
1942 const Float phi_zn = dev_ns_phi[idx(x,y,z-1)];
1943 const Float phi_zp = dev_ns_phi[idx(x,y,z+1)];
1944
1945 const Float3 v_xn = dev_ns_v[idx(x-1,y,z)];
1946 const Float3 v = dev_ns_v[idx(x,y,z)];
1947 const Float3 v_xp = dev_ns_v[idx(x+1,y,z)];
1948 const Float3 v_yn = dev_ns_v[idx(x,y-1,z)];
1949 const Float3 v_yp = dev_ns_v[idx(x,y+1,z)];
1950 const Float3 v_zn = dev_ns_v[idx(x,y,z-1)];
1951 const Float3 v_zp = dev_ns_v[idx(x,y,z+1)];
1952
1953 // Calculate upwind coefficients
1954 //*
1955 const Float3 a = MAKE_FLOAT3(
1956 copysign(1.0, v.x),
1957 copysign(1.0, v.y),
1958 copysign(1.0, v.z));
1959
1960 // Calculate the divergence based on the upwind differences (Griebel et
1961 // al. 1998, eq. 3.9)
1962 const Float3 div_uw = MAKE_FLOAT3(
1963 // x
1964 ((1.0 + a.x)*(phi*v.x*v.x - phi_xn*v_xn.x*v_xn.x) +
1965 (1.0 - a.x)*(phi_xp*v_xp.x*v_xp.x - phi*v.x*v.x))/(2.0*dx) +
1966
1967 ((1.0 + a.y)*(phi*v.x*v.y - phi_yn*v_yn.x*v_yn.y) +
1968 (1.0 - a.y)*(phi_yp*v_yp.x*v_yp.y - phi*v.x*v.y))/(2.0*dy) +
1969
1970 ((1.0 + a.z)*(phi*v.x*v.z - phi_zn*v_zn.x*v_zn.z) +
1971 (1.0 - a.z)*(phi_zp*v_zp.x*v_zp.z - phi*v.x*v.z))/(2.0*dz),
1972
1973 // y
1974 ((1.0 + a.x)*(phi*v.y*v.x - phi_xn*v_xn.y*v_xn.x) +
1975 (1.0 - a.x)*(phi_xp*v_xp.y*v_xp.x - phi*v.y*v.x))/(2.0*dx) +
1976
1977 ((1.0 + a.y)*(phi*v.y*v.y - phi_yn*v_yn.y*v_yn.y) +
1978 (1.0 - a.y)*(phi_yp*v_yp.y*v_yp.y - phi*v.y*v.y))/(2.0*dy) +
1979
1980 ((1.0 + a.z)*(phi*v.y*v.z - phi_zn*v_zn.y*v_zn.z) +
1981 (1.0 - a.z)*(phi_zp*v_zp.y*v_zp.z - phi*v.y*v.z))/(2.0*dz),
1982
1983 // z
1984 ((1.0 + a.x)*(phi*v.z*v.x - phi_xn*v_xn.z*v_xn.x) +
1985 (1.0 - a.x)*(phi_xp*v_xp.z*v_xp.x - phi*v.z*v.x))/(2.0*dx) +
1986
1987 ((1.0 + a.y)*(phi*v.z*v.y - phi_yn*v_yn.z*v_yn.y) +
1988 (1.0 - a.y)*(phi_yp*v_yp.z*v_yp.y - phi*v.z*v.y))/(2.0*dy) +
1989
1990 ((1.0 + a.z)*(phi*v.z*v.z - phi_zn*v_zn.z*v_zn.z) +
1991 (1.0 - a.z)*(phi_zp*v_zp.z*v_zp.z - phi*v.z*v.z))/(2.0*dz));
1992
1993
1994 // Calculate the divergence based on the central-difference gradients
1995 const Float3 div_cd = MAKE_FLOAT3(
1996 // x
1997 (phi_xp*v_xp.x*v_xp.x - phi_xn*v_xn.x*v_xn.x)/(2.0*dx) +
1998 (phi_yp*v_yp.x*v_yp.y - phi_yn*v_yn.x*v_yn.y)/(2.0*dy) +
1999 (phi_zp*v_zp.x*v_zp.z - phi_zn*v_zn.x*v_zn.z)/(2.0*dz),
2000 // y
2001 (phi_xp*v_xp.y*v_xp.x - phi_xn*v_xn.y*v_xn.x)/(2.0*dx) +
2002 (phi_yp*v_yp.y*v_yp.y - phi_yn*v_yn.y*v_yn.y)/(2.0*dy) +
2003 (phi_zp*v_zp.y*v_zp.z - phi_zn*v_zn.y*v_zn.z)/(2.0*dz),
2004 // z
2005 (phi_xp*v_xp.z*v_xp.x - phi_xn*v_xn.z*v_xn.x)/(2.0*dx) +
2006 (phi_yp*v_yp.z*v_yp.y - phi_yn*v_yn.z*v_yn.y)/(2.0*dy) +
2007 (phi_zp*v_zp.z*v_zp.z - phi_zn*v_zn.z*v_zn.z)/(2.0*dz));
2008
2009 // Weighting parameter
2010 const Float tau = 0.5;
2011
2012 // Determine the weighted average of both discretizations
2013 const Float3 div_phi_vi_v = tau*div_uw + (1.0 - tau)*div_cd;
2014 //*/
2015
2016 /*
2017 // Calculate the divergence: div(phi*v_i*v)
2018 const Float3 div_phi_vi_v = MAKE_FLOAT3(
2019 // x
2020 (phi_xp*v_xp.x*v_xp.x - phi_xn*v_xn.x*v_xn.x)/(2.0*dx) +
2021 (phi_yp*v_yp.x*v_yp.y - phi_yn*v_yn.x*v_yn.y)/(2.0*dy) +
2022 (phi_zp*v_zp.x*v_zp.z - phi_zn*v_zn.x*v_zn.z)/(2.0*dz),
2023 // y
2024 (phi_xp*v_xp.y*v_xp.x - phi_xn*v_xn.y*v_xn.x)/(2.0*dx) +
2025 (phi_yp*v_yp.y*v_yp.y - phi_yn*v_yn.y*v_yn.y)/(2.0*dy) +
2026 (phi_zp*v_zp.y*v_zp.z - phi_zn*v_zn.y*v_zn.z)/(2.0*dz),
2027 // z
2028 (phi_xp*v_xp.z*v_xp.x - phi_xn*v_xn.z*v_xn.x)/(2.0*dx) +
2029 (phi_yp*v_yp.z*v_yp.y - phi_yn*v_yn.z*v_yn.y)/(2.0*dy) +
2030 (phi_zp*v_zp.z*v_zp.z - phi_zn*v_zn.z*v_zn.z)/(2.0*dz));
2031 // */
2032
2033 // Write divergence
2034 __syncthreads();
2035 dev_ns_div_phi_vi_v[cellidx] = div_phi_vi_v;
2036
2037 //printf("div(phi*v*v) [%d,%d,%d] = %f, %f, %f\n", x,y,z,
2038 //div_phi_vi_v.x, div_phi_vi_v.y, div_phi_vi_v.z);
2039
2040 #ifdef CHECK_FLUID_FINITE
2041 (void)checkFiniteFloat3("div_phi_vi_v", x, y, z, div_phi_vi_v);
2042 #endif
2043 }
2044 }
2045
2046 __global__ void findNSdivtau(
2047 const Float* __restrict__ dev_ns_tau, // in
2048 Float3* __restrict__ dev_ns_div_tau) // out
2049 {
2050 // 3D thread index
2051 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
2052 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
2053 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
2054
2055 // Grid dimensions
2056 const unsigned int nx = devC_grid.num[0];
2057 const unsigned int ny = devC_grid.num[1];
2058 const unsigned int nz = devC_grid.num[2];
2059
2060 // Cell sizes
2061 const Float dx = devC_grid.L[0]/nx;
2062 const Float dy = devC_grid.L[1]/ny;
2063 const Float dz = devC_grid.L[2]/nz;
2064
2065 // 1D thread index
2066 const unsigned int cellidx = idx(x,y,z);
2067
2068 // Check that we are not outside the fluid grid
2069 if (x < nx && y < ny && z < nz) {
2070
2071 __syncthreads();
2072 const Float3 div_tau =
2073 divergence_tensor(dev_ns_tau, x, y, z, dx, dy, dz);
2074
2075 __syncthreads();
2076 dev_ns_div_tau[cellidx] = div_tau;
2077 }
2078 }
2079
2080
2081 // Find the divergence of phi*tau
2082 __global__ void findNSdivphitau(
2083 const Float* __restrict__ dev_ns_phi, // in
2084 const Float* __restrict__ dev_ns_tau, // in
2085 Float3* __restrict__ dev_ns_div_phi_tau) // out
2086 {
2087 // 3D thread index
2088 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
2089 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
2090 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
2091
2092 // Grid dimensions
2093 const unsigned int nx = devC_grid.num[0];
2094 const unsigned int ny = devC_grid.num[1];
2095 const unsigned int nz = devC_grid.num[2];
2096
2097 // Cell sizes
2098 const Float dx = devC_grid.L[0]/nx;
2099 const Float dy = devC_grid.L[1]/ny;
2100 const Float dz = devC_grid.L[2]/nz;
2101
2102 // 1D thread index
2103 const unsigned int cellidx = idx(x,y,z);
2104
2105 // Check that we are not outside the fluid grid
2106 if (x < nx && y < ny && z < nz) {
2107
2108 // Read the porosity in the 6 neighbor cells
2109 __syncthreads();
2110 const Float phi_xn = dev_ns_phi[idx(x-1,y,z)];
2111 const Float phi_xp = dev_ns_phi[idx(x+1,y,z)];
2112 const Float phi_yn = dev_ns_phi[idx(x,y-1,z)];
2113 const Float phi_yp = dev_ns_phi[idx(x,y+1,z)];
2114 const Float phi_zn = dev_ns_phi[idx(x,y,z-1)];
2115 const Float phi_zp = dev_ns_phi[idx(x,y,z+1)];
2116
2117 // Read the stress tensor in the 6 neighbor cells
2118 const Float tau_xx_xp = dev_ns_tau[idx(x+1,y,z)*6];
2119 const Float tau_xy_xp = dev_ns_tau[idx(x+1,y,z)*6+1];
2120 const Float tau_xz_xp = dev_ns_tau[idx(x+1,y,z)*6+2];
2121 const Float tau_yy_xp = dev_ns_tau[idx(x+1,y,z)*6+3];
2122 const Float tau_yz_xp = dev_ns_tau[idx(x+1,y,z)*6+4];
2123 const Float tau_zz_xp = dev_ns_tau[idx(x+1,y,z)*6+5];
2124
2125 const Float tau_xx_xn = dev_ns_tau[idx(x-1,y,z)*6];
2126 const Float tau_xy_xn = dev_ns_tau[idx(x-1,y,z)*6+1];
2127 const Float tau_xz_xn = dev_ns_tau[idx(x-1,y,z)*6+2];
2128 const Float tau_yy_xn = dev_ns_tau[idx(x-1,y,z)*6+3];
2129 const Float tau_yz_xn = dev_ns_tau[idx(x-1,y,z)*6+4];
2130 const Float tau_zz_xn = dev_ns_tau[idx(x-1,y,z)*6+5];
2131
2132 const Float tau_xx_yp = dev_ns_tau[idx(x,y+1,z)*6];
2133 const Float tau_xy_yp = dev_ns_tau[idx(x,y+1,z)*6+1];
2134 const Float tau_xz_yp = dev_ns_tau[idx(x,y+1,z)*6+2];
2135 const Float tau_yy_yp = dev_ns_tau[idx(x,y+1,z)*6+3];
2136 const Float tau_yz_yp = dev_ns_tau[idx(x,y+1,z)*6+4];
2137 const Float tau_zz_yp = dev_ns_tau[idx(x,y+1,z)*6+5];
2138
2139 const Float tau_xx_yn = dev_ns_tau[idx(x,y-1,z)*6];
2140 const Float tau_xy_yn = dev_ns_tau[idx(x,y-1,z)*6+1];
2141 const Float tau_xz_yn = dev_ns_tau[idx(x,y-1,z)*6+2];
2142 const Float tau_yy_yn = dev_ns_tau[idx(x,y-1,z)*6+3];
2143 const Float tau_yz_yn = dev_ns_tau[idx(x,y-1,z)*6+4];
2144 const Float tau_zz_yn = dev_ns_tau[idx(x,y-1,z)*6+5];
2145
2146 const Float tau_xx_zp = dev_ns_tau[idx(x,y,z+1)*6];
2147 const Float tau_xy_zp = dev_ns_tau[idx(x,y,z+1)*6+1];
2148 const Float tau_xz_zp = dev_ns_tau[idx(x,y,z+1)*6+2];
2149 const Float tau_yy_zp = dev_ns_tau[idx(x,y,z+1)*6+3];
2150 const Float tau_yz_zp = dev_ns_tau[idx(x,y,z+1)*6+4];
2151 const Float tau_zz_zp = dev_ns_tau[idx(x,y,z+1)*6+5];
2152
2153 const Float tau_xx_zn = dev_ns_tau[idx(x,y,z-1)*6];
2154 const Float tau_xy_zn = dev_ns_tau[idx(x,y,z-1)*6+1];
2155 const Float tau_xz_zn = dev_ns_tau[idx(x,y,z-1)*6+2];
2156 const Float tau_yy_zn = dev_ns_tau[idx(x,y,z-1)*6+3];
2157 const Float tau_yz_zn = dev_ns_tau[idx(x,y,z-1)*6+4];
2158 const Float tau_zz_zn = dev_ns_tau[idx(x,y,z-1)*6+5];
2159
2160 // Calculate div(phi*tau)
2161 const Float3 div_phi_tau = MAKE_FLOAT3(
2162 // x
2163 (phi_xp*tau_xx_xp - phi_xn*tau_xx_xn)/dx +
2164 (phi_yp*tau_xy_yp - phi_yn*tau_xy_yn)/dy +
2165 (phi_zp*tau_xz_zp - phi_zn*tau_xz_zn)/dz,
2166 // y
2167 (phi_xp*tau_xy_xp - phi_xn*tau_xy_xn)/dx +
2168 (phi_yp*tau_yy_yp - phi_yn*tau_yy_yn)/dy +
2169 (phi_zp*tau_yz_zp - phi_zn*tau_yz_zn)/dz,
2170 // z
2171 (phi_xp*tau_xz_xp - phi_xn*tau_xz_xn)/dx +
2172 (phi_yp*tau_yz_yp - phi_yn*tau_yz_yn)/dy +
2173 (phi_zp*tau_zz_zp - phi_zn*tau_zz_zn)/dz);
2174
2175 // Write divergence
2176 __syncthreads();
2177 dev_ns_div_phi_tau[cellidx] = div_phi_tau;
2178
2179 #ifdef CHECK_FLUID_FINITE
2180 (void)checkFiniteFloat3("div_phi_tau", x, y, z, div_phi_tau);
2181 #endif
2182 }
2183 }
2184
2185 // Find the divergence of phi v v
2186 // Unused
2187 __global__ void findNSdivphivv(
2188 const Float* __restrict__ dev_ns_v_prod, // in
2189 const Float* __restrict__ dev_ns_phi, // in
2190 Float3* __restrict__ dev_ns_div_phi_v_v) // out
2191 {
2192 // 3D thread index
2193 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
2194 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
2195 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
2196
2197 // Grid dimensions
2198 const unsigned int nx = devC_grid.num[0];
2199 const unsigned int ny = devC_grid.num[1];
2200 const unsigned int nz = devC_grid.num[2];
2201
2202 // Cell sizes
2203 const Float dx = devC_grid.L[0]/nx;
2204 const Float dy = devC_grid.L[1]/ny;
2205 const Float dz = devC_grid.L[2]/nz;
2206
2207 // 1D thread index
2208 const unsigned int cellidx = idx(x,y,z);
2209
2210 // Check that we are not outside the fluid grid
2211 if (x < nx && y < ny && z < nz) {
2212
2213 // Read cell and 6 neighbor cells
2214 __syncthreads();
2215 //const Float phi = dev_ns_phi[cellidx];
2216 const Float phi_xn = dev_ns_phi[idx(x-1,y,z)];
2217 const Float phi_xp = dev_ns_phi[idx(x+1,y,z)];
2218 const Float phi_yn = dev_ns_phi[idx(x,y-1,z)];
2219 const Float phi_yp = dev_ns_phi[idx(x,y+1,z)];
2220 const Float phi_zn = dev_ns_phi[idx(x,y,z-1)];
2221 const Float phi_zp = dev_ns_phi[idx(x,y,z+1)];
2222
2223 // The tensor is symmetrical: value i,j = j,i.
2224 // Only the upper triangle is saved, with the cells given a linear index
2225 // enumerated as:
2226 // [[ 0 1 2 ]
2227 // [ 3 4 ]
2228 // [ 5 ]]
2229
2230 // div(T) =
2231 // [ de_xx/dx + de_xy/dy + de_xz/dz ,
2232 // de_yx/dx + de_yy/dy + de_yz/dz ,
2233 // de_zx/dx + de_zy/dy + de_zz/dz ]
2234
2235 // This function finds the divergence of (phi v v), which is a vector
2236
2237 // Calculate the divergence. See
2238 // https://en.wikipedia.org/wiki/Divergence#Application_in_Cartesian_coordinates
2239 // The symmetry described in findvvOuterProdNS is used
2240 __syncthreads();
2241 const Float3 div = MAKE_FLOAT3(
2242 ((dev_ns_v_prod[idx(x+1,y,z)*6]*phi_xp
2243 - dev_ns_v_prod[idx(x-1,y,z)*6]*phi_xn)/(2.0*dx) +
2244 (dev_ns_v_prod[idx(x,y+1,z)*6+1]*phi_yp
2245 - dev_ns_v_prod[idx(x,y-1,z)*6+1]*phi_yn)/(2.0*dy) +
2246 (dev_ns_v_prod[idx(x,y,z+1)*6+2]*phi_zp
2247 - dev_ns_v_prod[idx(x,y,z-1)*6+2]*phi_zn)/(2.0*dz)),
2248 ((dev_ns_v_prod[idx(x+1,y,z)*6+1]*phi_xp
2249 - dev_ns_v_prod[idx(x-1,y,z)*6+1]*phi_xn)/(2.0*dx) +
2250 (dev_ns_v_prod[idx(x,y+1,z)*6+3]*phi_yp
2251 - dev_ns_v_prod[idx(x,y-1,z)*6+3]*phi_yn)/(2.0*dy) +
2252 (dev_ns_v_prod[idx(x,y,z+1)*6+4]*phi_zp
2253 - dev_ns_v_prod[idx(x,y,z-1)*6+4]*phi_zn)/(2.0*dz)),
2254 ((dev_ns_v_prod[idx(x+1,y,z)*6+2]*phi_xp
2255 - dev_ns_v_prod[idx(x-1,y,z)*6+2]*phi_xn)/(2.0*dx) +
2256 (dev_ns_v_prod[idx(x,y+1,z)*6+4]*phi_yp
2257 - dev_ns_v_prod[idx(x,y-1,z)*6+4]*phi_yn)/(2.0*dy) +
2258 (dev_ns_v_prod[idx(x,y,z+1)*6+5]*phi_zp
2259 - dev_ns_v_prod[idx(x,y,z-1)*6+5]*phi_zn)/(2.0*dz)) );
2260
2261 //printf("div[%d,%d,%d] = %f\t%f\t%f\n", x, y, z, div.x, div.y, div.z);
2262
2263 // Write divergence
2264 __syncthreads();
2265 dev_ns_div_phi_v_v[cellidx] = div;
2266
2267 #ifdef CHECK_FLUID_FINITE
2268 (void)checkFiniteFloat3("div_phi_v_v", x, y, z, div);
2269 #endif
2270 }
2271 }
2272
2273
2274 // Find predicted fluid velocity
2275 // Launch per face.
2276 __global__ void findPredNSvelocities(
2277 const Float* __restrict__ dev_ns_p, // in
2278 const Float* __restrict__ dev_ns_v_x, // in
2279 const Float* __restrict__ dev_ns_v_y, // in
2280 const Float* __restrict__ dev_ns_v_z, // in
2281 const Float* __restrict__ dev_ns_phi, // in
2282 const Float* __restrict__ dev_ns_dphi, // in
2283 const Float* __restrict__ dev_ns_div_tau_x, // in
2284 const Float* __restrict__ dev_ns_div_tau_y, // in
2285 const Float* __restrict__ dev_ns_div_tau_z, // in
2286 const Float3* __restrict__ dev_ns_div_phi_vi_v, // in
2287 const int bc_bot, // in
2288 const int bc_top, // in
2289 const Float beta, // in
2290 const Float3* __restrict__ dev_ns_F_pf, // in
2291 const unsigned int ndem, // in
2292 const unsigned int wall0_iz, // in
2293 const Float c_v, // in
2294 const Float mu, // in
2295 const Float rho_f, // in
2296 Float* __restrict__ dev_ns_v_p_x, // out
2297 Float* __restrict__ dev_ns_v_p_y, // out
2298 Float* __restrict__ dev_ns_v_p_z) // out
2299 {
2300 // 3D thread index
2301 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
2302 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
2303 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
2304
2305 // Grid dimensions
2306 const unsigned int nx = devC_grid.num[0];
2307 const unsigned int ny = devC_grid.num[1];
2308 const unsigned int nz = devC_grid.num[2];
2309
2310 // Cell sizes
2311 const Float dx = devC_grid.L[0]/nx;
2312 const Float dy = devC_grid.L[1]/ny;
2313 const Float dz = devC_grid.L[2]/nz;
2314
2315 // 1D thread index
2316 const unsigned int fidx = vidx(x,y,z);
2317 const unsigned int cellidx = idx(x,y,z);
2318
2319 // Check that we are not outside the fluid grid
2320 //if (x <= nx && y <= ny && z <= nz) {
2321 if (x < nx && y < ny && z < nz) {
2322
2323 // Values that are needed for calculating the predicted velocity
2324 __syncthreads();
2325 const Float3 v = MAKE_FLOAT3(
2326 dev_ns_v_x[fidx],
2327 dev_ns_v_y[fidx],
2328 dev_ns_v_z[fidx]);
2329 //printf("v in v* [%d,%d,%d] = %f, %f, %f\n", x,y,z, v.x, v.y, v.z);
2330
2331 Float3 div_tau = MAKE_FLOAT3(0.0, 0.0, 0.0);
2332 if (mu > 0.0) {
2333 div_tau = MAKE_FLOAT3(
2334 dev_ns_div_tau_x[fidx],
2335 dev_ns_div_tau_y[fidx],
2336 dev_ns_div_tau_z[fidx]);
2337 }
2338
2339 // cell center values
2340 const Float phi_xn = dev_ns_phi[idx(x-1,y,z)];
2341 const Float phi_c = dev_ns_phi[cellidx];
2342 const Float phi_yn = dev_ns_phi[idx(x,y-1,z)];
2343 const Float phi_zn = dev_ns_phi[idx(x,y,z-1)];
2344
2345 const Float dphi_xn = dev_ns_dphi[idx(x-1,y,z)];
2346 const Float dphi_c = dev_ns_dphi[cellidx];
2347 const Float dphi_yn = dev_ns_dphi[idx(x,y-1,z)];
2348 const Float dphi_zn = dev_ns_dphi[idx(x,y,z-1)];
2349
2350 const Float3 div_phi_vi_v_xn = dev_ns_div_phi_vi_v[idx(x-1,y,z)];
2351 const Float3 div_phi_vi_v_c = dev_ns_div_phi_vi_v[cellidx];
2352 const Float3 div_phi_vi_v_yn = dev_ns_div_phi_vi_v[idx(x,y-1,z)];
2353 const Float3 div_phi_vi_v_zn = dev_ns_div_phi_vi_v[idx(x,y,z-1)];
2354
2355 // component-wise average values
2356 const Float3 phi = MAKE_FLOAT3(
2357 amean(phi_c, phi_xn),
2358 amean(phi_c, phi_yn),
2359 amean(phi_c, phi_zn));
2360 const Float3 dphi = MAKE_FLOAT3(
2361 amean(dphi_c, dphi_xn),
2362 amean(dphi_c, dphi_yn),
2363 amean(dphi_c, dphi_zn));
2364
2365 // The particle-fluid interaction force should only be incoorporated if
2366 // there is a fluid viscosity
2367 Float3 f_i_c, f_i_xn, f_i_yn, f_i_zn;
2368 if (mu > 0.0 && devC_np > 0) {
2369 f_i_c = dev_ns_F_pf[cellidx];
2370 f_i_xn = dev_ns_F_pf[idx(x-1,y,z)];
2371 f_i_yn = dev_ns_F_pf[idx(x,y-1,z)];
2372 f_i_zn = dev_ns_F_pf[idx(x,y,z-1)];
2373 } else {
2374 f_i_c = MAKE_FLOAT3(0.0, 0.0, 0.0);
2375 f_i_xn = MAKE_FLOAT3(0.0, 0.0, 0.0);
2376 f_i_yn = MAKE_FLOAT3(0.0, 0.0, 0.0);
2377 f_i_zn = MAKE_FLOAT3(0.0, 0.0, 0.0);
2378 }
2379 const Float3 f_i = MAKE_FLOAT3(
2380 amean(f_i_c.x, f_i_xn.x),
2381 amean(f_i_c.y, f_i_yn.y),
2382 amean(f_i_c.z, f_i_zn.z));
2383
2384 const Float dt = ndem*devC_dt;
2385
2386 // The pressure gradient is not needed in Chorin's projection method
2387 // (ns.beta=0), so only has to be looked up in pressure-dependant
2388 // projection methods
2389 Float3 pressure_term = MAKE_FLOAT3(0.0, 0.0, 0.0);
2390 if (beta > 0.0) {
2391 __syncthreads();
2392 const Float p = dev_ns_p[cellidx];
2393 const Float p_xn = dev_ns_p[idx(x-1,y,z)];
2394 const Float p_yn = dev_ns_p[idx(x,y-1,z)];
2395 const Float p_zn = dev_ns_p[idx(x,y,z-1)];
2396 const Float3 grad_p = MAKE_FLOAT3(
2397 (p - p_xn)/dx,
2398 (p - p_yn)/dy,
2399 (p - p_zn)/dz);
2400 #ifdef SET_1
2401 pressure_term = -beta*dt/(rho_f*phi)*grad_p;
2402 #endif
2403 #ifdef SET_2
2404 pressure_term = -beta*dt/rho_f*grad_p;
2405 #endif
2406 }
2407
2408 const Float3 div_phi_vi_v = MAKE_FLOAT3(
2409 amean(div_phi_vi_v_xn.x, div_phi_vi_v_c.x),
2410 amean(div_phi_vi_v_yn.x, div_phi_vi_v_c.y),
2411 amean(div_phi_vi_v_zn.x, div_phi_vi_v_c.z));
2412
2413 // Determine the terms of the predicted velocity change
2414 const Float3 interaction_term = -dt/(rho_f*phi)*f_i;
2415 const Float3 gravity_term = MAKE_FLOAT3(
2416 devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt;
2417 const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi;
2418 const Float3 porosity_term = -1.0*v*dphi/phi;
2419 #ifdef SET_1
2420 const Float3 diffusion_term = dt/(rho_f*phi)*div_tau;
2421 #endif
2422 #ifdef SET_2
2423 const Float3 diffusion_term = dt/rho_f*div_tau;
2424 #endif
2425
2426 // Predict new velocity and add scaling parameters
2427 Float3 v_p = v + c_v*(
2428 pressure_term
2429 + interaction_term
2430 + diffusion_term
2431 + gravity_term
2432 + porosity_term
2433 + advection_term);
2434
2435 //// Neumann BCs
2436 if ((z == 0 && bc_bot == 1) || (z > nz-1 && bc_top == 1))
2437 v_p.z = 0.0;
2438
2439 if ((z == 0 && bc_bot == 2) || (z > nz-1 && bc_top == 2))
2440 v_p = MAKE_FLOAT3(0.0, 0.0, 0.0);
2441
2442 // Set velocities to zero above the dynamic wall
2443 if (z >= wall0_iz)
2444 v_p = MAKE_FLOAT3(0.0, 0.0, 0.0);
2445
2446 #ifdef REPORT_V_P_COMPONENTS
2447 // Report velocity components to stdout for debugging
2448 printf("\n[%d,%d,%d] REPORT_V_P_COMPONENTS\n"
2449 "\tv_p = %+e %+e %+e\n"
2450 "\tv_old = %+e %+e %+e\n"
2451 "\tpres = %+e %+e %+e\n"
2452 "\tinteract = %+e %+e %+e\n"
2453 "\tdiff = %+e %+e %+e\n"
2454 "\tgrav = %+e %+e %+e\n"
2455 "\tporos = %+e %+e %+e\n"
2456 "\tadv = %+e %+e %+e\n",
2457 x, y, z,
2458 v_p.x, v_p.y, v_p.z,
2459 v.x, v.y, v.z,
2460 c_v*pressure_term.x,
2461 c_v*pressure_term.y,
2462 c_v*pressure_term.z,
2463 c_v*interaction_term.x,
2464 c_v*interaction_term.y,
2465 c_v*interaction_term.z,
2466 c_v*diffusion_term.x,
2467 c_v*diffusion_term.y,
2468 c_v*diffusion_term.z,
2469 c_v*gravity_term.x,
2470 c_v*gravity_term.y,
2471 c_v*gravity_term.z,
2472 c_v*porosity_term.x,
2473 c_v*porosity_term.y,
2474 c_v*porosity_term.z,
2475 c_v*advection_term.x,
2476 c_v*advection_term.y,
2477 c_v*advection_term.z);
2478 #endif
2479
2480 // Save the predicted velocity
2481 __syncthreads();
2482 dev_ns_v_p_x[fidx] = v_p.x;
2483 dev_ns_v_p_y[fidx] = v_p.y;
2484 dev_ns_v_p_z[fidx] = v_p.z;
2485
2486 #ifdef CHECK_FLUID_FINITE
2487 (void)checkFiniteFloat3("v_p", x, y, z, v_p);
2488 #endif
2489 }
2490 }
2491
2492 // Find the value of the forcing function. Only grad(epsilon) changes during
2493 // the Jacobi iterations. The remaining, constant terms are only calculated
2494 // during the first iteration.
2495 // At each iteration, the value of the forcing function is found as:
2496 // f = f1 - f2 dot grad(epsilon)
2497 __global__ void findNSforcing(
2498 const Float* __restrict__ dev_ns_epsilon, // in
2499 const Float* __restrict__ dev_ns_phi, // in
2500 const Float* __restrict__ dev_ns_dphi, // in
2501 const Float3* __restrict__ dev_ns_v_p, // in
2502 const Float* __restrict__ dev_ns_v_p_x, // in
2503 const Float* __restrict__ dev_ns_v_p_y, // in
2504 const Float* __restrict__ dev_ns_v_p_z, // in
2505 const unsigned int nijac, // in
2506 const unsigned int ndem, // in
2507 const Float c_v, // in
2508 const Float rho_f, // in
2509 Float* __restrict__ dev_ns_f1, // out
2510 Float3* __restrict__ dev_ns_f2, // out
2511 Float* __restrict__ dev_ns_f) // out
2512 {
2513 // 3D thread index
2514 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
2515 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
2516 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
2517
2518 // Grid dimensions
2519 const unsigned int nx = devC_grid.num[0];
2520 const unsigned int ny = devC_grid.num[1];
2521 const unsigned int nz = devC_grid.num[2];
2522
2523 // Cell sizes
2524 const Float dx = devC_grid.L[0]/nx;
2525 const Float dy = devC_grid.L[1]/ny;
2526 const Float dz = devC_grid.L[2]/nz;
2527
2528 // 1D thread index
2529 const unsigned int cellidx = idx(x,y,z);
2530
2531
2532 // Check that we are not outside the fluid grid
2533 if (x < nx && y < ny && z < nz) {
2534
2535 // Constant forcing function terms
2536 Float f1;
2537 Float3 f2;
2538
2539 // Check if this is the first Jacobi iteration. If it is, find f1 and f2
2540 if (nijac == 0) {
2541
2542 // Read needed values
2543 __syncthreads();
2544 const Float3 v_p = dev_ns_v_p[cellidx];
2545 const Float phi = dev_ns_phi[cellidx];
2546 const Float dphi = dev_ns_dphi[cellidx];
2547
2548 // Calculate derivatives
2549 const Float div_v_p
2550 = divergence(dev_ns_v_p_x, dev_ns_v_p_y, dev_ns_v_p_z,
2551 x, y, z, dx, dy, dz);
2552 const Float3 grad_phi
2553 = gradient(dev_ns_phi, x, y, z, dx, dy, dz);
2554
2555 // CFD time step
2556 const Float dt = devC_dt*ndem;
2557
2558 // Find forcing function terms
2559 #ifdef SET_1
2560 const Float t1 = rho_f*phi*div_v_p/(c_v*dt);
2561 const Float t2 = rho_f*dot(grad_phi, v_p)/(c_v*dt);
2562 const Float t4 = rho_f*dphi/(dt*dt*c_v);
2563 #endif
2564 #ifdef SET_2
2565 const Float t1 = rho_f*div_v_p/(c_v*dt);
2566 const Float t2 = rho_f*dot(grad_phi, v_p)/(c_v*dt*phi);
2567 const Float t4 = rho_f*dphi/(dt*dt*phi*c_v);
2568 #endif
2569 f1 = t1 + t2 + t4;
2570 f2 = grad_phi/phi; // t3/grad(epsilon)
2571
2572 #ifdef REPORT_FORCING_TERMS
2573 // Report values terms in the forcing function for debugging
2574 printf("[%d,%d,%d] REPORT_FORCING_TERMS\t"
2575 "t1 = %f\tt2 = %f\tt4 = %f\n", x,y,z, t1, t2, t4);
2576 #endif
2577
2578 // Save values
2579 __syncthreads();
2580 dev_ns_f1[cellidx] = f1;
2581 dev_ns_f2[cellidx] = f2;
2582
2583 } else {
2584
2585 // Read previously found values
2586 __syncthreads();
2587 f1 = dev_ns_f1[cellidx];
2588 f2 = dev_ns_f2[cellidx];
2589 }
2590
2591 // Find the gradient of epsilon, which changes during Jacobi iterations
2592 const Float3 grad_epsilon
2593 = gradient(dev_ns_epsilon, x, y, z, dx, dy, dz);
2594
2595 // Forcing function value
2596 const Float f = f1 - dot(grad_epsilon, f2);
2597
2598 #ifdef REPORT_FORCING_TERMS
2599 const Float t3 = -dot(f2, grad_epsilon);
2600 if (z >= nz-3)
2601 printf("[%d,%d,%d] REPORT_FORCING_TERMS\tf= %f\tf1 = %f\tt3 = %f\n",
2602 x,y,z, f, f1, t3);
2603 #endif
2604
2605 // Save forcing function value
2606 __syncthreads();
2607 dev_ns_f[cellidx] = f;
2608
2609 #ifdef CHECK_FLUID_FINITE
2610 (void)checkFiniteFloat("f", x, y, z, f);
2611 #endif
2612 }
2613 }
2614
2615 // Spatial smoothing, used for the epsilon values. If there are several blocks,
2616 // there will be small errors at the block boundaries, since the update will mix
2617 // non-smoothed and smoothed values.
2618 template<typename T>
2619 __global__ void smoothing(
2620 T* __restrict__ dev_arr,
2621 const Float gamma,
2622 const unsigned int bc_bot,
2623 const unsigned int bc_top)
2624 {
2625 // 3D thread index
2626 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
2627 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
2628 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
2629
2630 // Grid dimensions
2631 const unsigned int nx = devC_grid.num[0];
2632 const unsigned int ny = devC_grid.num[1];
2633 const unsigned int nz = devC_grid.num[2];
2634
2635 // 1D thread index
2636 const unsigned int cellidx = idx(x,y,z);
2637
2638 // Perform the epsilon updates for all non-ghost nodes except the
2639 // Dirichlet boundaries at z=0 and z=nz-1.
2640 // Adjust z range if a boundary has the Dirichlet boundary condition.
2641 int z_min = 0;
2642 int z_max = nz-1;
2643 if (bc_bot == 0)
2644 z_min = 1;
2645 if (bc_top == 0)
2646 z_max = nz-2;
2647
2648 if (x < nx && y < ny && z >= z_min && z <= z_max) {
2649
2650 __syncthreads();
2651 const T e_xn = dev_arr[idx(x-1,y,z)];
2652 const T e = dev_arr[cellidx];
2653 const T e_xp = dev_arr[idx(x+1,y,z)];
2654 const T e_yn = dev_arr[idx(x,y-1,z)];
2655 const T e_yp = dev_arr[idx(x,y+1,z)];
2656 const T e_zn = dev_arr[idx(x,y,z-1)];
2657 const T e_zp = dev_arr[idx(x,y,z+1)];
2658
2659 const T e_avg_neigbors = 1.0/6.0 *
2660 (e_xn + e_xp + e_yn + e_yp + e_zn + e_zp);
2661
2662 const T e_smooth = (1.0 - gamma)*e + gamma*e_avg_neigbors;
2663
2664 __syncthreads();
2665 dev_arr[cellidx] = e_smooth;
2666
2667 //printf("%d,%d,%d\te = %f e_smooth = %f\n", x,y,z, e, e_smooth);
2668 /*printf("%d,%d,%d\te_xn = %f, e_xp = %f, e_yn = %f, e_yp = %f,"
2669 " e_zn = %f, e_zp = %f\n", x,y,z, e_xn, e_xp,
2670 e_yn, e_yp, e_zn, e_zp);*/
2671
2672 #ifdef CHECK_FLUID_FINITE
2673 (void)checkFiniteFloat("e_smooth", x, y, z, e_smooth);
2674 #endif
2675 }
2676 }
2677
2678 // Perform a single Jacobi iteration
2679 __global__ void jacobiIterationNS(
2680 const Float* __restrict__ dev_ns_epsilon,
2681 Float* __restrict__ dev_ns_epsilon_new,
2682 Float* __restrict__ dev_ns_norm,
2683 const Float* __restrict__ dev_ns_f,
2684 const int bc_bot,
2685 const int bc_top,
2686 const Float theta,
2687 const unsigned int wall0_iz,
2688 const Float dp_dz)
2689 {
2690 // 3D thread index
2691 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
2692 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
2693 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
2694
2695 // Grid dimensions
2696 const unsigned int nx = devC_grid.num[0];
2697 const unsigned int ny = devC_grid.num[1];
2698 const unsigned int nz = devC_grid.num[2];
2699
2700 // Cell sizes
2701 const Float dx = devC_grid.L[0]/nx;
2702 const Float dy = devC_grid.L[1]/ny;
2703 const Float dz = devC_grid.L[2]/nz;
2704
2705 // 1D thread index
2706 const unsigned int cellidx = idx(x,y,z);
2707
2708 // Check that we are not outside the fluid grid
2709 //if (x < nx && y < ny && z < nz) {
2710
2711 // internal nodes only
2712 //if (x > 0 && x < nx-1 && y > 0 && y < ny-1 && z > 0 && z < nz-1) {
2713
2714 // Lower boundary: Dirichlet. Upper boundary: Dirichlet
2715 //if (x < nx && y < ny && z > 0 && z < nz-1) {
2716
2717 // Lower boundary: Neumann. Upper boundary: Dirichlet
2718 //if (x < nx && y < ny && z < nz-1) {
2719
2720 // Perform the epsilon updates for all non-ghost nodes except the Dirichlet
2721 // boundaries at z=0 and z=nz-1.
2722 // Adjust z range if a boundary has the Dirichlet boundary condition.
2723 int z_min = 0;
2724 int z_max = nz-1;
2725 if (bc_bot == 0)
2726 z_min = 1;
2727 if (bc_top == 0)
2728 z_max = nz-2;
2729
2730 if (x < nx && y < ny && z >= z_min && z <= z_max) {
2731
2732 // Read the epsilon values from the cell and its 6 neighbors
2733 __syncthreads();
2734 const Float e_xn = dev_ns_epsilon[idx(x-1,y,z)];
2735 const Float e = dev_ns_epsilon[cellidx];
2736 const Float e_xp = dev_ns_epsilon[idx(x+1,y,z)];
2737 const Float e_yn = dev_ns_epsilon[idx(x,y-1,z)];
2738 const Float e_yp = dev_ns_epsilon[idx(x,y+1,z)];
2739 const Float e_zn = dev_ns_epsilon[idx(x,y,z-1)];
2740 const Float e_zp = dev_ns_epsilon[idx(x,y,z+1)];
2741
2742 // Read the value of the forcing function
2743 const Float f = dev_ns_f[cellidx];
2744
2745 // New value of epsilon in 3D update, derived by rearranging the
2746 // discrete Laplacian
2747 const Float dxdx = dx*dx;
2748 const Float dydy = dy*dy;
2749 const Float dzdz = dz*dz;
2750 Float e_new
2751 = (-dxdx*dydy*dzdz*f
2752 + dydy*dzdz*(e_xn + e_xp)
2753 + dxdx*dzdz*(e_yn + e_yp)
2754 + dxdx*dydy*(e_zn + e_zp))
2755 /(2.0*(dxdx*dydy + dxdx*dzdz + dydy*dzdz));
2756
2757
2758 // Dirichlet BC at dynamic top wall. wall0_iz will be larger than the
2759 // grid if the wall isn't dynamic
2760 if (z == wall0_iz)
2761 e_new = e;
2762
2763 // Neumann BC at dynamic top wall
2764 if (z == wall0_iz + 1)
2765 e_new = e_zn + dp_dz;
2766
2767 // New value of epsilon in 1D update
2768 //const Float e_new = (e_zp + e_zn - dz*dz*f)/2.0;
2769
2770 // Print values for debugging
2771 /*printf("[%d,%d,%d]\t e = %f\tf = %f\te_new = %f\n",
2772 x,y,z, e, f, e_new);*/
2773
2774 const Float res_norm = (e_new - e)*(e_new - e)/(e_new*e_new + 1.0e-16);
2775 const Float e_relax = e*(1.0-theta) + e_new*theta;
2776
2777 __syncthreads();
2778 dev_ns_epsilon_new[cellidx] = e_relax;
2779 dev_ns_norm[cellidx] = res_norm;
2780
2781 #ifdef CHECK_FLUID_FINITE
2782 (void)checkFiniteFloat("e_new", x, y, z, e_new);
2783 (void)checkFiniteFloat("e_relax", x, y, z, e_relax);
2784 //(void)checkFiniteFloat("res_norm", x, y, z, res_norm);
2785 if (checkFiniteFloat("res_norm", x, y, z, res_norm)) {
2786 printf("[%d,%d,%d]\t e = %f\tf = %f\te_new = %f\tres_norm = %f\n",
2787 x,y,z, e, f, e_new, res_norm);
2788 }
2789 #endif
2790 }
2791 }
2792
2793 // Copy all values from one array to the other
2794 template<typename T>
2795 __global__ void copyValues(
2796 const T* __restrict__ dev_read,
2797 T* __restrict__ dev_write)
2798 {
2799 // 3D thread index
2800 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
2801 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
2802 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
2803
2804 // Internal nodes only
2805 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
2806
2807 // Internal nodes + ghost nodes
2808 /*if (x <= devC_grid.num[0]+1 &&
2809 y <= devC_grid.num[1]+1 &&
2810 z <= devC_grid.num[2]+1) {*/
2811
2812 const unsigned int cellidx = idx(x,y,z); // without ghost nodes
2813 //const unsigned int cellidx = idx(x-1,y-1,z-1); // with ghost nodes
2814
2815 // Read
2816 __syncthreads();
2817 const T val = dev_read[cellidx];
2818
2819 //if (z == devC_grid.num[2]-1)
2820 //printf("[%d,%d,%d] = %f\n", x, y, z, val);
2821
2822 // Write
2823 __syncthreads();
2824 dev_write[cellidx] = val;
2825 }
2826 }
2827
2828 // Find and store the normalized residuals
2829 __global__ void findNormalizedResiduals(
2830 const Float* __restrict__ dev_ns_epsilon_old,
2831 const Float* __restrict__ dev_ns_epsilon,
2832 Float* __restrict__ dev_ns_norm,
2833 const unsigned int bc_bot,
2834 const unsigned int bc_top)
2835 {
2836 // 3D thread index
2837 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
2838 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
2839 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
2840
2841 // Grid dimensions
2842 const unsigned int nx = devC_grid.num[0];
2843 const unsigned int ny = devC_grid.num[1];
2844 const unsigned int nz = devC_grid.num[2];
2845
2846 // 1D thread index
2847 const unsigned int cellidx = idx(x,y,z);
2848
2849 // Perform the epsilon updates for all non-ghost nodes except the
2850 // Dirichlet boundaries at z=0 and z=nz-1.
2851 // Adjust z range if a boundary has the Dirichlet boundary condition.
2852 int z_min = 0;
2853 int z_max = nz-1;
2854 if (bc_bot == 0)
2855 z_min = 1;
2856 if (bc_top == 0)
2857 z_max = nz-2;
2858
2859 if (x < nx && y < ny && z >= z_min && z <= z_max) {
2860
2861 __syncthreads();
2862 const Float e = dev_ns_epsilon_old[cellidx];
2863 const Float e_new = dev_ns_epsilon[cellidx];
2864
2865 // Find the normalized residual value. A small value is added to the
2866 // denominator to avoid a divide by zero.
2867 //const Float res_norm = (e_new - e)*(e_new - e)/(e_new*e_new + 1.0e-16);
2868 const Float res_norm = (e_new - e)/(e + 1.0e-16);
2869
2870 __syncthreads();
2871 dev_ns_norm[cellidx] = res_norm;
2872
2873 #ifdef CHECK_FLUID_FINITE
2874 checkFiniteFloat("res_norm", x, y, z, res_norm);
2875 #endif
2876 }
2877 }
2878
2879
2880 // Computes the new velocity and pressure using the corrector
2881 __global__ void updateNSpressure(
2882 const Float* __restrict__ dev_ns_epsilon, // in
2883 const Float __restrict__ beta, // in
2884 Float* __restrict__ dev_ns_p) // out
2885 {
2886 // 3D thread index
2887 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
2888 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
2889 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
2890
2891 // 1D thread index
2892 const unsigned int cellidx = idx(x,y,z);
2893
2894 // Check that we are not outside the fluid grid
2895 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
2896
2897 // Read values
2898 __syncthreads();
2899 const Float p_old = dev_ns_p[cellidx];
2900 const Float epsilon = dev_ns_epsilon[cellidx];
2901
2902 // New pressure
2903 Float p = beta*p_old + epsilon;
2904
2905 // Write new values
2906 __syncthreads();
2907 dev_ns_p[cellidx] = p;
2908
2909 #ifdef CHECK_FLUID_FINITE
2910 checkFiniteFloat("p", x, y, z, p);
2911 #endif
2912 }
2913 }
2914
2915 __global__ void updateNSvelocity(
2916 const Float* __restrict__ dev_ns_v_p_x, // in
2917 const Float* __restrict__ dev_ns_v_p_y, // in
2918 const Float* __restrict__ dev_ns_v_p_z, // in
2919 const Float* __restrict__ dev_ns_phi, // in
2920 const Float* __restrict__ dev_ns_epsilon, // in
2921 const Float beta, // in
2922 const int bc_bot, // in
2923 const int bc_top, // in
2924 const unsigned int ndem, // in
2925 const Float c_v, // in
2926 const Float rho_f, // in
2927 const unsigned int wall0_iz, // in
2928 const unsigned int iter, // in
2929 Float* __restrict__ dev_ns_v_x, // out
2930 Float* __restrict__ dev_ns_v_y, // out
2931 Float* __restrict__ dev_ns_v_z) // out
2932 {
2933 // 3D thread index
2934 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
2935 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
2936 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
2937
2938 // Grid dimensions
2939 const unsigned int nx = devC_grid.num[0];
2940 const unsigned int ny = devC_grid.num[1];
2941 const unsigned int nz = devC_grid.num[2];
2942
2943 // Cell sizes
2944 const Float dx = devC_grid.L[0]/nx;
2945 const Float dy = devC_grid.L[1]/ny;
2946 const Float dz = devC_grid.L[2]/nz;
2947
2948 // 1D thread index
2949 const unsigned int cellidx = vidx(x,y,z);
2950
2951 // Check that we are not outside the fluid grid
2952 //if (x <= nx && y <= ny && z <= nz) {
2953 if (x < nx && y < ny && z < nz) {
2954
2955 // Read values
2956 __syncthreads();
2957 const Float v_p_x = dev_ns_v_p_x[cellidx];
2958 const Float v_p_y = dev_ns_v_p_y[cellidx];
2959 const Float v_p_z = dev_ns_v_p_z[cellidx];
2960 const Float3 v_p = MAKE_FLOAT3(v_p_x, v_p_y, v_p_z);
2961
2962 const Float epsilon_xn = dev_ns_epsilon[idx(x-1,y,z)];
2963 const Float epsilon_c = dev_ns_epsilon[idx(x,y,z)];
2964 const Float epsilon_yn = dev_ns_epsilon[idx(x,y-1,z)];
2965 const Float epsilon_zn = dev_ns_epsilon[idx(x,y,z-1)];
2966
2967 const Float phi_xn = dev_ns_phi[idx(x-1,y,z)];
2968 const Float phi_c = dev_ns_phi[idx(x,y,z)];
2969 const Float phi_yn = dev_ns_phi[idx(x,y-1,z)];
2970 const Float phi_zn = dev_ns_phi[idx(x,y,z-1)];
2971
2972 const Float3 phi = MAKE_FLOAT3(
2973 amean(phi_c, phi_xn),
2974 amean(phi_c, phi_yn),
2975 amean(phi_c, phi_zn));
2976
2977 // Find corrector gradient
2978 const Float3 grad_epsilon = MAKE_FLOAT3(
2979 (epsilon_c - epsilon_xn)/dx,
2980 (epsilon_c - epsilon_yn)/dy,
2981 (epsilon_c - epsilon_zn)/dz);
2982
2983 // Find new velocity
2984 Float3 v = v_p
2985 #ifdef SET_1
2986 - c_v*ndem*devC_dt/(phi*rho_f)*grad_epsilon;
2987 #endif
2988 #ifdef SET_2
2989 - c_v*ndem*devC_dt/rho_f*grad_epsilon;
2990 #endif
2991
2992 if ((z == 0 && bc_bot == 1) || (z > nz-1 && bc_top == 1))
2993 v.z = 0.0;
2994
2995 if ((z == 0 && bc_bot == 2) || (z > nz-1 && bc_top == 2))
2996 v = MAKE_FLOAT3(0.0, 0.0, 0.0);
2997
2998 // Do not calculate all components at the outer grid edges (nx, ny, nz)
2999 if (x == nx) {
3000 v.y = 0.0;
3001 v.z = 0.0;
3002 }
3003 if (y == ny) {
3004 v.x = 0.0;
3005 v.z = 0.0;
3006 }
3007 if (z == nz) {
3008 v.x = 0.0;
3009 v.y = 0.0;
3010 }
3011
3012 // Set velocities to zero above the dynamic wall
3013 if (z >= wall0_iz)
3014 v = MAKE_FLOAT3(0.0, 0.0, 0.0);
3015
3016 #ifdef REPORT_V_C_COMPONENTS
3017 printf("[%d,%d,%d] REPORT_V_C_COMPONENTS\n"
3018 "\tv_p = % f\t% f\t% f\n"
3019 "\tv_c = % f\t% f\t% f\n"
3020 "\tv = % f\t% f\t% f\n"
3021 "\tgrad(epsilon) = % f\t% f\t% f\n"
3022 "\tphi = % f\t% f\t% f\n",
3023 x, y, z,
3024 v_p.x, v_p.y, v_p.z,
3025 v.x-v_p.x, v.y-v_p.y, v.z-v_p.z,
3026 v.x, v.y, v.z,
3027 grad_epsilon.x, grad_epsilon.y, grad_epsilon.z,
3028 phi.x, phi.y, phi.z);
3029 #endif
3030
3031 // Check the advection term using the Courant-Friedrichs-Lewy condition
3032 __syncthreads();
3033 if (v.x*ndem*devC_dt/dx
3034 + v.y*ndem*devC_dt/dy
3035 + v.z*ndem*devC_dt/dz > 1.0) {
3036 printf("[%d,%d,%d] Warning: Advection term in fluid may be "
3037 "unstable (CFL condition)\n"
3038 "\tv = %.2e,%.2e,%.2e\n"
3039 "\te_c,e_xn,e_yn,e_zn = %.2e,%.2e,%.2e,%.2e\n",
3040 x,y,z, v.x, v.y, v.z,
3041 epsilon_c, epsilon_xn, epsilon_yn, epsilon_zn
3042 );
3043 }
3044
3045 // Write new values
3046 __syncthreads();
3047 dev_ns_v_x[cellidx] = v.x;
3048 dev_ns_v_y[cellidx] = v.y;
3049 dev_ns_v_z[cellidx] = v.z;
3050
3051 #ifdef CHECK_FLUID_FINITE
3052 checkFiniteFloat3("v", x, y, z, v);
3053 #endif
3054 }
3055 }
3056
3057 // Find the average particle diameter and velocity for each CFD cell.
3058 // UNUSED: The values are estimated in the porosity estimation function instead
3059 __global__ void findAvgParticleVelocityDiameter(
3060 const unsigned int* __restrict__ dev_cellStart, // in
3061 const unsigned int* __restrict__ dev_cellEnd, // in
3062 const Float4* __restrict__ dev_vel_sorted, // in
3063 const Float4* __restrict__ dev_x_sorted, // in
3064 Float3* dev_ns_vp_avg, // out
3065 Float* dev_ns_d_avg) // out
3066 {
3067 // 3D thread index
3068 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
3069 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
3070 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
3071
3072 // check that we are not outside the fluid grid
3073 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
3074
3075 Float4 v;
3076 Float d;
3077 unsigned int startIdx, endIdx, i;
3078 unsigned int n = 0;
3079
3080 // average particle velocity
3081 Float3 v_avg = MAKE_FLOAT3(0.0, 0.0, 0.0);
3082
3083 // average particle diameter
3084 Float d_avg = 0.0;
3085
3086 const unsigned int cellID = x + y * devC_grid.num[0]
3087 + (devC_grid.num[0] * devC_grid.num[1]) * z;
3088
3089 // Lowest particle index in cell
3090 startIdx = dev_cellStart[cellID];
3091
3092 // Make sure cell is not empty
3093 if (startIdx != 0xffffffff) {
3094
3095 // Highest particle index in cell
3096 endIdx = dev_cellEnd[cellID];
3097
3098 // Iterate over cell particles
3099 for (i=startIdx; i<endIdx; ++i) {
3100
3101 // Read particle velocity
3102 __syncthreads();
3103 v = dev_vel_sorted[i];
3104 d = 2.0*dev_x_sorted[i].w;
3105 n++;
3106 v_avg += MAKE_FLOAT3(v.x, v.y, v.z);
3107 d_avg += d;
3108 }
3109
3110 v_avg /= n;
3111 d_avg /= n;
3112 }
3113
3114 // save average radius and velocity
3115 const unsigned int cellidx = idx(x,y,z);
3116 __syncthreads();
3117 dev_ns_vp_avg[cellidx] = v_avg;
3118 dev_ns_d_avg[cellidx] = d_avg;
3119
3120 #ifdef CHECK_FLUID_FINITE
3121 checkFiniteFloat3("v_avg", x, y, z, v_avg);
3122 checkFiniteFloat("d_avg", x, y, z, d_avg);
3123 #endif
3124 }
3125 }
3126
3127 // Find the drag coefficient as dictated by the Reynold's number
3128 // Shamy and Zeghal (2005).
3129 __device__ Float dragCoefficient(Float re)
3130 {
3131 Float cd;
3132 if (re >= 1000.0)
3133 cd = 0.44;
3134 else
3135 cd = 24.0/re*(1.0 + 0.15*pow(re, 0.687));
3136 return cd;
3137 }
3138
3139 // Find particle-fluid interaction force as outlined by Zhou et al. 2010, and
3140 // originally by Gidaspow 1992.
3141 __global__ void findInteractionForce(
3142 const Float4* __restrict__ dev_x, // in
3143 const Float4* __restrict__ dev_vel, // in
3144 const Float* __restrict__ dev_ns_phi, // in
3145 const Float* __restrict__ dev_ns_p, // in
3146 const Float* __restrict__ dev_ns_v_x, // in
3147 const Float* __restrict__ dev_ns_v_y, // in
3148 const Float* __restrict__ dev_ns_v_z, // in
3149 const Float* __restrict__ dev_ns_div_tau_x,// in
3150 const Float* __restrict__ dev_ns_div_tau_y,// in
3151 const Float* __restrict__ dev_ns_div_tau_z,// in
3152 const Float mu, // in
3153 const Float rho_f, // in
3154 //const Float c_v, // in
3155 Float3* __restrict__ dev_ns_f_pf, // out
3156 Float4* __restrict__ dev_force, // out
3157 Float4* __restrict__ dev_ns_f_d, // out
3158 Float4* __restrict__ dev_ns_f_p, // out
3159 Float4* __restrict__ dev_ns_f_v, // out
3160 Float4* __restrict__ dev_ns_f_sum) // out
3161 {
3162 unsigned int i = threadIdx.x + blockIdx.x*blockDim.x; // Particle index
3163
3164 if (i < devC_np) {
3165
3166 // Particle information
3167 __syncthreads();
3168 const Float4 x = dev_x[i];
3169 const Float4 vel = dev_vel[i];
3170 const Float3 v_p = MAKE_FLOAT3(vel.x, vel.y, vel.z);
3171 const Float d = x.w;
3172
3173 // Fluid cell
3174 const unsigned int i_x =
3175 floor((x.x - devC_grid.origo[0])/(devC_grid.L[0]/devC_grid.num[0]));
3176 const unsigned int i_y =
3177 floor((x.y - devC_grid.origo[1])/(devC_grid.L[1]/devC_grid.num[1]));
3178 const unsigned int i_z =
3179 floor((x.z - devC_grid.origo[2])/(devC_grid.L[2]/devC_grid.num[2]));
3180 const unsigned int cellidx = idx(i_x, i_y, i_z);
3181
3182 // Cell sizes
3183 const Float dx = devC_grid.L[0]/devC_grid.num[0];
3184 const Float dy = devC_grid.L[1]/devC_grid.num[1];
3185 const Float dz = devC_grid.L[2]/devC_grid.num[2];
3186
3187 // Fluid information
3188 __syncthreads();
3189 const Float phi = dev_ns_phi[cellidx];
3190
3191 const Float v_x = dev_ns_v_x[vidx(i_x,i_y,i_z)];
3192 const Float v_x_p = dev_ns_v_x[vidx(i_x+1,i_y,i_z)];
3193 const Float v_y = dev_ns_v_y[vidx(i_x,i_y,i_z)];
3194 const Float v_y_p = dev_ns_v_y[vidx(i_x,i_y+1,i_z)];
3195 const Float v_z = dev_ns_v_z[vidx(i_x,i_y,i_z)];
3196 const Float v_z_p = dev_ns_v_z[vidx(i_x,i_y,i_z+1)];
3197
3198 const Float div_tau_x = dev_ns_div_tau_x[vidx(i_x,i_y,i_z)];
3199 const Float div_tau_x_p = dev_ns_div_tau_x[vidx(i_x+1,i_y,i_z)];
3200 const Float div_tau_y = dev_ns_div_tau_y[vidx(i_x,i_y,i_z)];
3201 const Float div_tau_y_p = dev_ns_div_tau_y[vidx(i_x,i_y+1,i_z)];
3202 const Float div_tau_z = dev_ns_div_tau_z[vidx(i_x,i_y,i_z)];
3203 const Float div_tau_z_p = dev_ns_div_tau_z[vidx(i_x,i_y,i_z+1)];
3204
3205 const Float3 v_f = MAKE_FLOAT3(
3206 amean(v_x, v_x_p),
3207 amean(v_y, v_y_p),
3208 amean(v_z, v_z_p));
3209
3210 const Float3 div_tau = MAKE_FLOAT3(
3211 amean(div_tau_x, div_tau_x_p),
3212 amean(div_tau_y, div_tau_y_p),
3213 amean(div_tau_z, div_tau_z_p));
3214
3215 const Float3 v_rel = v_f - v_p;
3216 const Float v_rel_length = length(v_rel);
3217
3218 const Float V_p = dx*dy*dz - phi*dx*dy*dz;
3219 const Float Re = rho_f*d*phi*v_rel_length/mu;
3220 Float Cd = pow(0.63 + 4.8/pow(Re, 0.5), 2.0);
3221 Float chi = 3.7 - 0.65*exp(-pow(1.5 - log10(Re), 2.0)/2.0);
3222
3223 if (v_rel_length < 1.0e-6) { // avoid Re=0 -> Cd=inf, chi=-nan
3224 Cd = 0.0;
3225 chi = 0.0;
3226 }
3227
3228 // Drag force
3229 const Float3 f_d = 0.125*Cd*rho_f*M_PI*d*d*phi*phi
3230 *v_rel_length*v_rel*pow(phi, -chi);
3231
3232 // Pressure gradient force
3233 const Float3 f_p =
3234 -1.0*gradient(dev_ns_p, i_x, i_y, i_z, dx, dy, dz)*V_p;
3235
3236 // Viscous force
3237 const Float3 f_v = -1.0*div_tau*V_p;
3238
3239 // Interaction force on particle (force) and fluid (f_pf)
3240 __syncthreads();
3241 const Float3 f_pf = f_d + f_p + f_v;
3242
3243 #ifdef CHECK_FLUID_FINITE
3244 /*
3245 printf("\nfindInteractionForce %d [%d,%d,%d]\n"
3246 "\tV_p = %f Re=%f Cd=%f chi=%f\n"
3247 "\tf_d = %+e %+e %+e\n"
3248 "\tf_p = %+e %+e %+e\n"
3249 "\tf_v = %+e %+e %+e\n",
3250 i, i_x, i_y, i_z, V_p, Re, Cd, chi,
3251 f_d.x, f_d.y, f_d.z,
3252 f_p.x, f_p.y, f_p.z,
3253 f_v.x, f_v.y, f_v.z);// */
3254 checkFiniteFloat3("f_d", i_x, i_y, i_z, f_d);
3255 checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
3256 checkFiniteFloat3("f_v", i_x, i_y, i_z, f_v);
3257 checkFiniteFloat3("f_pf", i_x, i_y, i_z, f_pf);
3258 #endif
3259
3260 __syncthreads();
3261 #ifdef SET_1
3262 dev_ns_f_pf[i] = f_pf;
3263 #endif
3264
3265 #ifdef SET_2
3266 dev_ns_f_pf[i] = f_d;
3267 #endif
3268
3269 __syncthreads();
3270 dev_force[i] += MAKE_FLOAT4(f_pf.x, f_pf.y, f_pf.z, 0.0);
3271 dev_ns_f_d[i] = MAKE_FLOAT4(f_d.x, f_d.y, f_d.z, 0.0);
3272 dev_ns_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
3273 dev_ns_f_v[i] = MAKE_FLOAT4(f_v.x, f_v.y, f_v.z, 0.0);
3274 dev_ns_f_sum[i] = MAKE_FLOAT4(
3275 f_d.x + f_p.x + f_v.x,
3276 f_d.y + f_p.y + f_v.y,
3277 f_d.z + f_p.z + f_v.z,
3278 0.0);
3279 }
3280 }
3281
3282 // Apply the fluid-particle interaction force to the fluid cell based on the
3283 // interaction forces from each particle in it
3284 __global__ void applyInteractionForceToFluid(
3285 const unsigned int* __restrict__ dev_gridParticleIndex, // in
3286 const unsigned int* __restrict__ dev_cellStart, // in
3287 const unsigned int* __restrict__ dev_cellEnd, // in
3288 const Float3* __restrict__ dev_ns_f_pf, // in
3289 Float3* __restrict__ dev_ns_F_pf) // out
3290 {
3291 // 3D thread index
3292 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
3293 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
3294 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
3295
3296 // Grid dimensions
3297 const unsigned int nx = devC_grid.num[0];
3298 const unsigned int ny = devC_grid.num[1];
3299 const unsigned int nz = devC_grid.num[2];
3300
3301 // Cell sizes
3302 const Float dx = devC_grid.L[0]/nx;
3303 const Float dy = devC_grid.L[1]/ny;
3304 const Float dz = devC_grid.L[2]/nz;
3305
3306 // Check that we are not outside the fluid grid
3307 if (x < nx && y < ny && z < nz) {
3308
3309 const unsigned int cellidx = idx(x,y,z);
3310
3311 // Calculate linear cell ID
3312 const unsigned int cellID = x + y * devC_grid.num[0]
3313 + (devC_grid.num[0] * devC_grid.num[1]) * z;
3314
3315 const unsigned int startidx = dev_cellStart[cellID];
3316 unsigned int endidx, i, origidx;
3317
3318 Float3 fi;
3319
3320 if (startidx != 0xffffffff) {
3321
3322 __syncthreads();
3323 endidx = dev_cellEnd[cellID];
3324
3325 for (i=startidx; i<endidx; ++i) {
3326
3327 __syncthreads();
3328 origidx = dev_gridParticleIndex[i];
3329 fi += dev_ns_f_pf[origidx];
3330 }
3331 }
3332
3333 const Float3 F_pf = fi/(dx*dy*dz);
3334
3335 #ifdef CHECK_FLUID_FINITE
3336 checkFiniteFloat3("F_pf", x, y, z, F_pf);
3337 #endif
3338 //printf("F_pf [%d,%d,%d] = %f,%f,%f\n", x,y,z, F_pf.x, F_pf.y, F_pf.z);
3339 __syncthreads();
3340 dev_ns_F_pf[cellidx] = F_pf;
3341 }
3342 }
3343
3344
3345 // Launch per cell face node.
3346 // Cell center ghost nodes must be set prior to call.
3347 __global__ void interpolateCenterToFace(
3348 const Float3* __restrict__ dev_in,
3349 Float* __restrict__ dev_out_x,
3350 Float* __restrict__ dev_out_y,
3351 Float* __restrict__ dev_out_z)
3352 {
3353 // 3D thread index
3354 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
3355 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
3356 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
3357
3358 // Check that we are not outside the fluid grid
3359 if (x <= devC_grid.num[0]
3360 && y <= devC_grid.num[1]
3361 && z <= devC_grid.num[2]) {
3362
3363 const unsigned int faceidx = vidx(x,y,z);
3364
3365 __syncthreads();
3366 const Float3 xn = dev_in[idx(x-1,y,z)];
3367 const Float3 yn = dev_in[idx(x,y-1,z)];
3368 const Float3 zn = dev_in[idx(x,y,z-1)];
3369 const Float3 center = dev_in[idx(x,y,z)];
3370
3371 const Float x_val = amean(center.x, xn.x);
3372 const Float y_val = amean(center.y, yn.y);
3373 const Float z_val = amean(center.z, zn.z);
3374
3375 __syncthreads();
3376 //printf("c2f [%d,%d,%d] = %f,%f,%f\n", x,y,z, x_val, y_val, z_val);
3377 dev_out_x[faceidx] = x_val;
3378 dev_out_y[faceidx] = y_val;
3379 dev_out_z[faceidx] = z_val;
3380 }
3381 }
3382
3383 // Launch per cell center node
3384 __global__ void interpolateFaceToCenter(
3385 Float* dev_in_x,
3386 Float* dev_in_y,
3387 Float* dev_in_z,
3388 Float3* dev_out)
3389 {
3390 // 3D thread index
3391 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
3392 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
3393 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
3394
3395 // Grid dimensions
3396 const unsigned int nx = devC_grid.num[0];
3397 const unsigned int ny = devC_grid.num[1];
3398 const unsigned int nz = devC_grid.num[2];
3399
3400 // Check that we are not outside the fluid grid
3401 //if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
3402 if (x < nx && y < ny && z < nz) {
3403
3404 __syncthreads();
3405 const Float x_n = dev_in_x[vidx(x,y,z)];
3406 const Float x_p = dev_in_x[vidx(x+1,y,z)];
3407 const Float y_n = dev_in_y[vidx(x,y,z)];
3408 const Float y_p = dev_in_y[vidx(x,y+1,z)];
3409 const Float z_n = dev_in_z[vidx(x,y,z)];
3410 const Float z_p = dev_in_z[vidx(x,y,z+1)];
3411
3412 const Float3 val = MAKE_FLOAT3(
3413 amean(x_n, x_p),
3414 amean(y_n, y_p),
3415 amean(z_n, z_p));
3416
3417 __syncthreads();
3418 //printf("f2c [%d,%d,%d] = %f, %f, %f\n", x,y,z, val.x, val.y, val.z);
3419 dev_out[idx(x,y,z)] = val;
3420 }
3421 }
3422
3423 // Launch per cell face node. Set velocity ghost nodes beforehand.
3424 // Find div(tau) at all cell faces.
3425 // Warning: The grid-corner values will be invalid, along with the non-normal
3426 // components of the ghost nodes
3427 __global__ void findFaceDivTau(
3428 const Float* __restrict__ dev_ns_v_x, // in
3429 const Float* __restrict__ dev_ns_v_y, // in
3430 const Float* __restrict__ dev_ns_v_z, // in
3431 const Float mu, // in
3432 Float* __restrict__ dev_ns_div_tau_x, // out
3433 Float* __restrict__ dev_ns_div_tau_y, // out
3434 Float* __restrict__ dev_ns_div_tau_z) // out
3435 {
3436 // 3D thread index
3437 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
3438 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
3439 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
3440
3441 // Grid dimensions
3442 const unsigned int nx = devC_grid.num[0];
3443 const unsigned int ny = devC_grid.num[1];
3444 const unsigned int nz = devC_grid.num[2];
3445
3446 // Cell sizes
3447 const Float dx = devC_grid.L[0]/nx;
3448 const Float dy = devC_grid.L[1]/ny;
3449 const Float dz = devC_grid.L[2]/nz;
3450
3451 // Check that we are not outside the fluid grid
3452 //if (x <= nx && y <= ny && z <= nz) {
3453 if (x < nx && y < ny && z < nz) {
3454
3455 const unsigned int faceidx = vidx(x,y,z);
3456
3457 __syncthreads();
3458 const Float v_x_xn = dev_ns_v_x[vidx(x-1,y,z)];
3459 const Float v_x = dev_ns_v_x[faceidx];
3460 const Float v_x_xp = dev_ns_v_x[vidx(x+1,y,z)];
3461 const Float v_x_yn = dev_ns_v_x[vidx(x,y-1,z)];
3462 const Float v_x_yp = dev_ns_v_x[vidx(x,y+1,z)];
3463 const Float v_x_zn = dev_ns_v_x[vidx(x,y,z-1)];
3464 const Float v_x_zp = dev_ns_v_x[vidx(x,y,z+1)];
3465
3466 const Float v_y_xn = dev_ns_v_y[vidx(x-1,y,z)];
3467 const Float v_y_xp = dev_ns_v_y[vidx(x+1,y,z)];
3468 const Float v_y_yn = dev_ns_v_y[vidx(x,y-1,z)];
3469 const Float v_y = dev_ns_v_y[faceidx];
3470 const Float v_y_yp = dev_ns_v_y[vidx(x,y+1,z)];
3471 const Float v_y_zn = dev_ns_v_y[vidx(x,y,z-1)];
3472 const Float v_y_zp = dev_ns_v_y[vidx(x,y,z+1)];
3473
3474 const Float v_z_xn = dev_ns_v_z[vidx(x-1,y,z)];
3475 const Float v_z_xp = dev_ns_v_z[vidx(x+1,y,z)];
3476 const Float v_z_yn = dev_ns_v_z[vidx(x,y-1,z)];
3477 const Float v_z_yp = dev_ns_v_z[vidx(x,y+1,z)];
3478 const Float v_z_zn = dev_ns_v_z[vidx(x,y,z-1)];
3479 const Float v_z = dev_ns_v_z[faceidx];
3480 const Float v_z_zp = dev_ns_v_z[vidx(x,y,z+1)];
3481
3482 const Float div_tau_x =
3483 mu*(
3484 (v_x_xp - 2.0*v_x + v_x_xn)/(dx*dx) +
3485 (v_x_yp - 2.0*v_x + v_x_yn)/(dy*dy) +
3486 (v_x_zp - 2.0*v_x + v_x_zn)/(dz*dz));
3487 const Float div_tau_y =
3488 mu*(
3489 (v_y_xp - 2.0*v_y + v_y_xn)/(dx*dx) +
3490 (v_y_yp - 2.0*v_y + v_y_yn)/(dy*dy) +
3491 (v_y_zp - 2.0*v_y + v_y_zn)/(dz*dz));
3492 const Float div_tau_z =
3493 mu*(
3494 (v_z_xp - 2.0*v_z + v_z_xn)/(dx*dx) +
3495 (v_z_yp - 2.0*v_z + v_z_yn)/(dy*dy) +
3496 (v_z_zp - 2.0*v_z + v_z_zn)/(dz*dz));
3497
3498 __syncthreads();
3499 //printf("div_tau [%d,%d,%d] = %f, %f, %f\n", x,y,z,
3500 //div_tau_x, div_tau_y, div_tau_z);
3501 dev_ns_div_tau_x[faceidx] = div_tau_x;
3502 dev_ns_div_tau_y[faceidx] = div_tau_y;
3503 dev_ns_div_tau_z[faceidx] = div_tau_z;
3504 }
3505
3506 }
3507
3508 // Print final heads and free memory
3509 void DEM::endNSdev()
3510 {
3511 freeNSmemDev();
3512 }
3513
3514 // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4