tported Darcy routines to CUDA, compiles, needs testing - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit 1658688f8ba0d701c10244801f13fd46821c799c
(DIR) parent 27f9122518041ed1ed936e25643d2f9f61cc8100
(HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
Date: Wed, 9 Oct 2013 11:11:55 +0200
ported Darcy routines to CUDA, compiles, needs testing
Diffstat:
M src/darcy.cpp | 221 ++++++++++++++++++-------------
A src/darcy.cuh | 616 +++++++++++++++++++++++++++++++
M src/device.cu | 172 ++++++++++++++++++++++++++-----
M src/sphere.h | 5 +++++
4 files changed, 898 insertions(+), 116 deletions(-)
---
(DIR) diff --git a/src/darcy.cpp b/src/darcy.cpp
t@@ -10,6 +10,9 @@
#include "sphere.h"
#include "utility.h"
+// Enable line below to make x and y boundaries periodic
+#define PERIODIC_XY
+
// Initialize memory
void DEM::initDarcyMem()
{
t@@ -50,7 +53,7 @@ unsigned int DEM::idx(
//return x + d_nx*y + d_nx*d_ny*z;
// with ghost nodes
- // the ghost nodes are placed at -1 and WIDTH
+ // the ghost nodes are placed at x,y,z = -1 and WIDTH
return (x+1) + (d_nx+2)*(y+1) + (d_nx+2)*(d_ny+2)*(z+1);
}
t@@ -75,17 +78,26 @@ void DEM::initDarcyVals()
// read from input binary
// Hydraulic permeability [m^2]
- d_K[cellidx] = k*rho*-params.g[2]/params.nu;
+ //d_K[cellidx] = k*rho*-params.g[2]/params.nu;
+ d_K[cellidx] = 0.5;
// Hydraulic storativity [-]
d_Ss[cellidx] = 8.0e-3;
//d_Ss[cellidx] = 1.0;
- // Hydraulic recharge [Pa/s]
+ // Hydraulic recharge [s^-1]
d_W[cellidx] = 0.0;
}
}
}
+
+ // Extract water from all cells in center
+ ix = d_nx/2-1; iy = d_ny/2-1;
+ Float cellvolume = d_dx*d_dy*d_dz;
+ for (iz=0; iz<d_nz; ++iz) {
+ //d_W[idx(ix,iy,iz)] = -0.1*cellvolume;
+ d_W[idx(ix,iy,iz)] = -1.0;
+ }
}
t@@ -169,7 +181,12 @@ void DEM::findDarcyTransmissivities()
// Kozeny-Carman parameter
//Float a = 1.0e-8;
- Float a = 1.0;
+ //Float a = 1.0;
+
+ // Representative grain radius
+ Float r_bar2 = meanRadius()*2.0;
+ // Grain size factor for Kozeny-Carman relationship
+ Float d_factor = r_bar2*r_bar2/180.0;
unsigned int ix, iy, iz, cellidx;
Float K, k;
t@@ -185,7 +202,9 @@ void DEM::findDarcyTransmissivities()
// Calculate permeability from the Kozeny-Carman relationship
// Nelson 1994 eq. 1c
// Boek 2012 eq. 16
- k = a*phi*phi*phi/(1.0 - phi*phi);
+ //k = a*phi*phi*phi/(1.0 - phi*phi);
+ // Schwartz and Zhang 2003
+ k = phi*phi*phi/((1.0-phi)*(1.0-phi)) * d_factor;
// Save hydraulic conductivity [m/s]
//K = d_K[cellidx];
t@@ -261,7 +280,8 @@ void DEM::findDarcyGradients()
// With ghost-nodes
for (ix=0; ix<d_nx; ++ix) {
for (iy=0; iy<d_ny; ++iy) {
- for (iz=1; iz<d_nz-1; ++iz) {
+ //for (iz=1; iz<d_nz-1; ++iz) {
+ for (iz=0; iz<d_nz; ++iz) {
cellidx = idx(ix,iy,iz);
t@@ -335,9 +355,10 @@ void DEM::explDarcyStep()
checkDarcyTimestep();
// Cell dims squared
- const Float dx2 = d_dx*d_dx;
- const Float dy2 = d_dy*d_dy;
- const Float dz2 = d_dz*d_dz;
+ const Float dxdx = d_dx*d_dx;
+ const Float dydy = d_dy*d_dy;
+ const Float dzdz = d_dz*d_dz;
+ const Float dxdydz = d_dx*d_dy*d_dz;
//setDarcyBCNeumannZero();
t@@ -358,95 +379,104 @@ void DEM::explDarcyStep()
// Cell linear index
cellidx = idx(ix,iy,iz);
- // If x,y,z boundaries are fixed values:
- // Enforce Dirichlet BC
- if (ix == 0 || iy == 0 || iz == 0 ||
+ // If x,y,z boundaries are fixed values: Enforce Dirichlet BC
+ /*if (ix == 0 || iy == 0 || iz == 0 ||
ix == d_nx-1 || iy == d_ny-1 || iz == d_nz-1) {
+ d_H_new[cellidx] = d_H[cellidx];*/
+
+ // If z boundaries are fixed val, x and y are periodic:
+ /*if (iz == 0 || iz == d_nz-1) {
d_H_new[cellidx] = d_H[cellidx];
- // If z boundaries are periodic:
- //if (iz == 0 || iz == d_nz-1) {
- //d_H_new[cellidx] = d_H[cellidx];
- } else {
- // Cell hydraulic conductivity
- K = d_K[cellidx];
-
- // Cell hydraulic transmissivities
- Tx = K*d_dx;
- Ty = K*d_dy;
- Tz = K*d_dz;
-
- // Cell hydraulic head
- H = d_H[cellidx];
-
- // Harmonic mean of transmissivity
- // (in neg. and pos. direction along axis from cell)
- // with periodic x and y boundaries
- // without ghost nodes
- /*
- if (ix == 0)
- gradx_n = hmean(Tx, d_T[idx(d_nx-1,iy,iz)].x)
- * (d_H[idx(d_nx-1,iy,iz)] - H)/dx2;
- else
- gradx_n = hmean(Tx, d_T[idx(ix-1,iy,iz)].x)
- * (d_H[idx(ix-1,iy,iz)] - H)/dx2;
-
- if (ix == d_nx-1)
- gradx_p = hmean(Tx, d_T[idx(0,iy,iz)].x)
- * (d_H[idx(0,iy,iz)] - H)/dx2;
- else
- gradx_p = hmean(Tx, d_T[idx(ix+1,iy,iz)].x)
- * (d_H[idx(ix+1,iy,iz)] - H)/dx2;
-
- if (iy == 0)
- grady_n = hmean(Ty, d_T[idx(ix,d_ny-1,iz)].y)
- * (d_H[idx(ix,d_ny-1,iz)] - H)/dy2;
- else
- grady_n = hmean(Ty, d_T[idx(ix,iy-1,iz)].y)
- * (d_H[idx(ix,iy-1,iz)] - H)/dy2;
-
- if (iy == d_ny-1)
- grady_p = hmean(Ty, d_T[idx(ix,0,iz)].y)
- * (d_H[idx(ix,0,iz)] - H)/dy2;
- else
- grady_p = hmean(Ty, d_T[idx(ix,iy+1,iz)].y)
- * (d_H[idx(ix,iy+1,iz)] - H)/dy2;
- */
+ } else {*/
+
+ // Cell hydraulic conductivity
+ K = d_K[cellidx];
+
+ // Cell hydraulic transmissivities
+ Tx = K*d_dx;
+ Ty = K*d_dy;
+ Tz = K*d_dz;
+ // Cell hydraulic head
+ H = d_H[cellidx];
+
+ // Harmonic mean of transmissivity
+ // (in neg. and pos. direction along axis from cell)
+ // with periodic x and y boundaries
+ // without ghost nodes
+ /*
+ if (ix == 0)
+ gradx_n = hmean(Tx, d_T[idx(d_nx-1,iy,iz)].x)
+ * (d_H[idx(d_nx-1,iy,iz)] - H)/dx2;
+ else
gradx_n = hmean(Tx, d_T[idx(ix-1,iy,iz)].x)
* (d_H[idx(ix-1,iy,iz)] - H)/dx2;
+
+ if (ix == d_nx-1)
+ gradx_p = hmean(Tx, d_T[idx(0,iy,iz)].x)
+ * (d_H[idx(0,iy,iz)] - H)/dx2;
+ else
gradx_p = hmean(Tx, d_T[idx(ix+1,iy,iz)].x)
* (d_H[idx(ix+1,iy,iz)] - H)/dx2;
+ if (iy == 0)
+ grady_n = hmean(Ty, d_T[idx(ix,d_ny-1,iz)].y)
+ * (d_H[idx(ix,d_ny-1,iz)] - H)/dy2;
+ else
grady_n = hmean(Ty, d_T[idx(ix,iy-1,iz)].y)
* (d_H[idx(ix,iy-1,iz)] - H)/dy2;
+
+ if (iy == d_ny-1)
+ grady_p = hmean(Ty, d_T[idx(ix,0,iz)].y)
+ * (d_H[idx(ix,0,iz)] - H)/dy2;
+ else
grady_p = hmean(Ty, d_T[idx(ix,iy+1,iz)].y)
* (d_H[idx(ix,iy+1,iz)] - H)/dy2;
-
+ */
+
+ gradx_n = hmean(Tx, d_T[idx(ix-1,iy,iz)].x)
+ * (d_H[idx(ix-1,iy,iz)] - H)/dxdx;
+ gradx_p = hmean(Tx, d_T[idx(ix+1,iy,iz)].x)
+ * (d_H[idx(ix+1,iy,iz)] - H)/dxdx;
+
+ grady_n = hmean(Ty, d_T[idx(ix,iy-1,iz)].y)
+ * (d_H[idx(ix,iy-1,iz)] - H)/dydy;
+ grady_p = hmean(Ty, d_T[idx(ix,iy+1,iz)].y)
+ * (d_H[idx(ix,iy+1,iz)] - H)/dydy;
+
+ // Neumann (no-flow) boundary condition at +z and -z boundaries
+ // enforced by a gradient value of 0.0
+ if (iz == 0)
+ gradz_n = 0.0;
+ else
gradz_n = hmean(Tz, d_T[idx(ix,iy,iz-1)].z)
- * (d_H[idx(ix,iy,iz-1)] - H)/dz2;
+ * (d_H[idx(ix,iy,iz-1)] - H)/dzdz;
+ if (iz == d_nz-1)
+ gradz_p = 0.0;
+ else
gradz_p = hmean(Tz, d_T[idx(ix,iy,iz+1)].z)
- * (d_H[idx(ix,iy,iz+1)] - H)/dz2;
-
- /*std::cerr << ix << ',' << iy << ',' << iz << '\t'
- << H << '\t' << Tx << ',' << Ty << ',' << Tz << '\t'
- << gradx_n << ',' << gradx_p << '\t'
- << grady_n << ',' << grady_p << '\t'
- << gradz_n << ',' << gradz_p << std::endl;*/
-
- // Cell hydraulic storativity
- S = d_Ss[cellidx]*d_dx*d_dy*d_dz;
-
- // Laplacian operator
- deltaH = time.dt/S *
- ( gradx_n + gradx_p
- + grady_n + grady_p
- + gradz_n + gradz_p
- + d_W[cellidx] );
-
- // Calculate new hydraulic pressure in cell
- d_H_new[cellidx] = H + deltaH;
- }
+ * (d_H[idx(ix,iy,iz+1)] - H)/dzdz;
+
+ /*std::cerr << ix << ',' << iy << ',' << iz << '\t'
+ << H << '\t' << Tx << ',' << Ty << ',' << Tz << '\t'
+ << gradx_n << ',' << gradx_p << '\t'
+ << grady_n << ',' << grady_p << '\t'
+ << gradz_n << ',' << gradz_p << std::endl;*/
+
+ // Cell hydraulic storativity
+ S = d_Ss[cellidx]*dxdydz;
+
+ // Laplacian operator
+ deltaH = time.dt/S *
+ ( gradx_n + gradx_p
+ + grady_n + grady_p
+ + gradz_n + gradz_p
+ + d_W[cellidx] );
+
+ // Calculate new hydraulic pressure in cell
+ d_H_new[cellidx] = H + deltaH;
+ //}
}
}
}
t@@ -607,6 +637,7 @@ Float DEM::cellPorosity(
return phi;
}
+// Calculate the porosity for each cell
void DEM::findPorosities()
{
unsigned int ix, iy, iz, cellidx;
t@@ -619,6 +650,16 @@ void DEM::findPorosities()
}
}
+// Returns the mean particle radius
+Float DEM::meanRadius()
+{
+ unsigned int i;
+ Float r_sum;
+ for (i=0; i<np; ++i)
+ r_sum += k.x[i].w;
+ return r_sum/((Float)np);
+}
+
// Find particles with centres inside a spatial interval
// NOTE: This function is untested and unused
std::vector<unsigned int> DEM::particlesInCell(
t@@ -709,8 +750,8 @@ void DEM::checkDarcyTimestep()
if (value > 0.5) {
std::cerr << "Error! The explicit darcy solution will be unstable.\n"
<< "This happens due to a combination of the following:\n"
- << " - The transmissivity T (i.e. hydraulic conductivity, K) is too large"
- << " (" << T_max << ")\n"
+ << " - The transmissivity T (i.e. hydraulic conductivity, K)"
+ << " is too large (" << T_max << ")\n"
<< " - The storativity S is too small"
<< " (" << S_min << ")\n"
<< " - The time step is too large"
t@@ -722,7 +763,7 @@ void DEM::checkDarcyTimestep()
}
}
-// Solve Darcy flow on a regular, cubic grid
+// Initialize darcy arrays, their values, and check the time step length
void DEM::initDarcy(const Float cellsizemultiplier)
{
if (params.nu <= 0.0) {
t@@ -762,15 +803,15 @@ void DEM::initDarcy(const Float cellsizemultiplier)
// Print final heads and free memory
void DEM::endDarcy()
{
- FILE* Kfile;
+ /*FILE* Kfile;
if ((Kfile = fopen("d_K.txt","w"))) {
printDarcyArray(Kfile, d_K);
fclose(Kfile);
} else {
fprintf(stderr, "Error, could not open d_K.txt\n");
- }
- printDarcyArray(stdout, d_phi, "d_phi");
- printDarcyArray(stdout, d_K, "d_K");
+ }*/
+ //printDarcyArray(stdout, d_phi, "d_phi");
+ //printDarcyArray(stdout, d_K, "d_K");
//printDarcyArray(stdout, d_H, "d_H");
//printDarcyArray(stdout, d_V, "d_V");
freeDarcyMem();
(DIR) diff --git a/src/darcy.cuh b/src/darcy.cuh
t@@ -0,0 +1,616 @@
+// darcy.cu
+// CUDA implementation of Darcy flow
+
+// Enable line below to perform Darcy flow computations on the GPU, disable for
+// CPU computation
+#define DARCY_GPU
+
+#include <iostream>
+#include <cuda.h>
+//#include <cutil_math.h>
+#include <helper_math.h>
+
+#include "vector_arithmetic.h" // for arbitrary prec. vectors
+#include "sphere.h"
+#include "datatypes.h"
+#include "utility.cuh"
+#include "utility.h"
+#include "constants.cuh"
+#include "debug.h"
+
+// Initialize memory
+void DEM::initDarcyMemDev(void)
+{
+ // number of cells
+ //unsigned int ncells = d_nx*d_ny*d_nz; // without ghost nodes
+ unsigned int ncells = (d_nx+2)*(d_ny+2)*(d_nz+2); // with ghost nodes
+ unsigned int memSizeF = sizeof(Float) * ncells;
+
+ cudaMalloc((void**)&dev_d_H, memSizeF); // hydraulic pressure
+ cudaMalloc((void**)&dev_d_H_new, memSizeF); // new pressure matrix
+ cudaMalloc((void**)&dev_d_V, memSizeF*3); // cell hydraulic velocity
+ cudaMalloc((void**)&dev_d_dH, memSizeF*3); // hydraulic pressure gradient
+ cudaMalloc((void**)&dev_d_K, memSizeF); // hydraulic conductivity
+ cudaMalloc((void**)&dev_d_T, memSizeF*3); // hydraulic transmissivity
+ cudaMalloc((void**)&dev_d_Ss, memSizeF); // hydraulic storativi
+ cudaMalloc((void**)&dev_d_W, memSizeF); // hydraulic recharge
+ cudaMalloc((void**)&dev_d_phi, memSizeF); // cell porosity
+
+ checkForCudaErrors("End of initDarcyMemDev");
+}
+
+// Free memory
+void DEM::freeDarcyMemDev()
+{
+ cudaFree(dev_d_H);
+ cudaFree(dev_d_H_new);
+ cudaFree(dev_d_V);
+ cudaFree(dev_d_dH);
+ cudaFree(dev_d_K);
+ cudaFree(dev_d_T);
+ cudaFree(dev_d_Ss);
+ cudaFree(dev_d_W);
+ cudaFree(dev_d_phi);
+}
+
+// Transfer to device
+void DEM::transferDarcyToGlobalDeviceMemory(int statusmsg)
+{
+ checkForCudaErrors("Before attempting cudaMemcpy in "
+ "transferDarcyToGlobalDeviceMemory");
+
+ if (verbose == 1 && statusmsg == 1)
+ std::cout << " Transfering darcy data to the device: ";
+
+ // number of cells
+ //unsigned int ncells = d_nx*d_ny*d_nz; // without ghost nodes
+ unsigned int ncells = (d_nx+2)*(d_ny+2)*(d_nz+2); // with ghost nodes
+ unsigned int memSizeF = sizeof(Float) * ncells;
+
+ // Kinematic particle values
+ cudaMemcpy(dev_d_H, d_H, memSizeF, cudaMemcpyHostToDevice);
+ checkForCudaErrors("transferDarcyToGlobalDeviceMemory after first cudaMemcpy");
+ cudaMemcpy(dev_d_H_new, d_H_new, memSizeF, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_d_V, d_V, memSizeF*3, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_d_dH, d_dH, memSizeF*3, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_d_K, d_K, memSizeF, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_d_T, d_T, memSizeF*3, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_d_Ss, d_Ss, memSizeF, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_d_W, d_W, memSizeF, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_d_phi, d_phi, memSizeF, cudaMemcpyHostToDevice);
+
+ checkForCudaErrors("End of transferDarcyToGlobalDeviceMemory");
+ if (verbose == 1 && statusmsg == 1)
+ std::cout << "Done" << std::endl;
+}
+
+// Transfer from device
+void DEM::transferDarcyFromGlobalDeviceMemory(int statusmsg)
+{
+ if (verbose == 1 && statusmsg == 1)
+ std::cout << " Transfering darcy data from the device: ";
+
+ // number of cells
+ //unsigned int ncells = d_nx*d_ny*d_nz; // without ghost nodes
+ unsigned int ncells = (d_nx+2)*(d_ny+2)*(d_nz+2); // with ghost nodes
+ unsigned int memSizeF = sizeof(Float) * ncells;
+
+ // Kinematic particle values
+ cudaMemcpy(d_H, dev_d_H, memSizeF, cudaMemcpyDeviceToHost);
+ cudaMemcpy(d_H_new, dev_d_H_new, memSizeF, cudaMemcpyDeviceToHost);
+ cudaMemcpy(d_V, dev_d_V, memSizeF*3, cudaMemcpyDeviceToHost);
+ cudaMemcpy(d_dH, dev_d_dH, memSizeF*3, cudaMemcpyDeviceToHost);
+ cudaMemcpy(d_K, dev_d_K, memSizeF, cudaMemcpyDeviceToHost);
+ cudaMemcpy(d_T, dev_d_T, memSizeF*3, cudaMemcpyDeviceToHost);
+ cudaMemcpy(d_Ss, dev_d_Ss, memSizeF, cudaMemcpyDeviceToHost);
+ cudaMemcpy(d_W, dev_d_W, memSizeF, cudaMemcpyDeviceToHost);
+ cudaMemcpy(d_phi, dev_d_phi, memSizeF, cudaMemcpyDeviceToHost);
+
+ checkForCudaErrors("End of transferDarcyFromGlobalDeviceMemory");
+ if (verbose == 1 && statusmsg == 1)
+ std::cout << "Done" << std::endl;
+}
+
+// Get linear index from 3D grid position
+__device__ unsigned int idx(
+ const int x, const int y, const int z)
+{
+ // without ghost nodes
+ //return x + dev_grid.num[0]*y + dev_grid.num[0]*dev_grid.num[1]*z;
+
+ // with ghost nodes
+ // the ghost nodes are placed at x,y,z = -1 and WIDTH
+ return (x+1) + (devC_grid.num[0]+2)*(y+1) +
+ (devC_grid.num[0]+2)*(devC_grid.num[1]+2)*(z+1);
+}
+
+__device__ void copyDarcyValsDev(
+ unsigned int read, unsigned int write,
+ Float* dev_d_H, Float* dev_d_H_new,
+ Float3* dev_d_V, Float3* dev_d_dH,
+ Float* dev_d_K, Float3* dev_d_T,
+ Float* dev_d_Ss, Float* dev_d_W,
+ Float* dev_d_phi)
+{
+ // Coalesced read
+ const Float H = dev_d_H[read];
+ const Float H_new = dev_d_H_new[read];
+ const Float3 V = dev_d_V[read];
+ const Float3 dH = dev_d_dH[read];
+ const Float K = dev_d_K[read];
+ const Float3 T = dev_d_T[read];
+ const Float Ss = dev_d_Ss[read];
+ const Float W = dev_d_W[read];
+ const Float phi = dev_d_phi[read];
+
+ // Coalesced write
+ __syncthreads();
+ dev_d_H[write] = H;
+ dev_d_H_new[write] = H_new;
+ dev_d_V[write] = V;
+ dev_d_dH[write] = dH;
+ dev_d_K[write] = K;
+ dev_d_T[write] = T;
+ dev_d_Ss[write] = Ss;
+ dev_d_W[write] = W;
+ dev_d_phi[write] = phi;
+}
+
+// Update ghost nodes from their parent cell values
+// The edge (diagonal) cells are not written since they are note read
+// Launch this kernel for all cells in the grid
+__global__ void setDarcyGhostNodesDev(
+ Float* dev_d_H, Float* dev_d_H_new,
+ Float3* dev_d_V, Float3* dev_d_dH,
+ Float* dev_d_K, Float3* dev_d_T,
+ Float* dev_d_Ss, Float* dev_d_W,
+ Float* dev_d_phi)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // 1D thread index
+ const unsigned int cellidx = idx(x,y,z);
+
+ // 1D position of ghost node
+ unsigned int writeidx;
+
+ // check that we are not outside the fluid grid
+ if (x < nx && y < ny && z < nz) {
+
+ if (x == 0) {
+ writeidx = idx(nx,y,z);
+ copyDarcyValsDev(cellidx, writeidx,
+ dev_d_H, dev_d_H_new,
+ dev_d_V, dev_d_dH,
+ dev_d_K, dev_d_T,
+ dev_d_Ss, dev_d_W,
+ dev_d_phi);
+ }
+ if (x == nx-1) {
+ writeidx = idx(-1,y,z);
+ copyDarcyValsDev(cellidx, writeidx,
+ dev_d_H, dev_d_H_new,
+ dev_d_V, dev_d_dH,
+ dev_d_K, dev_d_T,
+ dev_d_Ss, dev_d_W,
+ dev_d_phi);
+ }
+
+ if (y == 0) {
+ writeidx = idx(x,ny,z);
+ copyDarcyValsDev(cellidx, writeidx,
+ dev_d_H, dev_d_H_new,
+ dev_d_V, dev_d_dH,
+ dev_d_K, dev_d_T,
+ dev_d_Ss, dev_d_W,
+ dev_d_phi);
+ }
+ if (y == ny-1) {
+ writeidx = idx(x,-1,z);
+ copyDarcyValsDev(cellidx, writeidx,
+ dev_d_H, dev_d_H_new,
+ dev_d_V, dev_d_dH,
+ dev_d_K, dev_d_T,
+ dev_d_Ss, dev_d_W,
+ dev_d_phi);
+ }
+
+ if (z == 0) {
+ writeidx = idx(x,y,nz);
+ copyDarcyValsDev(cellidx, writeidx,
+ dev_d_H, dev_d_H_new,
+ dev_d_V, dev_d_dH,
+ dev_d_K, dev_d_T,
+ dev_d_Ss, dev_d_W,
+ dev_d_phi);
+ }
+ if (z == nz-1) {
+ writeidx = idx(x,y,-1);
+ copyDarcyValsDev(cellidx, writeidx,
+ dev_d_H, dev_d_H_new,
+ dev_d_V, dev_d_dH,
+ dev_d_K, dev_d_T,
+ dev_d_Ss, dev_d_W,
+ dev_d_phi);
+ }
+ }
+}
+
+// Find the porosity in each cell
+__global__ void findPorositiesDev(
+ unsigned int* dev_cellStart,
+ unsigned int* dev_cellEnd,
+ Float4* dev_x_sorted,
+ Float* dev_d_phi)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // Cell dimensions
+ const Float dx = devC_grid.L[0]/nx;
+ const Float dy = devC_grid.L[1]/ny;
+ const Float dz = devC_grid.L[2]/nz;
+ const Float cell_volume = dx*dy*dz;
+
+ Float void_volume = cell_volume;
+ Float4 xr; // particle pos. and radius
+
+ // check that we are not outside the fluid grid
+ if (x < nx && y < ny && z < nz) {
+
+ // Calculate linear cell ID
+ const unsigned int cellID = x + y*devC_grid.num[0]
+ + (devC_grid.num[0] * devC_grid.num[1])*z;
+
+ // Lowest particle index in cell
+ const unsigned int startIdx = dev_cellStart[cellID];
+
+ // Highest particle index in cell
+ const unsigned int endIdx = dev_cellEnd[cellID];
+
+ // Iterate over cell particles
+ for (unsigned int i = startIdx; i<endIdx; ++i) {
+
+ // Read particle position and radius
+ __syncthreads();
+ xr = dev_x_sorted[i];
+
+ // Subtract particle volume from void volume
+ void_volume -= 4.0/3.0*M_PI*xr.w*xr.w*xr.w;
+ }
+
+ // Make sure that the porosity is in the interval ]0.0;1.0[
+ const Float phi = fmin(0.99, fmax(0.01, void_volume/cell_volume));
+
+ // Save porosity
+ __syncthreads();
+
+ dev_d_phi[idx(x,y,z)] = phi;
+ }
+}
+
+
+// Find cell transmissivities from hydraulic conductivities and cell dimensions
+// Make sure to compute the porosities (d_phi) beforehand
+// d_factor: Grain size factor for Kozeny-Carman relationship
+__global__ void findDarcyTransmissivitiesDev(
+ Float* dev_d_K,
+ Float3* dev_d_T,
+ Float* dev_d_phi,
+ Float d_factor)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // Grid sizes
+ const Float d_dx = devC_grid.L[0]/nx;
+ const Float d_dy = devC_grid.L[1]/ny;
+ const Float d_dz = devC_grid.L[2]/nz;
+
+ // Density of the fluid [kg/m^3]
+ const Float rho = 1000.0;
+
+ // Check that we are not outside the fluid grid
+ if (x < nx && y < ny && z < nz) {
+
+ // 1D thread index
+ const unsigned int cellidx = idx(x,y,z);
+
+ __syncthreads();
+
+ // Cell porosity [-]
+ const Float phi = dev_d_phi[cellidx];
+
+ // Calculate permeability from the Kozeny-Carman relationship
+ // Nelson 1994 eq. 1c
+ // Boek 2012 eq. 16
+ //k = a*phi*phi*phi/(1.0 - phi*phi);
+ // Schwartz and Zhang 2003
+ Float k = phi*phi*phi/((1.0-phi)*(1.0-phi)) * d_factor;
+
+
+ __syncthreads();
+
+ // Save hydraulic conductivity [m/s]
+ const Float K = k*rho*-devC_params.g[2]/devC_params.nu;
+ //K = 0.5;
+ dev_d_K[cellidx] = K;
+
+ // Hydraulic transmissivity [m2/s]
+ Float3 T = {K*d_dx, K*d_dy, K*d_dz};
+ dev_d_T[cellidx] = T;
+
+ }
+}
+
+// Find the spatial gradient in e.g.pressures per cell
+// using first order central differences
+__global__ void findDarcyGradientsDev(
+ Float* dev_scalarfield, // in
+ Float3* dev_vectorfield) // out
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // Grid sizes
+ const Float dx = devC_grid.L[0]/nx;
+ const Float dy = devC_grid.L[1]/ny;
+ const Float dz = devC_grid.L[2]/nz;
+
+ // 1D thread index
+ const unsigned int cellidx = idx(x,y,z);
+
+ // Check that we are not outside the fluid grid
+ Float3 gradient;
+ if (x < nx && y < ny && z < nz) {
+
+ __syncthreads();
+
+ // x
+ gradient.x =
+ (dev_scalarfield[idx(x+1,y,z)] - dev_scalarfield[idx(x-1,y,z)])
+ /(2.0*dx);
+
+ // y
+ gradient.y =
+ (dev_scalarfield[idx(x,y+1,z)] - dev_scalarfield[idx(x,y-1,z)])
+ /(2.0*dy);
+
+ // z
+ gradient.z =
+ (dev_scalarfield[idx(x,y,z+1)] - dev_scalarfield[idx(x,y,z-1)])
+ /(2.0*dz);
+
+ __syncthreads();
+ dev_vectorfield[cellidx] = gradient;
+ }
+}
+
+// Arithmetic mean of two numbers
+__device__ Float ameanDev(Float a, Float b) {
+ return (a+b)*0.5;
+}
+
+// Harmonic mean of two numbers
+__device__ Float hmeanDev(Float a, Float b) {
+ return (2.0*a*b)/(a+b);
+}
+
+// Perform an explicit step.
+__global__ void explDarcyStepDev(
+ Float* dev_d_H,
+ Float* dev_d_H_new,
+ Float3* dev_d_T,
+ Float* dev_d_Ss,
+ Float* dev_d_W)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // Grid sizes
+ const Float dx = devC_grid.L[0]/nx;
+ const Float dy = devC_grid.L[1]/ny;
+ const Float dz = devC_grid.L[2]/nz;
+
+ // 1D thread index
+ const unsigned int cellidx = idx(x,y,z);
+
+ // Check that we are not outside the fluid grid
+ if (x < nx && y < ny && z < nz) {
+
+ // Explicit 3D finite difference scheme
+ // new = old + production*timestep + gradient*timestep
+
+ // Enforce Dirichlet BC
+ if (x == 0 || y == 0 || z == 0 ||
+ x == nx-1 || y == ny-1 || z == nz-1) {
+ __syncthreads();
+ dev_d_H_new[cellidx] = dev_d_H[cellidx];
+ } else {
+
+ // Cell hydraulic conductivity
+ __syncthreads();
+ //const Float K = dev_d_K[cellidx];
+
+ // Cell hydraulic transmissivities
+ const Float3 T = dev_d_T[cellidx];
+ //const Float Tx = K*dx;
+ //const Float Ty = K*dy;
+ //const Float Tz = K*dz;
+
+ // Harmonic mean of transmissivity
+ // (in neg. and pos. direction along axis from cell)
+ __syncthreads();
+ const Float Tx_n = hmeanDev(T.x, dev_d_T[idx(x-1,y,z)].x);
+ const Float Tx_p = hmeanDev(T.x, dev_d_T[idx(x+1,y,z)].x);
+ const Float Ty_n = hmeanDev(T.y, dev_d_T[idx(x,y-1,z)].y);
+ const Float Ty_p = hmeanDev(T.y, dev_d_T[idx(x,y+1,z)].y);
+ const Float Tz_n = hmeanDev(T.z, dev_d_T[idx(x,y,z-1)].z);
+ const Float Tz_p = hmeanDev(T.z, dev_d_T[idx(x,y,z+1)].z);
+
+ // Cell hydraulic storativity
+ const Float S = dev_d_Ss[cellidx]*dx*dy*dz;
+
+ // Cell hydraulic head
+ const Float H = dev_d_H[cellidx];
+
+ // Laplacian operator
+ const Float deltaH = devC_dt/S *
+ ( Tx_n * (dev_d_H[idx(x-1,y,z)] - H)/(dx*dx)
+ + Tx_p * (dev_d_H[idx(x+1,y,z)] - H)/(dx*dx)
+ + Ty_n * (dev_d_H[idx(x,y-1,z)] - H)/(dy*dy)
+ + Ty_p * (dev_d_H[idx(x,y+1,z)] - H)/(dy*dy)
+ + Tz_n * (dev_d_H[idx(x,y,z-1)] - H)/(dy*dz)
+ + Tz_p * (dev_d_H[idx(x,y,z+1)] - H)/(dy*dz)
+ + dev_d_W[cellidx] );
+
+ // Calculate new hydraulic pressure in cell
+ __syncthreads();
+ dev_d_H_new[cellidx] = H + deltaH;
+ }
+ }
+}
+
+// Find cell velocity
+__global__ void findDarcyVelocitiesDev(
+ Float* dev_d_H,
+ Float3* dev_d_dH,
+ Float3* dev_d_V,
+ Float* dev_d_phi,
+ Float* dev_d_K)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // 1D thread index
+ const unsigned int cellidx = idx(x,y,z);
+
+ // Check that we are not outside the fluid grid
+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
+
+ // Flux [m/s]: q = -k/nu * dH
+ // Pore velocity [m/s]: v = q/n
+
+ // Dynamic viscosity
+ Float nu = devC_params.nu;
+
+ __syncthreads();
+ const Float3 dH = dev_d_dH[cellidx];
+ const Float K = dev_d_K[cellidx];
+ const Float phi = dev_d_phi[cellidx];
+
+ // Calculate flux
+ // The sign might need to be reversed, depending on the
+ // grid orientation
+ Float3 q = MAKE_FLOAT3(
+ -K/nu * dH.x,
+ -K/nu * dH.y,
+ -K/nu * dH.z);
+
+ // Calculate velocity
+ Float3 v = MAKE_FLOAT3(
+ v.x = q.x/phi,
+ v.y = q.y/phi,
+ v.z = q.z/phi);
+
+ // Save velocity
+ __syncthreads();
+ dev_d_V[cellidx] = v;
+ }
+}
+
+// Solve Darcy flow on a regular, cubic grid
+/*void DEM::initDarcyDev(const Float cellsizemultiplier)
+{
+ if (params.nu <= 0.0) {
+ std::cerr << "Error in initDarcy. The dymamic viscosity (params.nu), "
+ << "should be larger than 0.0, but is " << params.nu << std::endl;
+ exit(1);
+ }
+
+ // Number of cells
+ d_nx = floor(grid.num[0]*cellsizemultiplier);
+ d_ny = floor(grid.num[1]*cellsizemultiplier);
+ d_nz = floor(grid.num[2]*cellsizemultiplier);
+
+ // Cell size
+ d_dx = grid.L[0]/d_nx;
+ d_dy = grid.L[1]/d_ny;
+ d_dz = grid.L[2]/d_nz;
+
+ if (verbose == 1) {
+ std::cout << " - Fluid grid dimensions: "
+ << d_nx << "*"
+ << d_ny << "*"
+ << d_nz << std::endl;
+ std::cout << " - Fluid grid cell size: "
+ << d_dx << "*"
+ << d_dy << "*"
+ << d_dz << std::endl;
+ }
+
+ initDarcyMemDev();
+ initDarcyVals();
+ findDarcyTransmissivities();
+
+ checkDarcyTimestep();
+
+ transferDarcyToGlobalDeviceMemory(1);
+}*/
+
+// Print final heads and free memory
+void DEM::endDarcyDev()
+{
+ /*FILE* Kfile;
+ if ((Kfile = fopen("d_K.txt","w"))) {
+ printDarcyArray(Kfile, d_K);
+ fclose(Kfile);
+ } else {
+ fprintf(stderr, "Error, could not open d_K.txt\n");
+ }*/
+ //printDarcyArray(stdout, d_phi, "d_phi");
+ //printDarcyArray(stdout, d_K, "d_K");
+ //printDarcyArray(stdout, d_H, "d_H");
+ //printDarcyArray(stdout, d_V, "d_V");
+ freeDarcyMemDev();
+}
+
+// vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
(DIR) diff --git a/src/device.cu b/src/device.cu
t@@ -27,7 +27,7 @@
#include "integration.cuh"
#include "raytracer.cuh"
#include "latticeboltzmann.cuh"
-//#include "darcy.cuh"
+#include "darcy.cuh"
// Wrapper function for initializing the CUDA components.
t@@ -450,15 +450,20 @@ __host__ void DEM::transferToGlobalDeviceMemory(int statusmsg)
}
// Fluid arrays
- if (params.nu > 0.0 && darcy == 0) {
+ if (params.nu > 0.0) {
+ if (darcy == 0) {
#ifdef LBM_GPU
- cudaMemcpy( dev_f, f,
- sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19,
- cudaMemcpyHostToDevice);
- cudaMemcpy( dev_v_rho, v_rho,
- sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2],
- cudaMemcpyHostToDevice);
+ cudaMemcpy( dev_f, f,
+ sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19,
+ cudaMemcpyHostToDevice);
+ cudaMemcpy( dev_v_rho, v_rho,
+ sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2],
+ cudaMemcpyHostToDevice);
#endif
+ } //else {
+ //transferDarcyToGlobalDeviceMemory(1);
+ //}
+ // Darcy arrays aren't ready yet
}
checkForCudaErrors("End of transferToGlobalDeviceMemory");
t@@ -530,16 +535,20 @@ __host__ void DEM::transferFromGlobalDeviceMemory()
sizeof(Float4)*walls.nw, cudaMemcpyDeviceToHost);
// Fluid arrays
+ if (params.nu > 0.0) {
+ if (darcy == 0) {
#ifdef LBM_GPU
- if (params.nu > 0.0 && darcy == 0) {
cudaMemcpy( f, dev_f,
sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19,
cudaMemcpyDeviceToHost);
cudaMemcpy(v_rho, dev_v_rho,
sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2],
cudaMemcpyDeviceToHost);
- }
#endif
+ } else {
+ transferDarcyFromGlobalDeviceMemory(0);
+ }
+ }
checkForCudaErrors("End of transferFromGlobalDeviceMemory");
}
t@@ -580,13 +589,13 @@ __host__ void DEM::startTime()
unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock);
dim3 dimGridBonds(blocksPerGridBonds, 1, 1); // Blocks arranged in 1D grid
- // Use 3D block and grid layout for Lattice-Boltzmann fluid calculations
+ // Use 3D block and grid layout for fluid calculations
dim3 dimBlockFluid(8, 8, 8); // 512 threads per block
dim3 dimGridFluid(
iDivUp(grid.num[0], dimBlockFluid.x),
iDivUp(grid.num[1], dimBlockFluid.y),
iDivUp(grid.num[2], dimBlockFluid.z));
- if (dimGridFluid.z > 64) {
+ if (dimGridFluid.z > 64 && params.nu > 0.0) {
cerr << "Error: dimGridFluid.z > 64" << endl;
exit(1);
}
t@@ -634,18 +643,32 @@ __host__ void DEM::startTime()
fclose(fp);
// Initialize fluid distribution array
- if (params.nu > 0.0 && darcy == 0) {
+ Float d_factor;
+ if (params.nu > 0.0) {
+ if (darcy == 0) {
#ifdef LBM_GPU
- initFluid<<< dimGridFluid, dimBlockFluid >>>(dev_v_rho, dev_f);
- cudaThreadSynchronize();
-#else
+ initFluid<<< dimGridFluid, dimBlockFluid >>>(dev_v_rho, dev_f);
+ cudaThreadSynchronize();
+#endif
+ } else if (darcy == 1) {
#ifdef DARCY_GPU
- initFluid(v_rho, f, grid.num[0], grid.num[1], grid.num[2]);
+ const Float cellsizemultiplier = 1.0;
+ initDarcy(cellsizemultiplier);
+ initDarcyMemDev();
+ transferDarcyToGlobalDeviceMemory(1);
+
+ // Representative grain radius
+ const Float r_bar2 = meanRadius()*2.0;
+ // Grain size factor for Kozeny-Carman relationship
+ d_factor = r_bar2*r_bar2/180.0;
#else
- const Float cellsizemultiplier = 1.0;
- initDarcy(cellsizemultiplier);
-#endif
+ const Float cellsizemultiplier = 1.0;
+ initDarcy(cellsizemultiplier);
#endif
+ } else {
+ std::cerr << "Error, darcy value (" << darcy
+ << ") not understood." << std::endl;
+ }
}
if (verbose == 1) {
t@@ -677,6 +700,12 @@ __host__ void DEM::startTime()
double t_summation = 0.0;
double t_integrateWalls = 0.0;
+ double t_findPorositiesDev = 0.0;
+ double t_findDarcyTransmissivitiesDev = 0.0;
+ double t_explDarcyStepDev = 0.0;
+ double t_findDarcyGradientsDev = 0.0;
+ double t_findDarcyVelocitiesDev = 0.0;
+
if (PROFILING == 1) {
cudaEventCreate(&kernel_tic);
cudaEventCreate(&kernel_toc);
t@@ -866,17 +895,90 @@ __host__ void DEM::startTime()
}
// Solve darcy flow through grid
- if (darcy == 1) {
+ if (params.nu > 0.0 && darcy == 1) {
#ifdef DARCY_GPU
- std::cout << "GPU darcy" << std::endl;
+ checkForCudaErrors("Before findPorositiesDev", iter);
+ // Find cell porosities
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ findPorositiesDev<<<dimGridFluid, dimBlockFluid>>>(
+ dev_cellStart,
+ dev_cellEnd,
+ dev_x_sorted,
+ dev_d_phi);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_findPorositiesDev);
+ checkForCudaErrors("Post findPorositiesDev", iter);
+
+ // Find resulting cell transmissivities
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ findDarcyTransmissivitiesDev<<<dimGridFluid, dimBlockFluid>>>(
+ dev_d_K,
+ dev_d_T,
+ dev_d_phi,
+ d_factor);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_findDarcyTransmissivitiesDev);
+ checkForCudaErrors("Post findDarcyTransmissivitiesDev", iter);
+
+ // Perform explicit Darcy time step
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ explDarcyStepDev<<<dimGridFluid, dimBlockFluid>>>(
+ dev_d_H,
+ dev_d_H_new,
+ dev_d_T,
+ dev_d_Ss,
+ dev_d_W);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_explDarcyStepDev);
+ checkForCudaErrors("Post explDarcyStepDev", iter);
+
+ // Flop flop
+ swapFloatArrays(dev_d_H, dev_d_H_new);
+
+ // Find the pressure gradients
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ findDarcyGradientsDev<<<dimGridFluid, dimBlockFluid>>>(
+ dev_d_H, dev_d_dH);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_findDarcyGradientsDev);
+ checkForCudaErrors("Post findDarcyGradientsDev", iter);
+
+ // Find the pressure gradients
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ findDarcyVelocitiesDev<<<dimGridFluid, dimBlockFluid>>>(
+ dev_d_H,
+ dev_d_dH,
+ dev_d_V,
+ dev_d_phi,
+ dev_d_K);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_findDarcyVelocitiesDev);
+ checkForCudaErrors("Post findDarcyVelocitiesDev", iter);
+
+#else
// Copy device data to host memory
transferFromGlobalDeviceMemory();
// Pause the CPU thread until all CUDA calls previously issued are completed
cudaThreadSynchronize();
- // Perform explicit Darcy time step
+ // Perform a Darcy time step on the CPU
explDarcyStep();
// Transfer data from host to device memory
t@@ -884,9 +986,6 @@ __host__ void DEM::startTime()
// Pause the CPU thread until all CUDA calls previously issued are completed
cudaThreadSynchronize();
-#else
- // Perform a Darcy time step on the CPU
- explDarcyStep();
#endif
}
t@@ -1086,6 +1185,22 @@ __host__ void DEM::startTime()
<< "\t(" << 100.0*t_summation/t_sum << " %)\n"
<< " - integrateWalls:\t" << t_integrateWalls/1000.0 << " s"
<< "\t(" << 100.0*t_integrateWalls/t_sum << " %)\n";
+ if (darcy == 1) {
+ cout
+ << " - findPorositiesDev:\t" << t_findPorositiesDev/1000.0 << " s"
+ << "\t(" << 100.0*t_findPorositiesDev/t_sum << " %)\n"
+ << " - findDarcyTransmissivitiesDev:\t" <<
+ t_findDarcyTransmissivitiesDev/1000.0 << " s"
+ << "\t(" << 100.0*t_findDarcyTransmissivitiesDev/t_sum << " %)\n"
+ << " - explDarcyStepDev:\t" << t_explDarcyStepDev/1000.0 << " s"
+ << "\t(" << 100.0*t_explDarcyStepDev/t_sum << " %)\n"
+ << " - findDarcyGradientsDev:\t" << t_findDarcyGradientsDev/1000.0
+ << " s"
+ << "\t(" << 100.0*t_findDarcyGradientsDev/t_sum << " %)\n"
+ << " - findDarcyVelocitiesDev:\t"
+ << t_findDarcyVelocitiesDev/1000.0 << " s"
+ << "\t(" << 100.0*t_findDarcyVelocitiesDev/t_sum << " %)\n";
+ }
}
t@@ -1098,6 +1213,11 @@ __host__ void DEM::startTime()
delete[] k.delta_t;
#ifndef DARCY_GPU
+ if (darcy == 1) {
+ endDarcyDev();
+ endDarcy();
+ }
+#else
if (darcy == 1)
endDarcy();
#endif
(DIR) diff --git a/src/sphere.h b/src/sphere.h
t@@ -230,6 +230,9 @@ class DEM {
// Find darcy flow velocities from specific flux (q)
void findDarcyVelocities();
+ // Returns the mean particle radius
+ Float meanRadius();
+
// Get linear (1D) index from 3D coordinate
unsigned int idx(
const unsigned int x,
t@@ -238,9 +241,11 @@ class DEM {
// Initialize Darcy values and arrays
void initDarcy(const Float cellsizemultiplier = 1.0);
+ void initDarcyDev(const Float cellsizemultiplier = 1.0);
// Clean up Darcy arrays
void endDarcy();
+ void endDarcyDev();
// Check whether the explicit integration is going to meet the
// stability criteria