tFluid grid values are only read/written if nu>0.0 - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit fba2d7f021655b31ce60447fa54e3a4afc9ab5d5
 (DIR) parent 7b2baa5c83dfd64089f142f3e4f93eb7f6e91bbe
 (HTM) Author: Anders Damsgaard <adc@geo.au.dk>
       Date:   Tue, 28 May 2013 13:11:28 +0200
       
       Fluid grid values are only read/written if nu>0.0
       
       Diffstat:
         M python/sphere.py                    |      38 ++++++++++++++++---------------
         M src/device.cu                       |      44 ++++++++++++++++++++-----------
         M src/file_io.cpp                     |      39 +++++++++++++++++++++-----------
         M src/sphere.cpp                      |      36 +++++++++----------------------
         M src/sphere.h                        |       5 +++++
       
       5 files changed, 90 insertions(+), 72 deletions(-)
       ---
 (DIR) diff --git a/python/sphere.py b/python/sphere.py
       t@@ -325,17 +325,18 @@ class Spherebin:
        
                    if (fluid == True):
                        self.nu = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -                self.f_v = numpy.empty(
       -                        (self.num[0], self.num[1], self.num[2], self.nd),
       -                        dtype=numpy.float64)
       -                self.f_rho = numpy.empty((self.num[0],self.num[1],self.num[2]), dtype=numpy.float64)
       -                for z in range(self.num[2]):
       -                    for y in range(self.num[1]):
       -                        for x in range(self.num[0]):
       -                            self.f_v[x,y,z,0] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -                            self.f_v[x,y,z,1] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -                            self.f_v[x,y,z,2] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       -                            self.f_rho[x,y,z] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +                if (self.nu[0] > 0.0):
       +                    self.f_v = numpy.empty(
       +                            (self.num[0], self.num[1], self.num[2], self.nd),
       +                            dtype=numpy.float64)
       +                    self.f_rho = numpy.empty((self.num[0],self.num[1],self.num[2]), dtype=numpy.float64)
       +                    for z in range(self.num[2]):
       +                        for y in range(self.num[1]):
       +                            for x in range(self.num[0]):
       +                                self.f_v[x,y,z,0] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +                                self.f_v[x,y,z,1] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +                                self.f_v[x,y,z,2] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +                                self.f_rho[x,y,z] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
        
        
                finally:
       t@@ -442,13 +443,14 @@ class Spherebin:
                    fh.write(self.bonds_omega_t.astype(numpy.float64))
        
                    fh.write(self.nu.astype(numpy.float64))
       -            for z in range(self.num[2]):
       -                for y in range(self.num[1]):
       -                    for x in range(self.num[0]):
       -                        fh.write(self.f_v[x,y,z,0].astype(numpy.float64))
       -                        fh.write(self.f_v[x,y,z,1].astype(numpy.float64))
       -                        fh.write(self.f_v[x,y,z,2].astype(numpy.float64))
       -                        fh.write(self.f_rho[x,y,z].astype(numpy.float64))
       +            if (self.nu[0] > 0.0):
       +                for z in range(self.num[2]):
       +                    for y in range(self.num[1]):
       +                        for x in range(self.num[0]):
       +                            fh.write(self.f_v[x,y,z,0].astype(numpy.float64))
       +                            fh.write(self.f_v[x,y,z,1].astype(numpy.float64))
       +                            fh.write(self.f_v[x,y,z,2].astype(numpy.float64))
       +                            fh.write(self.f_rho[x,y,z].astype(numpy.float64))
        
                finally:
                    if fh is not None:
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -449,11 +449,14 @@ __host__ void DEM::transferToGlobalDeviceMemory()
        
            // Fluid arrays
            if (params.nu > 0.0) {
       -        cudaMemcpy( dev_f, f, sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19,
       +#ifdef LBM_GPU
       +        cudaMemcpy( dev_f, f,
       +                sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19,
                        cudaMemcpyHostToDevice);
                cudaMemcpy( dev_v_rho, v_rho,
       -                sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2],
       +                sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2],
                        cudaMemcpyHostToDevice);
       +#endif
            }
        
            checkForCudaErrors("End of transferToGlobalDeviceMemory");
       t@@ -525,14 +528,16 @@ __host__ void DEM::transferFromGlobalDeviceMemory()
                    sizeof(Float4)*walls.nw, cudaMemcpyDeviceToHost);
        
            // Fluid arrays
       +#ifdef LBM_GPU
            if (params.nu > 0.0) {
                cudaMemcpy( f, dev_f,
                        sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19,
                        cudaMemcpyDeviceToHost);
       -        cudaMemcpy( v_rho, dev_v_rho,
       -                sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2],
       +        cudaMemcpy(v_rho, dev_v_rho,
       +                sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2],
                        cudaMemcpyDeviceToHost);
            }
       +#endif
        
            checkForCudaErrors("End of transferFromGlobalDeviceMemory");
        }
       t@@ -575,14 +580,10 @@ __host__ void DEM::startTime()
        
            // Use 3D block and grid layout for Lattice-Boltzmann fluid calculations
            dim3 dimBlockFluid(8, 8, 8);    // 512 threads per block
       -    //dim3 dimGridFluid(
       -            //(grid.num[2]+8-1)/8,    // Use x as the z
       -            //(grid.num[1]+8-1)/8,
       -            //(grid.num[0]+8-1)/8);
            dim3 dimGridFluid(
       -            iDivUp(grid.num[2],dimBlockFluid.x),    // Use z as x
       -            iDivUp(grid.num[1],dimBlockFluid.y),
       -            iDivUp(grid.num[0],dimBlockFluid.z));   // Use x as z
       +            iDivUp(grid.num[0], dimBlockFluid.x),
       +            iDivUp(grid.num[1], dimBlockFluid.y),
       +            iDivUp(grid.num[2], dimBlockFluid.z));
            if (dimGridFluid.z > 64) {
                cerr << "Error: dimGridFluid.z > 64" << endl;
                exit(1);
       t@@ -629,9 +630,14 @@ __host__ void DEM::startTime()
            fclose(fp);
        
            // Initialize fluid distribution array
       -    initfluid<<< dimGridFluid, dimBlockFluid >>>(dev_v_rho, dev_f);
       -    cudaThreadSynchronize();
       -    checkForCudaErrors("Post initfluid");
       +    if (params.nu > 0.0) {
       +#ifdef LBM_GPU
       +        initFluid<<< dimGridFluid, dimBlockFluid >>>(dev_v_rho, dev_f);
       +        cudaThreadSynchronize();
       +#else
       +        initFluid(v_rho, f, grid.num[0], grid.num[1], grid.num[2]);
       +#endif
       +    }
        
            if (verbose == 1) {
                cout << "\n  Entering the main calculation time loop...\n\n"
       t@@ -820,9 +826,10 @@ __host__ void DEM::startTime()
        
                // Process fluid and particle interaction in each cell
                if (params.nu > 0.0 && grid.periodic == 1) {
       +#ifdef LBM_GPU
                    if (PROFILING == 1)
                        startTimer(&kernel_tic);
       -            latticeBoltzmannD3Q19<<< dimGridFluid, dimBlockFluid>>> (
       +            latticeBoltzmannD3Q19<<<dimGridFluid, dimBlockFluid>>> (
                            dev_f,
                            dev_f_new,
                            dev_v_rho,
       t@@ -840,6 +847,13 @@ __host__ void DEM::startTime()
        
                    // Flip flop
                    swapFloatArrays(dev_f, dev_f_new);
       +#else
       +            latticeBoltzmannD3Q19(f, f_new, v_rho,
       +                    time.dt, grid, params);
       +            // Flip flop
       +            swapFloatArrays(f, f_new);
       +#endif
       +
                }
        
        
 (DIR) diff --git a/src/file_io.cpp b/src/file_io.cpp
       t@@ -238,16 +238,22 @@ void DEM::readbin(const char *target)
        
            // Read fluid parameters
            ifs.read(as_bytes(params.nu), sizeof(params.nu));
       -    v_rho = new Float4[grid.num[0]*grid.num[1]*grid.num[2]];
            f = new Float[grid.num[0]*grid.num[1]*grid.num[2]*19];
       +    f_new = new Float[grid.num[0]*grid.num[1]*grid.num[2]*19];
       +    v_rho = new Float4[grid.num[0]*grid.num[1]*grid.num[2]];
            if (params.nu > 0.0) {
       -        for (i = 0; i<grid.num[0]*grid.num[1]*grid.num[2]; ++i) {
       -            ifs.read(as_bytes(v_rho[i].x), sizeof(Float));
       -            ifs.read(as_bytes(v_rho[i].y), sizeof(Float));
       -            ifs.read(as_bytes(v_rho[i].z), sizeof(Float));
       +        unsigned int x, y, z;
       +        for (z = 0; z<grid.num[2]; ++z) {
       +            for (y = 0; y<grid.num[1]; ++y) {
       +                for (x = 0; x<grid.num[0]; ++x) {
       +                    i = x + grid.num[0]*y + grid.num[0]*grid.num[1]*z;
       +                    ifs.read(as_bytes(v_rho[i].x), sizeof(Float));
       +                    ifs.read(as_bytes(v_rho[i].y), sizeof(Float));
       +                    ifs.read(as_bytes(v_rho[i].z), sizeof(Float));
       +                    ifs.read(as_bytes(v_rho[i].w), sizeof(Float));
       +                }
       +            }
                }
       -        for (i = 0; i<grid.num[0]*grid.num[1]*grid.num[2]; ++i)
       -            ifs.read(as_bytes(v_rho[i].w), sizeof(Float));
            }
        
        
       t@@ -410,13 +416,20 @@ void DEM::writebin(const char *target)
                }
        
                ofs.write(as_bytes(params.nu), sizeof(params.nu));
       -        for (i = 0; i<grid.num[0]*grid.num[1]*grid.num[2]; ++i) {
       -            ofs.write(as_bytes(v_rho[i].x), sizeof(Float));
       -            ofs.write(as_bytes(v_rho[i].y), sizeof(Float));
       -            ofs.write(as_bytes(v_rho[i].z), sizeof(Float));
       +        unsigned int x, y, z;
       +        if (params.nu > 0.0) {
       +            for (z = 0; z<grid.num[2]; ++z) {
       +                for (y = 0; y<grid.num[1]; ++y) {
       +                    for (x = 0; x<grid.num[0]; ++x) {
       +                        i = x + grid.num[0]*y + grid.num[0]*grid.num[1]*z;
       +                        ofs.write(as_bytes(v_rho[i].x), sizeof(Float));
       +                        ofs.write(as_bytes(v_rho[i].y), sizeof(Float));
       +                        ofs.write(as_bytes(v_rho[i].z), sizeof(Float));
       +                        ofs.write(as_bytes(v_rho[i].w), sizeof(Float));
       +                    }
       +                }
       +            }
                }
       -        for (i = 0; i<grid.num[0]*grid.num[1]*grid.num[2]; ++i)
       -            ofs.write(as_bytes(v_rho[i].w), sizeof(Float));
        
                // Close file if it is still open
                if (ofs.is_open())
 (DIR) diff --git a/src/sphere.cpp b/src/sphere.cpp
       t@@ -304,6 +304,16 @@ void DEM::reportValues()
                    << grid.num[1] << " * "
                    << grid.num[2];
            cout << " cells\n";
       +    cout << "  - Domain size: ";
       +    if (nd == 1)
       +        cout << grid.L[0];
       +    else if (nd == 2)
       +        cout << grid.L[0] << " * " << grid.L[1];
       +    else 
       +        cout << grid.L[0] << " * " 
       +            << grid.L[1] << " * "
       +            << grid.L[2];
       +    cout << " m\n";
        
            cout << "  - No. of particle bonds: " << params.nb0 << endl;
        }
       t@@ -732,30 +742,4 @@ void DEM::forcechains(const std::string format, const int threedim,
        
        }
        
       -// Find hydraulic conductivities for each cell
       -
       -// Solve Darcy flow through particles
       -void DEM::startDarcy(
       -        const Float cellsizemultiplier)
       -{
       -    // Number of cells
       -    int nx = grid.L[0]/grid.num[0];
       -    int ny = grid.L[1]/grid.num[1];
       -    int nz = grid.L[2]/grid.num[2];
       -
       -    // Cell size 
       -    Float dx = grid.L[0]/nx;
       -    Float dy = grid.L[1]/nx;
       -    Float dz = grid.L[2]/nx;
       -
       -    if (verbose == 1) {
       -        std::cout << "Fluid grid dimensions: "
       -            << nx << " * "
       -            << ny << " * "
       -            << nz << std::endl;
       -    }
       -
       -
       -}
       -
        // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
 (DIR) diff --git a/src/sphere.h b/src/sphere.h
       t@@ -4,6 +4,7 @@
        
        #include <vector>
        
       +
        #include "datatypes.h"
        
        // DEM class
       t@@ -142,6 +143,10 @@ class DEM {
                Float4 *v_rho;      // Fluid velocity v (xyz), and pressure rho (w) 
                Float4 *dev_v_rho;  // Device equivalent
        
       +        // Darcy-flow values
       +        int d_nx, d_ny, d_nz;     // Number of cells in each dim
       +        Float d_dx, d_dy, d_dz;   // Cell length in each dim
       +
        
            public:
                // Values and functions accessible from the outside