tShearmodel=2 refinements - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 77d03f69783d2598c0920041d85d8d722c2c1868
 (DIR) parent 5e1012c4c642e3a1e740547d141434bfebe3146d
 (HTM) Author: Anders Damsgaard <adc@geo.au.dk>
       Date:   Mon, 15 Oct 2012 14:29:37 +0200
       
       Shearmodel=2 refinements
       
       Diffstat:
         M python/sphere.py                    |     101 ++++++++++++++++++++++---------
         M src/constants.cuh                   |       2 +-
         M src/contactsearch.cuh               |       6 +++---
         M src/datatypes.h                     |      11 ++++++++---
         M src/device.cu                       |      48 ++++++++++++++++++++++++++++++-
         M src/integration.cuh                 |       7 +++++--
         M src/main.cpp                        |       9 ++++++---
       
       7 files changed, 142 insertions(+), 42 deletions(-)
       ---
 (DIR) diff --git a/python/sphere.py b/python/sphere.py
       t@@ -43,7 +43,7 @@ class Spherebin:
            self.fixvel  = numpy.zeros(self.np, dtype=numpy.float64)
            self.xsum    = numpy.zeros(self.np, dtype=numpy.float64)
            self.radius  = numpy.ones(self.np, dtype=numpy.float64)
       -    self.rho     = numpy.ones(self.np, dtype=numpy.float64)
       +    self.rho     = numpy.ones(self.np, dtype=numpy.float64) * 2600.0
            self.k_n     = numpy.ones(self.np, dtype=numpy.float64) * 1.16e9
            self.k_t     = numpy.ones(self.np, dtype=numpy.float64) * 1.16e9
            self.k_r         = numpy.zeros(self.np, dtype=numpy.float64)
       t@@ -62,18 +62,19 @@ class Spherebin:
            self.bonds   = numpy.ones(self.np*4, dtype=numpy.uint32).reshape(self.np,4) * self.np
        
            # Constant, single-value physical parameters
       -    self.globalparams = numpy.zeros(1, dtype=numpy.int32)
       -    self.g            = numpy.array([0.0, 0.0, -9.80], dtype=numpy.float64)
       +    self.globalparams = numpy.ones(1, dtype=numpy.int32)
       +    self.g            = numpy.array([0.0, 0.0, 0.0], dtype=numpy.float64)
            self.kappa        = numpy.zeros(1, dtype=numpy.float64)
            self.db           = numpy.zeros(1, dtype=numpy.float64)
            self.V_b          = numpy.zeros(1, dtype=numpy.float64)
       -    self.shearmodel   = numpy.ones(1, dtype=numpy.uint32)
       +    self.shearmodel   = numpy.ones(1, dtype=numpy.uint32) * 2
        
            # Wall data
            self.nw          = numpy.ones(1, dtype=numpy.uint32) * nw
            self.wmode   = numpy.zeros(self.nw, dtype=numpy.int32)
            self.w_n     = numpy.zeros(self.nw*self.nd, dtype=numpy.float64).reshape(self.nw,self.nd)
       -    self.w_x     = numpy.zeros(self.nw, dtype=numpy.float64)
       +    self.w_n[nw-1,nd-1] = -1.0
       +    self.w_x     = numpy.ones(self.nw, dtype=numpy.float64)
            self.w_m     = numpy.zeros(self.nw, dtype=numpy.float64)
            self.w_vel   = numpy.zeros(self.nw, dtype=numpy.float64)
            self.w_force = numpy.zeros(self.nw, dtype=numpy.float64)
       t@@ -81,9 +82,9 @@ class Spherebin:
            
            # x- and y-boundary behavior
            self.periodic = numpy.zeros(1, dtype=numpy.uint32)
       -    self.gamma_wn = numpy.ones(1, dtype=numpy.float64) * 1e3
       -    self.gamma_ws = numpy.ones(1, dtype=numpy.float64) * 1e3
       -    self.gamma_wr = numpy.ones(1, dtype=numpy.float64) * 1e3
       +    self.gamma_wn = numpy.ones(1, dtype=numpy.float64) * 1.0e3
       +    self.gamma_ws = numpy.ones(1, dtype=numpy.float64) * 1.0e3
       +    self.gamma_wr = numpy.ones(1, dtype=numpy.float64) * 1.0e3
        
        
          # Read binary data
       t@@ -390,7 +391,7 @@ class Spherebin:
            self.shearmodel[0] = shearmodel
        
            # Initialize upper wall
       -    self.wmode[0] = 0        # 0: devs, 1: vel
       +    self.wmode[0] = 0        # 0: fixed, 1: devs, 2: vel
            self.w_n[0,2] = -1.0
            self.w_x[0] = self.L[2]
            self.w_m[0] = self.rho[0] * self.np * math.pi * r_max**3
       t@@ -400,23 +401,34 @@ class Spherebin:
            #self.nw[0] = numpy.ones(1, dtype=numpy.uint32) * 1
            self.nw = numpy.ones(1, dtype=numpy.uint32) * 1
        
       -  # Fit cell number according to world dimensions
       -  def fitNum(self):
       -    """ Create cells. cellsize is increased from 2*r_max until it fits 
       -        the horizontal grid.
       -        Call after setting self.L, self.num and self.radius
       +    
       +  # Generate grid based on particle positions
       +  def initGrid(self):
       +    """ Initialize grid suitable for the particle positions set previously.
       +        The margin parameter adjusts the distance (in no. of max. radii)
       +        from the particle boundaries.
            """
       +
       +
       +    # Cell configuration
            cellsize_min = 2.1 * numpy.amax(self.radius)
            self.num[0] = numpy.ceil((self.L[0]-self.origo[0])/cellsize_min)
            self.num[1] = numpy.ceil((self.L[1]-self.origo[1])/cellsize_min)
            self.num[2] = numpy.ceil((self.L[2]-self.origo[2])/cellsize_min)
        
       +    if (self.num[0] < 4 or self.num[1] < 4 or self.num[2] < 4):
       +      print("Error: The grid must be at least 3 cells in each direction")
       +      print(" Grid: x={}, y={}, z={}".format(self.num[0], self.num[1], self.num[2]))
       +
       +    # Put upper wall at top boundary
       +    self.w_x[0] = self.L[0]
       +
        
          # Generate grid based on particle positions
       -  def initGrid(self, g = numpy.array([0.0, 0.0, -9.80665]),
       -               margin = numpy.array([2.0, 2.0, 2.0]),
       -               periodic = 1,
       -               shearmodel = 2):
       +  def initGridAndWorldsize(self, g = numpy.array([0.0, 0.0, -9.80665]),
       +                           margin = numpy.array([2.0, 2.0, 2.0]),
       +                           periodic = 1,
       +                           shearmodel = 2):
            """ Initialize grid suitable for the particle positions set previously.
                The margin parameter adjusts the distance (in no. of max. radii)
                from the particle boundaries.
       t@@ -438,8 +450,13 @@ class Spherebin:
                                  numpy.amax(self.x[:,2] + self.radius[:])]) \
                                  + margin*r_max
        
       -    # Fit cell size to world dimensions
       -    fitNum()
       +    cellsize_min = 2.1 * r_max        
       +    self.num[0] = numpy.ceil((self.L[0]-self.origo[0])/cellsize_min)
       +    self.num[1] = numpy.ceil((self.L[1]-self.origo[1])/cellsize_min)
       +    self.num[2] = numpy.ceil((self.L[2]-self.origo[2])/cellsize_min)
       +
       +    if (self.num[0] < 4 or self.num[1] < 4 or self.num[2]):
       +      print("Error: The grid must be at least 3 cells in each direction")
        
            self.shearmodel[0] = shearmodel
        
       t@@ -603,7 +620,7 @@ class Spherebin:
            self.angpos  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
        
            # Initialize upper wall
       -    self.wmode[0] = 0
       +    self.wmode[0] = 1 # devs
            self.w_n[0,2] = -1.0
            self.w_x[0] = self.L[2]
            self.w_m[0] = self.rho[0] * self.np * math.pi * (cellsize/2.0)**3
       t@@ -633,7 +650,7 @@ class Spherebin:
            self.angpos  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
        
            # Initialize upper wall
       -    self.wmode[0] = 1
       +    self.wmode[0] = 2 # strain rate controlled
            self.w_n[0,2] = -1.0
            self.w_x[0] = self.L[2]
            self.w_m[0] = self.rho[0] * self.np * math.pi * (cellsize/2.0)**3
       t@@ -929,31 +946,57 @@ class Spherebin:
        
        
        def render(binary,
       -           out = '~/img_out/rt-out',
       +           out = '../img_out/out',
                   graphicsformat = 'jpg',
                   resolution = numpy.array([800, 800]),
                   workhorse = 'GPU',
                   method = 'pressure',
                   max_val = 4e3,
       -           rt_bin = '../raytracer/rt'):
       +           rt_bin = '../raytracer/rt',
       +           verbose=True):
          """ Render target binary using the sphere raytracer.
          """
        
       +  stdout = ""
       +  if (verbose == False):
       +    stdout = " > /dev/null"
       +
          # Use raytracer to render the scene into a temporary PPM file
          if workhorse == 'GPU':
       -    subprocess.call(rt_bin + " GPU {0} {1} {2} {3}.ppm {4} {5}" \
       -        .format(binary, resolution[0], resolution[1], out, method, max_val), shell=True)
       +    subprocess.call(rt_bin + " GPU {0} {1} {2} {3}.ppm {4} {5}"\
       +        .format(binary, resolution[0], resolution[1], out, method, max_val) + stdout, shell=True)
          if workhorse == 'CPU':
       -    subprocess.call(rt_bin + " CPU {0} {1} {2} {3}.ppm" \
       -        .format(binary, resolution[0], resolution[1], out), shell=True)
       +    subprocess.call(rt_bin + " CPU {0} {1} {2} {3}.ppm"\
       +        .format(binary, resolution[0], resolution[1], out) + stdout, shell=True)
        
          # Use ImageMagick's convert command to change the graphics format
       -  subprocess.call("convert {0}.ppm {0}.{1}" \
       +  subprocess.call("convert {0}.ppm {0}.{1}"\
              .format(out, graphicsformat), shell=True)
        
          # Delete temporary PPM file
          subprocess.call("rm {0}.ppm".format(out), shell=True)
          
       +def renderAll(project,
       +                  rt_bin = "~/code/sphere/raytracer/rt",
       +              out_folder = "../img_out",
       +              graphics_format = "png",
       +              workhorse = "GPU",
       +              method = "pressure",
       +              max_val = 10e3,
       +              resolution = numpy.array([800, 800])):
       +
       +  lastfile = status(project)
       +
       +  for i in range(lastfile+1):
       +    # Input binary filename
       +    fn = "../output/{0}.output{1}.bin".format(project, i)
       +
       +    # Output image name (without extension)
       +    out = out_folder + "/{0}.output{1}".format(project, i)
       +    
       +    # Call raytracer, also converts to format
       +    render(fn, out, graphics_format, resolution, workhorse, method, max_val, rt_bin, verbose = False)
       +
          
        def visualize(project, method = 'energy', savefig = False, outformat = 'png'):
          """ Visualize output from the target project,
 (DIR) diff --git a/src/constants.cuh b/src/constants.cuh
       t@@ -9,7 +9,7 @@ __constant__ Float        devC_dt;           // Time step length
        __constant__ int          devC_global;           // Parameter properties, 1: global, 0: individual
        __constant__ Float        devC_g[ND];           // Gravitational acceleration vector
        __constant__ unsigned int devC_np;           // Number of particles
       -__constant__ char          devC_nc;           // Max. number of contacts a particle can have
       +__constant__ int          devC_nc;           // Max. number of contacts a particle can have
        __constant__ unsigned int devC_shearmodel; // Shear force model: 1: viscous, frictional, 2: elastic, frictional
        __constant__ Float        devC_k_n;           // Material normal stiffness
        __constant__ Float        devC_k_t;           // Material tangential stiffness
 (DIR) diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh
       t@@ -437,7 +437,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
        
              // Loop over all possible contacts, and remove contacts
              // that no longer are valid (delta_n > 0.0)
       -      for (int i = 0; i<(int)devC_nc; ++i) {
       +      for (int i = 0; i<devC_nc; ++i) {
                mempos = (unsigned int)(idx_a_orig * devC_nc + i);
                __syncthreads();
                idx_b_orig = dev_contacts[mempos];
       t@@ -484,11 +484,11 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
                    dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
                  }
        
       -        }/* else { // if dev_contacts[mempos] == devC_np
       +        } else { // if dev_contacts[mempos] == devC_np
                  __syncthreads();
                  // Zero sum of shear displacement in this position
                  dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
       -        } */
       +        } 
              } // Contact loop end
        
        
 (DIR) diff --git a/src/datatypes.h b/src/datatypes.h
       t@@ -10,10 +10,15 @@
        
        
        // Enable profiling of kernel runtimes?
       -// 0: No
       +// 0: No (default)
        // 1: Yes
        #define PROFILING 0
        
       +// Output information about contacts to stdout?
       +// 0: No (default)
       +// 1: Yes
       +#define CONTACTINFO 0
       +
        
        //////////////////////
        // TYPE DEFINITIONS //
       t@@ -60,8 +65,8 @@ const unsigned int ND = 3;
        const Float VERS = 0.25;
        
        // Max. number of contacts per particle
       -//const char NC = 16;
       -const char NC = 32;
       +//const int NC = 16;
       +const int NC = 32;
        
        
        ///////////////////////////
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -95,7 +95,7 @@ __host__ void transferToConstantMemory(Particles* p,
          cout << "\n  Transfering data to constant device memory:     ";
        
          cudaMemcpyToSymbol("devC_np", &p->np, sizeof(p->np));
       -  cudaMemcpyToSymbol("devC_nc", &NC, sizeof(char));
       +  cudaMemcpyToSymbol("devC_nc", &NC, sizeof(int));
          cudaMemcpyToSymbol("devC_origo", grid->origo, sizeof(Float)*ND);
          cudaMemcpyToSymbol("devC_L", grid->L, sizeof(Float)*ND);
          cudaMemcpyToSymbol("devC_num", grid->num, sizeof(unsigned int)*ND);
       t@@ -217,11 +217,14 @@ __host__ void gpuMain(Float4* host_x,
        
          // Particle contact bookkeeping
          unsigned int* dev_contacts;
       +  unsigned int* host_contacts = new unsigned int[p->np*NC];
          // Particle pair distance correction across periodic boundaries
          Float4* dev_distmod;
       +  Float4* host_distmod = new Float4[p->np*NC];
          // x,y,z contains the interparticle vector, corrected if contact 
          // is across a periodic boundary. 
          Float4* dev_delta_t; // Accumulated shear distance of contact
       +  Float4* host_delta_t = new Float4[p->np*NC];
        
          // Particle memory size
          unsigned int memSizeF  = sizeof(Float) * p->np;
       t@@ -616,6 +619,14 @@ __host__ void gpuMain(Float4* host_x,
              cudaMemcpy(host_w_mvfd, dev_w_mvfd, 
                           sizeof(Float)*params->nw*4, cudaMemcpyDeviceToHost);
        
       +  
       +      // Contact information
       +      if (CONTACTINFO == 1) {
       +        cudaMemcpy(host_contacts, dev_contacts, sizeof(unsigned int)*p->np*NC, cudaMemcpyDeviceToHost);
       +        cudaMemcpy(host_delta_t, dev_delta_t, memSizeF4*NC, cudaMemcpyDeviceToHost);
       +        cudaMemcpy(host_distmod, dev_distmod, memSizeF4*NC, cudaMemcpyDeviceToHost);
       +      }
       +
              // Pause the CPU thread until all CUDA calls previously issued are completed
              cudaThreadSynchronize();
        
       t@@ -633,6 +644,36 @@ __host__ void gpuMain(Float4* host_x,
                exit(EXIT_FAILURE);
              }
        
       +      if (CONTACTINFO == 1) {
       +        // Write contact information to stdout
       +        cout << "\n\n---------------------------\n"
       +             << "t = " << time->current << " s.\n"
       +             << "---------------------------\n";
       +
       +        for (int n = 0; n < p->np; ++n) {
       +          cout << "\n## Particle " << n << " ##\n";
       +
       +          cout  << "- contacts:\n";
       +          for (int nc = 0; nc < NC; ++nc) 
       +            cout << "[" << nc << "]=" << host_contacts[nc+NC*n] << '\n';
       +
       +          cout << "\n- delta_t:\n";
       +          for (int nc = 0; nc < NC; ++nc) 
       +            cout << host_delta_t[nc+NC*n].x << '\t'
       +                 << host_delta_t[nc+NC*n].y << '\t'
       +                 << host_delta_t[nc+NC*n].z << '\t'
       +                 << host_delta_t[nc+NC*n].w << '\n';
       +
       +          cout << "\n- distmod:\n";
       +          for (int nc = 0; nc < NC; ++nc) 
       +            cout << host_distmod[nc+NC*n].x << '\t'
       +                 << host_distmod[nc+NC*n].y << '\t'
       +                 << host_distmod[nc+NC*n].z << '\t'
       +                 << host_distmod[nc+NC*n].w << '\n';
       +        }
       +        cout << '\n';
       +      }
       +
              // Update status.dat at the interval of filetime 
              sprintf(file,"%s/output/%s.status.dat", cwd, inputbin);
              fp = fopen(file, "w");
       t@@ -726,5 +767,10 @@ __host__ void gpuMain(Float4* host_x,
          cudaFree(dev_w_force);
          cudaFree(dev_w_force_partial);
        
       +  // Contact info arrays
       +  delete [] host_contacts;
       +  delete [] host_distmod;
       +  deleteĀ [] host_delta_t;
       +
          printf("Done\n");
        } /* EOF */
 (DIR) diff --git a/src/integration.cuh b/src/integration.cuh
       t@@ -181,9 +181,12 @@ __global__ void integrateWalls(Float4* dev_w_nx,
            // write-after-read, or write-after-write hazards. 
            Float4 w_nx   = dev_w_nx[idx];
            Float4 w_mvfd = dev_w_mvfd[idx];
       -    int wmode = devC_wmode[idx];  // Wall BC, 0: devs, 1: vel
       +    int wmode = devC_wmode[idx];  // Wall BC, 0: fixed, 1: devs, 2: vel
            Float acc;
        
       +    if (wmode == 0) // Wall fixed: do nothing
       +      return;
       +
            // Find the final sum of forces on wall
            w_mvfd.z = 0.0f;
            for (int i=0; i<blocksPerGrid; ++i) {
       t@@ -201,7 +204,7 @@ __global__ void integrateWalls(Float4* dev_w_nx,
            acc = (w_mvfd.z + N)/w_mvfd.x;
        
            // If Wall BC is controlled by velocity, it should not change
       -    if (wmode == 1) { 
       +    if (wmode == 2) { 
              acc = 0.0f;
            }
        
 (DIR) diff --git a/src/main.cpp b/src/main.cpp
       t@@ -180,7 +180,7 @@ int main(int argc, char *argv[])
          p.es         = new Float[p.np];      // Total shear energy dissipation
          p.ev         = new Float[p.np];      // Total viscous energy dissipation
          p.p               = new Float[p.np];      // Pressure excerted onto particle
       -  params.wmode = new int[MAXWALLS];    // Wall BC's, 0: devs, 1: vel
       +  params.wmode = new int[MAXWALLS];    // Wall BC's, 0: fixed, 1: devs, 2: vel
        
          // Allocate Float4 host arrays
          Float4 *host_x      = new Float4[p.np];  // Center coordinates for each particle (x)
       t@@ -300,6 +300,7 @@ int main(int argc, char *argv[])
            if (fread(&params.g[i], sizeof(params.g[i]), 1, fp) != 1)
              exit(1);
          }
       +
          if (fread(&params.kappa, sizeof(params.kappa), 1, fp) != 1)
            exit(1);
          if (fread(&params.db, sizeof(params.db), 1, fp) != 1)
       t@@ -328,7 +329,7 @@ int main(int argc, char *argv[])
        
          // Read wall data
          for (j=0; j<params.nw; ++j) {
       -    // Wall condition mode: 0: devs, 1: vel
       +    // Wall condition mode: 0: fixed, 1: devs, 2: vel
            if (fread(&params.wmode[j], sizeof(params.wmode[j]), 1, fp) != 1)
              exit(1);
        
       t@@ -409,8 +410,10 @@ int main(int argc, char *argv[])
        
          cout << "  - Top BC: ";
          if (params.wmode[0] == 0)
       -    cout << "Deviatoric stress\n";
       +    cout << "Fixed\n";
          else if (params.wmode[0] == 1)
       +    cout << "Deviatoric stress\n";
       +  else if (params.wmode[0] == 2)
            cout << "Velocity\n";
          else {
            cerr << "Top boundary condition not recognized!\n";