tSeveral important bugfixes - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 4261e0c247da8874c98b9047d674d8f708d4509d
 (DIR) parent 44c5a7bf26b6ee3834079f25c6ada71ac1f0eafc
 (HTM) Author: Anders Damsgaard Christensen <adc@geo.au.dk>
       Date:   Mon, 16 Apr 2012 14:52:51 +0200
       
       Several important bugfixes
       
       Diffstat:
         M mfiles/freadbin.m                   |       4 +++-
         M mfiles/fwritebin.m                  |       1 +
         M mfiles/initsetup.m                  |       3 ++-
         M python/sphere.py                    |     486 ++++++++++++++++++++++++-------
         D python/test.py                      |      15 ---------------
         M raytracer/main.cpp                  |     343 ++++++++++++++++++-------------
         M raytracer/rt_kernel.h               |       2 +-
         M src/Makefile                        |       2 +-
         M src/constants.cuh                   |       8 +++-----
         M src/datatypes.h                     |      12 ++++--------
         M src/device.cu                       |     374 +++++++++++++++++++------------
         M src/file_io.cpp                     |       6 ++----
         M src/main.cpp                        |      26 +++++++++-----------------
       
       13 files changed, 839 insertions(+), 443 deletions(-)
       ---
 (DIR) diff --git a/mfiles/freadbin.m b/mfiles/freadbin.m
       t@@ -32,7 +32,8 @@ function [p, grids, time, params, walls] = freadbin(path, fn)
            p.k_r     = zeros(p.np, 1);               % Rolling stiffness
            p.gamma_s = zeros(p.np, 1);        % Shear viscosity
            p.gamma_r = zeros(p.np, 1);               % Rolling viscosity
       -    p.mu_s    = zeros(p.np, 1);        % Inter-particle contact shear friction coefficient
       +    p.mu_s    = zeros(p.np, 1);        % Inter-particle contact static shear friction coefficient
       +    p.mu_d    = zeros(p.np, 1);        % Inter-particle contact dynamic shear friction coefficient
            p.mu_r    = zeros(p.np, 1);        % Inter-particle contact rolling friction coefficient
            p.C       = zeros(p.np, 1);        % Inter-particle cohesion
            p.E       = zeros(p.np, 1);        % Young's modulus
       t@@ -71,6 +72,7 @@ function [p, grids, time, params, walls] = freadbin(path, fn)
                p.gamma_s(j) = fread(fid, 1, 'float');
                p.gamma_r(j) = fread(fid, 1, 'float');
                p.mu_s(j)    = fread(fid, 1, 'float');
       +        p.mu_d(j)    = fread(fid, 1, 'float');
                p.mu_r(j)    = fread(fid, 1, 'float');
                p.C(j)       = fread(fid, 1, 'float');
                p.E(j)       = fread(fid, 1, 'float');
 (DIR) diff --git a/mfiles/fwritebin.m b/mfiles/fwritebin.m
       t@@ -51,6 +51,7 @@ function fwritebin(path, fn, p, grids, time, params, walls)
                fwrite(fid, p.gamma_s(j), 'float');
                fwrite(fid, p.gamma_r(j), 'float');
                fwrite(fid, p.mu_s(j), 'float');
       +        fwrite(fid, p.mu_d(j), 'float');
                fwrite(fid, p.mu_r(j), 'float');
                fwrite(fid, p.C(j), 'float');
                fwrite(fid, p.E(j), 'float');
 (DIR) diff --git a/mfiles/initsetup.m b/mfiles/initsetup.m
       t@@ -75,7 +75,8 @@ if visualize == 1
        end
        
        % Friction angles
       -ang_s = 25; % Angle of shear resistance
       +ang_s = 30; % Angle of static shear resistance
       +ang_d = 25; % Angle of dynamic shear resistance
        ang_r = 35; % Angle of rolling resistance
        
        % Other particle parameters
 (DIR) diff --git a/python/sphere.py b/python/sphere.py
       t@@ -1,7 +1,10 @@
        #!/usr/bin/env python2.7
        import math
        import numpy
       +import matplotlib as mpl
       +mpl.use('Agg')
        import matplotlib.pyplot as plt
       +from matplotlib.font_manager import FontProperties
        import subprocess
        
        numpy.seterr(all='warn', over='raise')
       t@@ -43,14 +46,12 @@ class Spherebin:
            self.k_n     = numpy.zeros(self.np, dtype=numpy.float32)
            self.k_s     = numpy.zeros(self.np, dtype=numpy.float32)
            self.k_r         = numpy.zeros(self.np, dtype=numpy.float32)
       +    self.gamma_n = numpy.zeros(self.np, dtype=numpy.float32)
            self.gamma_s = numpy.zeros(self.np, dtype=numpy.float32)
            self.gamma_r = numpy.zeros(self.np, dtype=numpy.float32)
            self.mu_s    = numpy.zeros(self.np, dtype=numpy.float32)
       +    self.mu_d    = numpy.zeros(self.np, dtype=numpy.float32)
            self.mu_r    = numpy.zeros(self.np, dtype=numpy.float32)
       -    self.C       = numpy.zeros(self.np, dtype=numpy.float32)
       -    self.E       = numpy.zeros(self.np, dtype=numpy.float32)
       -    self.K       = numpy.zeros(self.np, dtype=numpy.float32)
       -    self.nu      = numpy.zeros(self.np, dtype=numpy.float32)
            self.es_dot  = numpy.zeros(self.np, dtype=numpy.float32)
            self.es         = numpy.zeros(self.np, dtype=numpy.float32)
            self.p         = numpy.zeros(self.np, dtype=numpy.float32)
       t@@ -66,7 +67,7 @@ class Spherebin:
            self.shearmodel   = numpy.zeros(1, dtype=numpy.uint32)
        
            # Wall data
       -    self.nw          = numpy.ones(1, dtype=numpy.uint32)
       +    self.nw          = numpy.ones(1, dtype=numpy.uint32) * nw
            self.w_n     = numpy.zeros(self.nw*self.nd, dtype=numpy.float32).reshape(self.nw,self.nd)
            self.w_x     = numpy.zeros(self.nw, dtype=numpy.float32)
            self.w_m     = numpy.zeros(self.nw, dtype=numpy.float32)
       t@@ -112,14 +113,12 @@ class Spherebin:
              self.k_n     = numpy.zeros(self.np, dtype=numpy.float32)
              self.k_s     = numpy.zeros(self.np, dtype=numpy.float32)
              self.k_r           = numpy.zeros(self.np, dtype=numpy.float32)
       +      self.gamma_n = numpy.zeros(self.np, dtype=numpy.float32)
              self.gamma_s = numpy.zeros(self.np, dtype=numpy.float32)
              self.gamma_r = numpy.zeros(self.np, dtype=numpy.float32)
              self.mu_s    = numpy.zeros(self.np, dtype=numpy.float32)
       +      self.mu_d    = numpy.zeros(self.np, dtype=numpy.float32)
              self.mu_r    = numpy.zeros(self.np, dtype=numpy.float32)
       -      self.C       = numpy.zeros(self.np, dtype=numpy.float32)
       -      self.E       = numpy.zeros(self.np, dtype=numpy.float32)
       -      self.K       = numpy.zeros(self.np, dtype=numpy.float32)
       -      self.nu      = numpy.zeros(self.np, dtype=numpy.float32)
              self.es_dot  = numpy.zeros(self.np, dtype=numpy.float32)
              self.es           = numpy.zeros(self.np, dtype=numpy.float32)
              self.p           = numpy.zeros(self.np, dtype=numpy.float32)
       t@@ -148,14 +147,12 @@ class Spherebin:
                self.k_n[j]     = numpy.fromfile(fh, dtype=numpy.float32, count=1)
                self.k_s[j]     = numpy.fromfile(fh, dtype=numpy.float32, count=1)
                self.k_r[j]     = numpy.fromfile(fh, dtype=numpy.float32, count=1)
       +        self.gamma_n[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
                self.gamma_s[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
                self.gamma_r[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
                self.mu_s[j]    = numpy.fromfile(fh, dtype=numpy.float32, count=1) 
       +        self.mu_d[j]    = numpy.fromfile(fh, dtype=numpy.float32, count=1) 
                self.mu_r[j]    = numpy.fromfile(fh, dtype=numpy.float32, count=1)
       -        self.C[j]       = numpy.fromfile(fh, dtype=numpy.float32, count=1)
       -        self.E[j]       = numpy.fromfile(fh, dtype=numpy.float32, count=1)
       -        self.K[j]       = numpy.fromfile(fh, dtype=numpy.float32, count=1)
       -        self.nu[j]      = numpy.fromfile(fh, dtype=numpy.float32, count=1)
                self.es_dot[j]  = numpy.fromfile(fh, dtype=numpy.float32, count=1)
                self.es[j]      = numpy.fromfile(fh, dtype=numpy.float32, count=1)
                self.p[j]       = numpy.fromfile(fh, dtype=numpy.float32, count=1)
       t@@ -170,6 +167,7 @@ class Spherebin:
        
              # Wall data
              self.nw            = numpy.fromfile(fh, dtype=numpy.uint32, count=1)
       +
              self.w_n     = numpy.zeros(self.nw*self.nd, dtype=numpy.float32).reshape(self.nw,self.nd)
              self.w_x     = numpy.zeros(self.nw, dtype=numpy.float32)
              self.w_m     = numpy.zeros(self.nw, dtype=numpy.float32)
       t@@ -245,14 +243,12 @@ class Spherebin:
                fh.write(self.k_n[j].astype(numpy.float32))
                fh.write(self.k_s[j].astype(numpy.float32))
                fh.write(self.k_r[j].astype(numpy.float32))
       +        fh.write(self.gamma_n[j].astype(numpy.float32))
                fh.write(self.gamma_s[j].astype(numpy.float32))
                fh.write(self.gamma_r[j].astype(numpy.float32))
                fh.write(self.mu_s[j].astype(numpy.float32))
       +        fh.write(self.mu_d[j].astype(numpy.float32))
                fh.write(self.mu_r[j].astype(numpy.float32))
       -        fh.write(self.C[j].astype(numpy.float32))
       -        fh.write(self.E[j].astype(numpy.float32))
       -        fh.write(self.K[j].astype(numpy.float32))
       -        fh.write(self.nu[j].astype(numpy.float32))
                fh.write(self.es_dot[j].astype(numpy.float32))
                fh.write(self.es[j].astype(numpy.float32))
                fh.write(self.p[j].astype(numpy.float32))
       t@@ -310,22 +306,26 @@ class Spherebin:
        
            # Show radii as histogram
            if histogram == 1:
       +      fig = plt.figure(figsize=(15,10), dpi=300)
       +      figtitle = 'Particle size distribution, {0} particles'.format(self.np[0])
       +      fig.text(0.5,0.95,figtitle,horizontalalignment='center',fontproperties=FontProperties(size=18))
              bins = 20
              # Create histogram
              plt.hist(self.radius, bins)
              # Plot
       -      plt.title('Particle size distribution, {0} particles'.format(self.np))
              plt.xlabel('Radii [m]')
              plt.ylabel('Count')
              plt.axis('tight')
       -      plt.show()
       +      fig.savefig('psd.png')
       +      fig.clf()
         
        
       -  # Initialize particle positions to non-overlapping configuration
       -  def initsetup(self, g = numpy.array([0.0, 0.0, -9.80665]), 
       -                            gridnum = numpy.array([12, 12, 36]),
       -                      periodic = 1,
       -                      shearmodel = 1):
       +  # Initialize particle positions to completely random, non-overlapping configuration.
       +  # This method is very compute intensive at high particle numbers.
       +  def initRandomPos(self, g = numpy.array([0.0, 0.0, -9.80665]), 
       +                                gridnum = numpy.array([12, 12, 36]),
       +                          periodic = 1,
       +                          shearmodel = 2):
            """ Initialize particle positions in loose, cubic configuration.
                Radii must be set beforehand.
                xynum is the number of rows in both x- and y- directions.
       t@@ -358,7 +358,7 @@ class Spherebin:
                  delta = self.x[i] - self.x[j]
                  delta_len = math.sqrt(numpy.dot(delta,delta)) \
                              - (self.radius[i] + self.radius[j])
       -          if (delta_len < 0):
       +          if (delta_len < 0.0):
                    overlaps = True
              print "\rFinding non-overlapping particle positions, {0} % complete".format(numpy.ceil(i/self.np[0]*100)),
           
       t@@ -372,9 +372,222 @@ class Spherebin:
            self.w_vel[0] = 0.0
            self.w_force[0] = 0.0
            self.w_devs[0] = 0.0
       -    self.nw[0] = numpy.ones(1, dtype=numpy.uint32) * 1
       +    #self.nw[0] = numpy.ones(1, dtype=numpy.uint32) * 1
       +    self.nw = numpy.ones(1, dtype=numpy.uint32) * 1
       +
       +  # Initialize particle positions to regular, grid-like, non-overlapping configuration
       +  def initGridPos(self, g = numpy.array([0.0, 0.0, -9.80665]), 
       +                              gridnum = numpy.array([12, 12, 36]),
       +                        periodic = 1,
       +                        shearmodel = 2):
       +    """ Initialize particle positions in loose, cubic configuration.
       +        Radii must be set beforehand.
       +        xynum is the number of rows in both x- and y- directions.
       +    """
       +    self.g = g
       +    self.periodic[0] = periodic
       +
       +    # Calculate cells in grid
       +    self.num = gridnum
       +
       +    # World size
       +    r_max = numpy.amax(self.radius)
       +    cellsize = 2.1 * r_max
       +    self.L = self.num * cellsize
       +
       +    # Check whether there are enough grid cells 
       +    if ((self.num[0]*self.num[1]*self.num[2]-(2**3)) < self.np):
       +      print "Error! The grid is not sufficiently large."
       +      raise NameError('Error! The grid is not sufficiently large.')
       +
       +    gridpos = numpy.zeros(self.nd, dtype=numpy.uint32)
       +
       +    # Make sure grid is sufficiently large if every second level is moved
       +    if (self.periodic[0] == 1):
       +      self.num[0] -= 1
       +      self.num[1] -= 1
       +      
       +      # Check whether there are enough grid cells 
       +      if ((self.num[0]*self.num[1]*self.num[2]-(2*3*3)) < self.np):
       +        print "Error! The grid is not sufficiently large."
       +        raise NameError('Error! The grid is not sufficiently large.')
       +
       +
       +    # Particle positions randomly distributed without overlap
       +    for i in range(self.np):
       +
       +      # Find position in 3d mesh from linear index
       +      gridpos[0] = (i % (self.num[0]))
       +      gridpos[1] = numpy.floor(i/(self.num[0])) % (self.num[0])
       +      gridpos[2] = numpy.floor(i/((self.num[0])*(self.num[1]))) #\
       +                   #% ((self.num[0])*(self.num[1]))
       +        
       +      for d in range(self.nd):
       +        self.x[i,d] = gridpos[d] * cellsize + 0.5*cellsize
       +
       +      if (self.periodic[0] == 1): # Allow pushing every 2.nd level out of lateral boundaries
       +        # Offset every second level
       +        if (gridpos[2] % 2):
       +          self.x[i,0] += 0.5*cellsize
       +          self.x[i,1] += 0.5*cellsize
       +
       +    self.shearmodel[0] = shearmodel
       +
       +    # Readjust grid to correct size
       +    if (self.periodic[0] == 1):
       +      self.num[0] += 1
       +      self.num[1] += 1
       +
       +    # Initialize upper wall
       +    self.w_n[0,2] = -1.0
       +    self.w_x[0] = self.L[2]
       +    self.w_m[0] = self.rho[0] * self.np * math.pi * r_max**3
       +    self.w_vel[0] = 0.0
       +    self.w_force[0] = 0.0
       +    self.w_devs[0] = 0.0
       +    self.nw = numpy.ones(1, dtype=numpy.uint32) * 1
       +
       +
       +  # Initialize particle positions to non-overlapping configuration
       +  # in grid, with a certain element of randomness
       +  def initRandomGridPos(self, g = numpy.array([0.0, 0.0, -9.80665]), 
       +                                    gridnum = numpy.array([12, 12, 32]),
       +                              periodic = 1,
       +                              shearmodel = 2):
       +    """ Initialize particle positions in loose, cubic configuration.
       +        Radii must be set beforehand.
       +        xynum is the number of rows in both x- and y- directions.
       +    """
       +    self.g = g
       +    self.periodic[0] = periodic
       +
       +    # Calculate cells in grid
       +    coarsegrid = numpy.floor(gridnum/2) 
       +
       +    # World size 
       +    r_max = numpy.amax(self.radius)
       +    cellsize = 2.1 * r_max * 2 # Cells in grid 2*size to make space for random offset
       +
       +    # Check whether there are enough grid cells 
       +    if (((coarsegrid[0]-1)*(coarsegrid[1]-1)*(coarsegrid[2]-1)) < self.np):
       +      print "Error! The grid is not sufficiently large."
       +      raise NameError('Error! The grid is not sufficiently large.')
       +
       +    gridpos = numpy.zeros(self.nd, dtype=numpy.uint32)
       +
       +    # Particle positions randomly distributed without overlap
       +    for i in range(self.np):
        
       +      # Find position in 3d mesh from linear index
       +      gridpos[0] = (i % (coarsegrid[0]))
       +      gridpos[1] = numpy.floor(i/(coarsegrid[0])) % (coarsegrid[0])
       +      gridpos[2] = numpy.floor(i/((coarsegrid[0])*(coarsegrid[1])))
       +        
       +      # Place particles in grid structure, and randomly adjust the positions
       +      # within the oversized cells (uniform distribution)
       +      for d in range(self.nd):
       +        r = self.radius[i]*1.05
       +        self.x[i,d] = gridpos[d] * cellsize \
       +                      + ((cellsize-r) - r) * numpy.random.random_sample() + r
        
       +    self.shearmodel[0] = shearmodel
       +
       +    # Calculate new grid with cell size equal to max. particle diameter
       +    x_max = numpy.max(self.x[:,0] + self.radius)
       +    y_max = numpy.max(self.x[:,1] + self.radius)
       +    z_max = numpy.max(self.x[:,2] + self.radius)
       +    cellsize = 2.1 * r_max
       +    # Adjust size of world
       +    self.num[0] = numpy.ceil(x_max/cellsize)
       +    self.num[1] = numpy.ceil(y_max/cellsize)
       +    self.num[2] = numpy.ceil(z_max/cellsize)
       +    self.L = self.num * cellsize
       +
       +    # Initialize upper wall
       +    self.w_n[0,2] = -1.0
       +    self.w_x[0] = self.L[2]
       +    self.w_m[0] = self.rho[0] * self.np * math.pi * r_max**3
       +    self.w_vel[0] = 0.0
       +    self.w_force[0] = 0.0
       +    self.w_devs[0] = 0.0
       +    self.nw = numpy.ones(1, dtype=numpy.uint32) * 1
       +
       +  # Adjust grid and upper wall for consolidation
       +  def consolidate(self, deviatoric_stress = 10e3, 
       +                              periodic = 1):
       +    """ Setup consolidation experiment. Specify the upper wall 
       +        deviatoric stress in Pascal, default value is 10 kPa.
       +    """
       +
       +    # Compute new grid, scaled to fit max. and min. particle positions
       +    z_min = numpy.min(self.x[:,2] - self.radius)
       +    z_max = numpy.max(self.x[:,2] + self.radius)
       +    cellsize = self.L[0] / self.num[0]
       +    z_adjust = 1.1        # Overheightening of grid. 1.0 = no overheightening
       +    self.num[2] = numpy.ceil((z_max-z_min)*z_adjust/cellsize)
       +    self.L[2] = (z_max-z_min)*z_adjust
       +
       +    # Initialize upper wall
       +    self.w_n[0,2] = -1.0
       +    self.w_x[0] = self.L[2]
       +    self.w_m[0] = self.rho[0] * self.np * math.pi * (cellsize/2.0)**3
       +    self.w_vel[0] = 0.0
       +    self.w_force[0] = 0.0
       +    self.w_devs[0] = deviatoric_stress
       +    self.nw = numpy.ones(1, dtype=numpy.uint32) * 1
       +
       +
       +  # Adjust grid and upper wall for shear, and fix boundary particle velocities
       +  def shear(self, deviatoric_stress = 10e3, 
       +                        shear_strain_rate = 1,
       +                        periodic = 1):
       +    """ Setup shear experiment. Specify the upper wall 
       +        deviatoric stress in Pascal, default value is 10 kPa.
       +        The shear strain rate is the shear length divided by the
       +        initial height per second.
       +    """
       +
       +    # Compute new grid, scaled to fit max. and min. particle positions
       +    z_min = numpy.min(self.x[:,2] - self.radius)
       +    z_max = numpy.max(self.x[:,2] + self.radius)
       +    cellsize = self.L[0] / self.num[0]
       +    z_adjust = 1.3        # Overheightening of grid. 1.0 = no overheightening
       +    self.num[2] = numpy.ceil((z_max-z_min)*z_adjust/cellsize)
       +    self.L[2] = (z_max-z_min)*z_adjust
       +
       +    # Initialize upper wall
       +    self.w_devs[0] = deviatoric_stress
       +
       +    # Zero kinematics
       +    self.vel     = numpy.zeros(self.np*self.nd, dtype=numpy.float32).reshape(self.np,self.nd)
       +    self.angvel  = numpy.zeros(self.np*self.nd, dtype=numpy.float32).reshape(self.np,self.nd)
       +
       +    #fixheight = 2*cellsize
       +    fixheight = cellsize
       + 
       +    # Fix horizontal velocity to 0.0 of lowermost particles
       +    I = numpy.nonzero(self.x[:,2] < (z_min + fixheight)) # Find indices of lowermost 10%
       +    self.fixvel[I] = 1;
       +    self.angvel[I,0] = 0.0;
       +    self.angvel[I,1] = 0.0;
       +    self.angvel[I,2] = 0.0;
       +    self.vel[I,0] = 0.0; # x-dim
       +    self.vel[I,1] = 0.0; # y-dim
       +
       +    # Fix horizontal velocity to specific value of uppermost particles
       +    I = numpy.nonzero(self.x[:,2] > (z_max - fixheight)) # Find indices of lowermost 10%
       +    self.fixvel[I] = 1;
       +    self.angvel[I,0] = 0.0;
       +    self.angvel[I,1] = 0.0;
       +    self.angvel[I,2] = 0.0;
       +    self.vel[I,0] = (z_max-z_min)*shear_strain_rate
       +    self.vel[I,1] = 0.0; # y-dim
       +
       +    # Zero x-axis displacement
       +    self.xsum = numpy.zeros(self.np, dtype=numpy.float32)
       +
       +
       + 
          def initTemporal(self, total,
                                       current = 0.0,
                                 file_dt = 0.01,
       t@@ -395,14 +608,16 @@ class Spherebin:
            self.time_file_dt[0] = file_dt
            self.time_step_count[0] = 0
        
       -  def defaultparams(self, ang_s = 25,
       -                                ang_r = 35,
       +  def defaultParams(self, ang_s = 20,
       +                                ang_d = 15,
       +                                ang_r = 0,
                                  rho = 3600,
       -                          k_n = 4e5,
       -                          k_s = 4e5,
       +                          k_n = 1e9,
       +                          k_s = 6.6e8,
                                  k_r = 4e6,
       -                          gamma_s = 4e2,
       -                          gamma_r = 4e2,
       +                          gamma_n = 1e3,
       +                          gamma_s = 1e3,
       +                          gamma_r = 2e3,
                                  capillaryCohesion = 0):
            """ Initialize particle parameters to default values.
                Radii must be set prior to calling this function.
       t@@ -422,26 +637,28 @@ class Spherebin:
            # Contact rolling elastic stiffness (for shearmodel = 2), N/m
            self.k_r = numpy.ones(self.np, dtype=numpy.float32) * k_r
        
       -    # Contact shear viscosity (for shearmodel = 1), Ns/m
       +    # Contact normal viscosity. Critical damping: 2*sqrt(m*k_n).
       +    # Normal force component elastic if nu = 0.0.
       +    #self.gamma_n = numpy.ones(self.np, dtype=numpy.float32) \
       +    #              * nu_frac * 2.0 * math.sqrt(4.0/3.0 * math.pi * numpy.amin(self.radius)**3 \
       +    #              * self.rho[0] * self.k_n[0])
       +    self.gamma_n = numpy.ones(self.np, dtype=numpy.float32) * gamma_n
       +                      
       +    # Contact shear viscosity, Ns/m
            self.gamma_s = numpy.ones(self.np, dtype=numpy.float32) * gamma_s
        
       -    # Contact rolling visscosity (for shearmodel = 1), Ns/m?
       +    # Contact rolling viscosity, Ns/m?
            self.gamma_r = numpy.ones(self.np, dtype=numpy.float32) * gamma_r
        
       -    # Contact shear friction coefficient
       +    # Contact static shear friction coefficient
            self.mu_s = numpy.ones(self.np, dtype=numpy.float32) * numpy.tan(numpy.radians(ang_s))
        
       +    # Contact dynamic shear friction coefficient
       +    self.mu_d = numpy.ones(self.np, dtype=numpy.float32) * numpy.tan(numpy.radians(ang_d))
       +
            # Contact rolling friction coefficient
            self.mu_r = numpy.ones(self.np, dtype=numpy.float32) * numpy.tan(numpy.radians(ang_r))
        
       -    r_min = numpy.amin(self.radius)
       -    
       -    # Poisson's ratio. Critical damping: 2*sqrt(m*k_n).
       -    # Normal force component elastic if nu = 0.0.
       -    self.nu = numpy.ones(self.np, dtype=numpy.float32) \
       -              * 0.1 * 2.0 * math.sqrt(4.0/3.0 * math.pi * r_min**3 \
       -              * self.rho[0] * self.k_n[0])
       -    
            # Global parameters
            # if 1 = all particles have the same values for the physical parameters
            self.globalparams[0] = 1
       t@@ -522,6 +739,12 @@ def visualize(project, method = 'energy', savefig = False, outformat = 'png'):
        
          lastfile = status(project)
        
       +  ### Plotting
       +  fig = plt.figure(figsize=(15,10),dpi=300)
       +  figtitle = "{0}, simulation {1}".format(method, project)
       +  fig.text(0.5,0.95,figtitle,horizontalalignment='center',fontproperties=FontProperties(size=18))
       +
       +
          if method == 'energy':
        
            # Allocate arrays
       t@@ -547,38 +770,50 @@ def visualize(project, method = 'energy', savefig = False, outformat = 'png'):
            
            t = numpy.linspace(0.0, sb.time_current, lastfile+1)
        
       -    # Plotting
       -    plt.subplot(2,3,1)
       -    plt.xlabel('Time [s]')
       -    plt.ylabel('Total potential energy [J]')
       -    plt.plot(t, Epot, '+-')
       -
       -    plt.subplot(2,3,2)
       -    plt.xlabel('Time [s]')
       -    plt.ylabel('Total kinetic energy [J]')
       -    plt.plot(t, Ekin, '+-')
       -
       -    plt.subplot(2,3,3)
       -    plt.xlabel('Time [s]')
       -    plt.ylabel('Total rotational energy [J]')
       -    plt.plot(t, Erot, '+-')
       -
       -    plt.subplot(2,3,4)
       -    plt.xlabel('Time [s]')
       -    plt.ylabel('Shear energy rate [W]')
       -    plt.plot(t, Es_dot, '+-')
       -
       -    plt.subplot(2,3,5)
       -    plt.xlabel('Time [s]')
       -    plt.ylabel('Total shear energy [J]')
       -    plt.plot(t, Es, '+-')
       -
       -    plt.subplot(2,3,6)
       -    plt.xlabel('Time [s]')
       -    plt.ylabel('Total energy [J]')
       -    plt.plot(t, Esum, '+-')
       -
       -    #plt.show()
       +    # Potential energy
       +    ax1 = plt.subplot2grid((2,3),(0,0))
       +    ax1.set_xlabel('Time [s]')
       +    ax1.set_ylabel('Total potential energy [J]')
       +    ax1.plot(t, Epot, '+-')
       +
       +    # Kinetic energy
       +    ax2 = plt.subplot2grid((2,3),(0,1))
       +    ax2.set_xlabel('Time [s]')
       +    ax2.set_ylabel('Total kinetic energy [J]')
       +    ax2.plot(t, Ekin, '+-')
       +
       +    # Rotational energy
       +    ax3 = plt.subplot2grid((2,3),(0,2))
       +    ax3.set_xlabel('Time [s]')
       +    ax3.set_ylabel('Total rotational energy [J]')
       +    ax3.plot(t, Erot, '+-')
       +
       +    # Shear energy rate
       +    ax4 = plt.subplot2grid((2,3),(1,0))
       +    ax4.set_xlabel('Time [s]')
       +    ax4.set_ylabel('Shear energy rate [W]')
       +    ax4.plot(t, Es_dot, '+-')
       +    
       +    # Shear energy
       +    ax5 = plt.subplot2grid((2,3),(1,1))
       +    ax5.set_xlabel('Time [s]')
       +    ax5.set_ylabel('Total shear energy [J]')
       +    ax5.plot(t, Es, '+-')
       +
       +    # Total energy
       +    #ax6 = plt.subplot2grid((2,3),(1,2))
       +    #ax6.set_xlabel('Time [s]')
       +    #ax6.set_ylabel('Total energy [J]')
       +    #ax6.plot(t, Esum, '+-')
       +
       +    # Combined view
       +    ax6 = plt.subplot2grid((2,3),(1,2))
       +    ax6.set_xlabel('Time [s]')
       +    ax6.set_ylabel('Energy [J]')
       +    ax6.plot(t, Epot, '+-g')
       +    ax6.plot(t, Ekin, '+-b')
       +    ax6.plot(t, Erot, '+-r')
       +    ax6.legend(('$\sum E_{pot}$','$\sum E_{kin}$','$\sum E_{rot}$'), 'upper right', shadow=True)
        
          elif method == 'walls':
        
       t@@ -590,43 +825,92 @@ def visualize(project, method = 'energy', savefig = False, outformat = 'png'):
        
              # Allocate arrays on first run
              if (i == 0):
       -        wforce = numpy.zeros(lastfile+1, sb.nw[0])
       -        wvel   = numpy.zeros(lastfile+1, sb.nw[0])
       -        wpos   = numpy.zeros(lastfile+1, sb.nw[0])
       -        wdevs  = numpy.zeros(lastfile+1, sb.nw[0])
       -
       -      wforce[i] = sb.w_force
       -      wvel[i]   = sb.w_vel
       -      wpos[i]   = sb.w_x
       -      wdevs[i]  = sb.w_devs
       +        wforce = numpy.zeros((lastfile+1)*sb.nw[0], dtype=numpy.float32).reshape((lastfile+1), sb.nw[0])
       +        wvel   = numpy.zeros((lastfile+1)*sb.nw[0], dtype=numpy.float32).reshape((lastfile+1), sb.nw[0])
       +        wpos   = numpy.zeros((lastfile+1)*sb.nw[0], dtype=numpy.float32).reshape((lastfile+1), sb.nw[0])
       +        wdevs  = numpy.zeros((lastfile+1)*sb.nw[0], dtype=numpy.float32).reshape((lastfile+1), sb.nw[0])
       +
       +      wforce[i] = sb.w_force[0]
       +      wvel[i]   = sb.w_vel[0]
       +      wpos[i]   = sb.w_x[0]
       +      wdevs[i]  = sb.w_devs[0]
            
            t = numpy.linspace(0.0, sb.time_current, lastfile+1)
        
            # Plotting
       -    plt.subplot(2,2,1)
       -    plt.xlabel('Time [s]')
       -    plt.ylabel('Position [m]')
       -    plt.plot(t, wpos, '+-')
       +    ax1 = plt.subplot2grid((2,2),(0,0))
       +    ax1.set_xlabel('Time [s]')
       +    ax1.set_ylabel('Position [m]')
       +    ax1.plot(t, wpos, '+-')
       +
       +    ax2 = plt.subplot2grid((2,2),(0,1))
       +    ax2.set_xlabel('Time [s]')
       +    ax2.set_ylabel('Velocity [m/s]')
       +    ax2.plot(t, wvel, '+-')
        
       -    plt.subplot(2,2,2)
       -    plt.xlabel('Time [s]')
       -    plt.ylabel('Velocity [m/s]')
       -    plt.plot(t, wvel, '+-')
       +    ax3 = plt.subplot2grid((2,2),(1,0))
       +    ax3.set_xlabel('Time [s]')
       +    ax3.set_ylabel('Force [N]')
       +    ax3.plot(t, wforce, '+-')
        
       -    plt.subplot(2,2,3)
       -    plt.xlabel('Time [s]')
       -    plt.ylabel('Force [N]')
       -    plt.plot(t, wforce, '+-')
       +    ax4 = plt.subplot2grid((2,2),(1,1))
       +    ax4.set_xlabel('Time [s]')
       +    ax4.set_ylabel('Deviatoric stress [Pa]')
       +    ax4.plot(t, wdevs, '+-')
        
       -    plt.subplot(2,2,4)
       -    plt.xlabel('Time [s]')
       -    plt.ylabel('Deviatoric stress [Pa]')
       -    plt.plot(t, wdevs, '+-')
       +
       +  elif method == 'shear':
       +
       +    sb = Spherebin()
       +    # Read stress values from project binaries
       +    for i in range(lastfile+1):
       +
       +      fn = "../output/{0}.output{1}.bin".format(project, i)
       +      sb.readbin(fn, verbose = False)
       +
       +      # First iteration: Allocate arrays and find constant values
       +      if (i == 0):
       +        xdisp    = numpy.zeros(lastfile+1, dtype=numpy.float32)  # Shear displacement
       +        sigma    = numpy.zeros(lastfile+1, dtype=numpy.float32)  # Normal stress
       +        tau      = numpy.zeros(lastfile+1, dtype=numpy.float32)  # Shear stress
       +        dilation = numpy.zeros(lastfile+1, dtype=numpy.float32)  # Upper wall position
       +
       +        fixvel = numpy.nonzero(sb.fixvel > 0.0)
       +        #fixvel_upper = numpy.nonzero(sb.vel[fixvel,0] > 0.0)
       +        shearvel = sb.vel[fixvel,0].max()
       +        w_x0 = sb.w_x[0]
       +        A = sb.L[0] * sb.L[1]   # Upper surface area
       +
       +      # Summation of shear stress contributions
       +      for j in fixvel[0]:
       +        if (sb.vel[j,0] > 0.0):
       +          tau[i] += -sb.force[j,0]
       +
       +      xdisp[i]    = sb.time_current[0] * shearvel
       +      sigma[i]    = sb.w_force[0] / A
       +      sigma[i]    = sb.w_devs[0]
       +      #tau[i]      = sb.force[fixvel_upper,0].sum() / A
       +      dilation[i] = sb.w_x[0] - w_x0
       +
       +    # Plot stresses
       +    ax1 = plt.subplot2grid((2,1),(0,0))
       +    ax1.set_xlabel('Shear distance [m]')
       +    ax1.set_ylabel('Stress [Pa]')
       +    ax1.plot(xdisp, sigma, '+-g')
       +    ax1.plot(xdisp, tau, '+-r')
       +    #plt.legend('$\sigma`$','$\tau$')
       +
       +    # Plot dilation
       +    ax2 = plt.subplot2grid((2,1),(1,0))
       +    ax2.set_xlabel('Shear distance [m]')
       +    ax2.set_ylabel('Dilation [m]')
       +    ax2.plot(xdisp, dilation, '+-')
        
        
          # Optional save of figure
          if (savefig == True):
       -    plt.savefig("{0}-{1}.{2}".format(project, method, outformat))
       +    fig.savefig("{0}-{1}.{2}".format(project, method, outformat))
       +    fig.clf()
          else:
            plt.show()
        
 (DIR) diff --git a/python/test.py b/python/test.py
       t@@ -1,15 +0,0 @@
       -#!/usr/bin/env python
       -
       -# Import sphere functionality
       -from sphere import *
       -
       -#render("../input/1e3-init-pyout.bin")
       -
       -# New class
       -init = Spherebin(np = 1e3, nd = 3)
       -init.generateRadii(psd = 'uni', histogram = 0)
       -init.defaultparams()
       -init.initsetup()
       -init.initTemporal(total = 1.5)
       -init.writebin("../input/1e3-pyinit.bin")
       -render("../input/1e3-pyinit.bin", out = "~/Desktop/init")
 (DIR) diff --git a/raytracer/main.cpp b/raytracer/main.cpp
       t@@ -15,8 +15,8 @@ int main(const int argc, const char* argv[])
          if (argc < 6 || argc > 8 ){
            cout << "Usage: " << argv[0] << " <GPU or CPU> <sphere-binary.bin> <width> <height> <output-image.ppm>\n"
                 << "or\n"
       -         << "Usage: " << argv[0] << " GPU <sphere-binary.bin> <width> <height> <output-image.ppm>"
       -         << "<pressure | pressure50 | es | es_dot> <max value in color range>\n";
       +         << "Usage: " << argv[0] << " GPU <sphere-binary.bin | colorbar> <width> <height> <output-image.ppm>"
       +         << " <pressure | pressure50 | es | es_dot> <max value in color range>\n";
            return 1;
          }
        
       t@@ -24,113 +24,6 @@ int main(const int argc, const char* argv[])
               << "This is the SPHERE raytracer\n"
               << "----------------------------\n";
        
       -
       -  cout << "Reading binary: " << argv[2] << "\n";
       -
       -  FILE* fin;
       -  if ((fin = fopen(argv[2], "rb")) == NULL) {
       -    cout << "  Error encountered while trying to open binary '"
       -         << argv[2] << "'\n";
       -    return 1;
       -  }
       -
       -  // Read the number of dimensions and particles
       -  unsigned int nd, np;
       -  (void)fread(&nd, sizeof(nd), 1, fin);
       -  if (nd != 3) {
       -    cout << "  Input binary contains " << nd
       -         << "D data, this raytracer is made for 3D data\n";
       -    return 1;
       -  }
       -  
       -  (void)fread(&np, sizeof(np), 1, fin);
       -  cout << "  Number of particles: " << np << "\n";
       -
       -  // Allocate particle structure array
       -  cout << "  Allocating host memory\n";
       -  float4* p      = new float4[np];
       -  float*  es_dot = new float[np];
       -  float*  es     = new float[np];
       -  float*  pres   = new float[np];
       -  for (unsigned int i=0; i<np; i++) {
       -    pres[i]   = 0.0f; // Initialize values to zero
       -    es_dot[i] = 0.0f;
       -  }
       -
       -  // Read temporal information
       -  float dt, file_dt;
       -  double current, total;
       -  unsigned int step_count;
       -  (void)fread(&dt, sizeof(dt), 1, fin);
       -  (void)fread(&current, sizeof(current), 1, fin);
       -  (void)fread(&total, sizeof(total), 1, fin);
       -  (void)fread(&file_dt, sizeof(file_dt), 1, fin);
       -  (void)fread(&step_count, sizeof(step_count), 1, fin);
       -
       -  // Canonical coordinate system origo
       -  f3 origo;
       -  (void)fread(&origo, sizeof(float)*3, 1, fin);
       -  
       -  // Canonical coordinate system dimensions
       -  f3 L;
       -  (void)fread(&L, sizeof(float)*3, 1, fin);
       -
       -  // Skip over irrelevant data
       -  float blankf;
       -  //f3 blankf3;
       -  unsigned int blankui;
       -  for (int j=0; j<3; j++)         // Skip over grid.num data
       -    (void)fread(&blankui, sizeof(blankui), 1, fin);
       -
       -
       -  // Load data into particle array
       -  for (unsigned int i=0; i<np; i++) {
       -    (void)fread(&p[i].x, sizeof(float), 1, fin);
       -    for (int j=0; j<4; j++)
       -      (void)fread(&blankf, sizeof(blankf), 1, fin);
       -
       -    (void)fread(&p[i].y, sizeof(float), 1, fin);
       -    for (int j=0; j<4; j++)
       -      (void)fread(&blankf, sizeof(blankf), 1, fin);
       -    
       -    (void)fread(&p[i].z, sizeof(float), 1, fin);
       -    for (int j=0; j<4; j++)
       -      (void)fread(&blankf, sizeof(blankf), 1, fin);
       -  }
       -  for (unsigned int i=0; i<np; i++) {
       -    (void)fread(&blankf, sizeof(blankf), 1, fin); // fixvel
       -    (void)fread(&blankf, sizeof(blankf), 1, fin); // xsum
       -    (void)fread(&p[i].w, sizeof(float), 1, fin);
       -    for (int j=0; j<12; j++)
       -      (void)fread(&blankf, sizeof(blankf), 1, fin);
       -    (void)fread(&es_dot[i], sizeof(float), 1, fin);
       -    (void)fread(&es[i], sizeof(float), 1, fin);
       -    (void)fread(&pres[i], sizeof(float), 1, fin);
       -  }
       -
       -  fclose(fin);        // Binary read complete
       -
       -  cout << "  Spatial dimensions: " 
       -       << L.x << "*" << L.y << "*" << L.z << " m\n";
       - 
       -  // Eye position and focus point
       -  f3 eye    = {2.5f*L.x, -4.2*L.y, 2.0f*L.z};
       -  f3 lookat = {0.45f*L.x, 0.5f*L.y, 0.4f*L.z};
       -  if (L.z > (L.x + L.y)/1.5f) { // Render only bottom of world (for initsetup's)
       -    eye.x = 2.5f*L.x;
       -    eye.y = -6.2*L.y;
       -    eye.z = 0.12*L.z;
       -    lookat.x = 0.45f*L.x;
       -    lookat.y = 0.5f*L.y;
       -    lookat.z = 0.012f*L.z;
       -  }
       -
       -  // Screen width in world coordinates
       -  //float imgw = 1.4f*L.x;
       -  //float imgw = pow(L.x*L.y*L.z, 0.32f);
       -  //float imgw = sqrt(L.x*L.y)*1.2f; // Adjust last float to capture entire height
       -  float imgw = L.x*1.35f;
       -
          // Allocate memory for image
          unsigned int width = atoi(argv[3]);
          unsigned int height = atoi(argv[4]);
       t@@ -141,52 +34,212 @@ int main(const int argc, const char* argv[])
          rgb* img;
          img = new rgb [height*width];
        
       -  // Determine visualization mode
       -  int visualize = 0; // 0: ordinary view
       -  float max_val = 0;
       -  if (argc == 8) {
       -    if(strcmp(argv[6],"pressure") == 0)
       -      visualize = 1; // 1: pressure view
       -    if(strcmp(argv[6],"es_dot") == 0)
       -      visualize = 2; // 2: es_dot view
       -    if(strcmp(argv[6],"es") == 0)
       -      visualize = 3; // 3: es view
       -    if(strcmp(argv[6],"pressure50") == 0)
       -      visualize = 4; // 4: pressure50 view
       - 
       -    // Read max. value specified in command args.
       -    max_val = atof(argv[7]);
       -  }
       +  // Render colorbar image
       +  if (strcmp(argv[2],"colorbar") == 0) {
       +
       +    for (unsigned int y=0; y<height; y++) {
       +      for (unsigned int x=0; x<width; x++) {
       +
       +        // Colormap value is relative to width position
       +        float ratio = (float) (x+1)/width;
       +
       +        // Determine Blue-White-Red color components
       +        float red   = fmin(1.0f, 0.209f*ratio*ratio*ratio - 2.49f*ratio*ratio + 3.0f*ratio + 0.0109f);
       +        float green = fmin(1.0f, -2.44f*ratio*ratio + 2.15f*ratio + 0.369f);
       +        float blue  = fmin(1.0f, -2.21f*ratio*ratio + 1.61f*ratio + 0.573f);
       +
       +        // Write pixel value to image array
       +        //img[x + y*width].r = red * 250.f;
       +        img[x + y*width].r = red * 250.f;
       +        img[x + y*width].g = green * 250.f;
       +        img[x + y*width].b = blue * 250.f;
       +
       +      }
       +    }
       +  } else { // Render sphere binary
       +
       +    cout << "Reading binary: " << argv[2] << "\n";
       +
       +    FILE* fin;
       +    if ((fin = fopen(argv[2], "rb")) == NULL) {
       +      cout << "  Error encountered while trying to open binary '"
       +        << argv[2] << "'\n";
       +      return 1;
       +    }
        
       -  if (strcmp(argv[1],"GPU") == 0) {
       -    
       -    // Call cuda wrapper
       -    if (rt(p, np, img, width, height, origo, L, eye, lookat, imgw, visualize, max_val, pres, es_dot, es) != 0) {
       -      cout << "\nError in rt()\n";
       +    // Read the number of dimensions and particles
       +    unsigned int nd, np;
       +    (void)fread(&nd, sizeof(nd), 1, fin);
       +    if (nd != 3) {
       +      cout << "  Input binary contains " << nd
       +        << "D data, this raytracer is made for 3D data\n";
              return 1;
            }
       -  } else if (strcmp(argv[1],"CPU") == 0) {
       -    
       -    // Call CPU wrapper
       -    if (rt_cpu(p, np, img, width, height, origo, L, eye, lookat, imgw) != 0) {
       -      cout << "\nError in rt_cpu()\n";
       +
       +    (void)fread(&np, sizeof(np), 1, fin);
       +    cout << "  Number of particles: " << np << "\n";
       +
       +    // Allocate particle structure array
       +    cout << "  Allocating host memory\n";
       +    float4* p      = new float4[np];
       +    float*  fixvel = new float[np];
       +    float*  es_dot = new float[np];
       +    float*  es     = new float[np];
       +    float*  pres   = new float[np];
       +    for (unsigned int i=0; i<np; i++) {
       +      fixvel[i] = 0.0f;
       +      pres[i]   = 0.0f; // Initialize values to zero
       +      es_dot[i] = 0.0f;
       +      es[i]     = 0.0f;
       +    }
       +
       +    // Read temporal information
       +    float dt, file_dt;
       +    double current, total;
       +    unsigned int step_count;
       +    (void)fread(&dt, sizeof(dt), 1, fin);
       +    (void)fread(&current, sizeof(current), 1, fin);
       +    (void)fread(&total, sizeof(total), 1, fin);
       +    (void)fread(&file_dt, sizeof(file_dt), 1, fin);
       +    (void)fread(&step_count, sizeof(step_count), 1, fin);
       +
       +    // Canonical coordinate system origo
       +    f3 origo;
       +    (void)fread(&origo, sizeof(float)*3, 1, fin);
       +
       +    // Canonical coordinate system dimensions
       +    f3 L;
       +    (void)fread(&L, sizeof(float)*3, 1, fin);
       +
       +    // Skip over irrelevant data
       +    float blankf;
       +    //f3 blankf3;
       +    unsigned int blankui;
       +    for (int j=0; j<3; j++)         // Skip over grid.num data
       +      (void)fread(&blankui, sizeof(blankui), 1, fin);
       +
       +
       +    // Load data into particle array
       +    for (unsigned int i=0; i<np; i++) {
       +      (void)fread(&p[i].x, sizeof(float), 1, fin);
       +      for (int j=0; j<4; j++)
       +        (void)fread(&blankf, sizeof(blankf), 1, fin);
       +
       +      (void)fread(&p[i].y, sizeof(float), 1, fin);
       +      for (int j=0; j<4; j++)
       +        (void)fread(&blankf, sizeof(blankf), 1, fin);
       +
       +      (void)fread(&p[i].z, sizeof(float), 1, fin);
       +      for (int j=0; j<4; j++)
       +        (void)fread(&blankf, sizeof(blankf), 1, fin);
       +    }
       +    for (unsigned int i=0; i<np; i++) {
       +      //(void)fread(&blankf, sizeof(blankf), 1, fin); // fixvel
       +      (void)fread(&fixvel[i], sizeof(float), 1, fin); // fixvel
       +      (void)fread(&blankf, sizeof(blankf), 1, fin); // xsum
       +      (void)fread(&p[i].w, sizeof(float), 1, fin);  // radius
       +      for (int j=0; j<10; j++)
       +        (void)fread(&blankf, sizeof(blankf), 1, fin);
       +      (void)fread(&es_dot[i], sizeof(float), 1, fin);
       +      (void)fread(&es[i], sizeof(float), 1, fin);
       +      (void)fread(&pres[i], sizeof(float), 1, fin);
       +    }
       +
       +    fclose(fin);        // Binary read complete
       +
       +    cout << "  Spatial dimensions: " 
       +      << L.x << "*" << L.y << "*" << L.z << " m\n";
       +
       +    // Eye position and focus point
       +    //f3 eye    = {0.5f*L.x, -4.2*L.y, 0.5f*L.z};
       +    f3 eye    = {2.5f*L.x, -4.2*L.y, 2.0f*L.z};
       +    f3 lookat = {0.45f*L.x, 0.5f*L.y, 0.45f*L.z};
       +    if (L.z > (L.x + L.y)/1.5f) { // Render only bottom of world (for initsetup's)
       +      eye.x = 1.1f*L.x;
       +      eye.y = 15.1*L.y;
       +      eye.z = 1.1*L.z;
       +      /*lookat.x = 0.45f*L.x;
       +        lookat.y = 0.5f*L.y;
       +        lookat.z = 0.30f*L.z;*/
       +    }
       +
       +    // Screen width in world coordinates
       +    //float imgw = 1.4f*L.x;
       +    //float imgw = pow(L.x*L.y*L.z, 0.32f);
       +    //float imgw = sqrt(L.x*L.y)*1.2f; // Adjust last float to capture entire height
       +    float imgw = L.x*1.35f;
       +
       +    // Determine visualization mode
       +    int visualize = 0; // 0: ordinary view
       +    float max_val = 0;
       +    if (argc == 8) {
       +      if(strcmp(argv[6],"pressure") == 0)
       +        visualize = 1; // 1: pressure view
       +      if(strcmp(argv[6],"es_dot") == 0)
       +        visualize = 2; // 2: es_dot view
       +      if(strcmp(argv[6],"es") == 0)
       +        visualize = 3; // 3: es view
       +      if(strcmp(argv[6],"pressure50") == 0)
       +        visualize = 4; // 4: pressure50 view
       +
       +      // Read max. value specified in command args.
       +      max_val = atof(argv[7]);
       +    }
       +
       +    // Render colorbar image
       +    if (strcmp(argv[2],"colorbar") == 0) {
       +
       +      for (unsigned int x=0; x<width; x++) {
       +        for (unsigned int y=0; y<height; y++) {
       +
       +          // Colormap value is relative to width position
       +          float ratio = (float)x/width;
       +
       +          // Determine Blue-White-Red color components
       +          float red   = fmin(1.0f, 0.209f*ratio*ratio*ratio - 2.49f*ratio*ratio + 3.0f*ratio + 0.0109f);
       +          float green = fmin(1.0f, -2.44f*ratio*ratio + 2.15f*ratio + 0.369f);
       +          float blue  = fmin(1.0f, -2.21f*ratio*ratio + 1.61f*ratio + 0.573f);
       +
       +          // Write pixel value to image array
       +          img[y*height + x].r = (unsigned char) red * 255;
       +          img[y*height + x].g = (unsigned char) green * 255;
       +          img[y*height + x].b = (unsigned char) blue * 255;
       +
       +        }
       +      }
       +    }
       +
       +    if (strcmp(argv[1],"GPU") == 0) {
       +
       +      // Call cuda wrapper
       +      if (rt(p, np, img, width, height, origo, L, eye, lookat, imgw, visualize, max_val, fixvel, pres, es_dot, es) != 0) {
       +        cout << "\nError in rt()\n";
       +        return 1;
       +      }
       +    } else if (strcmp(argv[1],"CPU") == 0) {
       +
       +      // Call CPU wrapper
       +      if (rt_cpu(p, np, img, width, height, origo, L, eye, lookat, imgw) != 0) {
       +        cout << "\nError in rt_cpu()\n";
       +        return 1;
       +      }
       +    } else {
       +      cout << "Please specify CPU or GPU for execution\n";
              return 1;
            }
       -  } else {
       -    cout << "Please specify CPU or GPU for execution\n";
       -    return 1;
       -  }
        
       +    // Free dynamically allocated memory
       +    delete [] p;
       +    delete [] fixvel;
       +    delete [] pres;
       +    delete [] es;
       +    delete [] es_dot;
       +
       +  }
        
          // Write final image to PPM file
          image_to_ppm(img, argv[5], width, height);
        
       -
       -  // Free dynamically allocated memory
       -  delete [] p;
       -  delete [] pres;
       -  delete [] es;
       -  delete [] es_dot;
          delete [] img;
        
          cout << "Terminating successfully\n\n";
 (DIR) diff --git a/raytracer/rt_kernel.h b/raytracer/rt_kernel.h
       t@@ -26,6 +26,6 @@ int rt(float4* p, const unsigned int np,
               rgb* img, const unsigned int width, const unsigned int height,
               f3 origo, f3 L, f3 eye, f3 lookat, float imgw,
               const int visualize, const float max_val, 
       -       float* pres, float* es_dot, float* es);
       +       float* fixvel, float* pres, float* es_dot, float* es);
        
        #endif
 (DIR) diff --git a/src/Makefile b/src/Makefile
       t@@ -82,5 +82,5 @@ backup:
        
        clean:
                $(RM) $(OBJECTS)
       -        $(RM) $(EXECUTABLE)
       +        $(RM) ../sphere_*
        
 (DIR) diff --git a/src/constants.cuh b/src/constants.cuh
       t@@ -14,15 +14,13 @@ __constant__ unsigned int devC_shearmodel; // Shear force model: 1: viscous, fri
        __constant__ float        devC_k_n;           // Material normal stiffness
        __constant__ float        devC_k_s;           // Material shear stiffness
        __constant__ float        devC_k_r;           // Material rolling stiffness
       +__constant__ float        devC_gamma_n;           // Material normal viscosity
        __constant__ float        devC_gamma_s;           // Material shear viscosity
        __constant__ float          devC_gamma_r;           // Material rolling viscosity
       -__constant__ float        devC_mu_s;           // Material shear friction coefficient
       +__constant__ float        devC_mu_s;           // Material static shear friction coefficient
       +__constant__ float        devC_mu_d;           // Material dynamic shear friction coefficient
        __constant__ float          devC_mu_r;           // Material rolling friction coefficient
       -__constant__ float        devC_C;           // Material cohesion
        __constant__ float        devC_rho;           // Material density
       -__constant__ float        devC_E;           // Material Young's modulus
       -__constant__ float        devC_K;           // Material bulk modulus
       -__constant__ float        devC_nu;           // Material viscosity
        __constant__ float          devC_kappa;           // Capillary bond prefactor
        __constant__ float          devC_db;           // Debonding distance
        __constant__ float          devC_V_b;           // Liquid volume of capillary bond
 (DIR) diff --git a/src/datatypes.h b/src/datatypes.h
       t@@ -35,15 +35,13 @@ struct Particles {
          float *k_n;
          float *k_s;
          float *k_r;
       +  float *gamma_n;
          float *gamma_s;
          float *gamma_r;
          float *mu_s;
       +  float *mu_d;
          float *mu_r;
       -  float *C;
          float *rho;
       -  float *E;
       -  float *K;
       -  float *nu;
          float *es_dot;
          float *es;
          float *p;
       t@@ -80,15 +78,13 @@ struct Params {
          float k_n;
          float k_s;
          float k_r;
       +  float gamma_n;
          float gamma_s;
          float gamma_r;
          float mu_s; 
       +  float mu_d;
          float mu_r;
       -  float C;  
          float rho;
       -  float E;  
       -  float K;  
       -  float nu; 
          float kappa;
          float db;
          float V_b;
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -152,65 +152,71 @@ __device__ float contactLinear_wall(float3* N, float3* T, float* es_dot, float* 
                                            float4* dev_vel_sorted, float4* dev_angvel_sorted,
                                            float3 n, float delta, float wvel)
        {
       -  // Fetch velocities from global memory
       -  float4 linvel_a_tmp = dev_vel_sorted[idx_a];
       -  float4 angvel_a_tmp = dev_angvel_sorted[idx_a];
       +  // Fetch particle velocities from global memory
       +  const float4 linvel_tmp = dev_vel_sorted[idx_a];
       +  const float4 angvel_tmp = dev_angvel_sorted[idx_a];
        
          // Convert velocities to three-component vectors
       -  float3 linvel_a = make_float3(linvel_a_tmp.x,
       -                                      linvel_a_tmp.y,
       -                                linvel_a_tmp.z);
       -  float3 angvel_a = make_float3(angvel_a_tmp.x,
       -                                      angvel_a_tmp.y,
       -                                angvel_a_tmp.z);
       +  const float3 linvel = make_float3(linvel_tmp.x,
       +                                          linvel_tmp.y,
       +                                    linvel_tmp.z);
       +  const float3 angvel = make_float3(angvel_tmp.x,
       +                                          angvel_tmp.y,
       +                                    angvel_tmp.z);
        
          // Store the length of the angular velocity for later use
       -  float  angvel_length = length(angvel_a);
       +  const float angvel_length = length(angvel);
        
          // Contact velocity is the sum of the linear and
          // rotational components
       -  float3 vel = linvel_a + radius_a * cross(n, angvel_a) + wvel;
       +  const float3 vel = linvel + radius_a * cross(n, angvel) + wvel;
        
          // Normal component of the contact velocity
       -  float  vel_n = dot(vel, n);
       +  const float vel_n = dot(vel, n);
        
          // The tangential velocity is the contact velocity
          // with the normal component subtracted
       -  float3 vel_t = vel - n * (dot(vel, n));
       -  float  vel_t_length = length(vel_t);
       +  const float3 vel_t = vel - n * (dot(vel, n));
       +  const float  vel_t_length = length(vel_t);
        
          // Calculate elastic normal component
          //float3 f_n = -devC_k_n * delta * n;
        
          // Normal force component: Elastic - viscous damping
       -  float3 f_n = (-devC_k_n * delta - devC_nu * vel_n) * n;
       +  float3 f_n = (-devC_k_n * delta - devC_gamma_n * vel_n) * n;
        
          // Make sure the viscous damping doesn't exceed the elastic component,
          // i.e. the damping factor doesn't exceed the critical damping, 2*sqrt(m*k_n)
          if (dot(f_n, n) < 0.0f)
            f_n = make_float3(0.0f, 0.0f, 0.0f);
        
       -  float  f_n_length = length(f_n); // Save length for later use
       +  const float  f_n_length = length(f_n); // Save length for later use
        
          // Initialize vectors
       -  float3 f_s   = make_float3(0.0f, 0.0f, 0.0f);
       +  float3 f_t   = make_float3(0.0f, 0.0f, 0.0f);
          float3 T_res = make_float3(0.0f, 0.0f, 0.0f);
        
          // Check that the tangential velocity is high enough to avoid
          // divide by zero (producing a NaN)
          if (vel_t_length > 0.f) {
        
       -    // Shear force component
       -    // Limited by Mohr Coulomb failure criterion
       -    f_s = -1.0f * fmin(devC_gamma_s * vel_t_length,
       -                       devC_mu_s * f_n_length)
       -          * vel_t/vel_t_length;
       +    const float f_t_visc  = devC_gamma_s * vel_t_length; // Tangential force by viscous model
       +    const float f_t_limit = devC_mu_s * f_n_length;      // Max. friction
        
       -    // Shear energy production rate [W]
       -    *es_dot += -dot(vel_t, f_s);
       +    // If the shear force component exceeds the friction,
       +    // the particle slips and energy is dissipated
       +    if (f_t_visc < f_t_limit) {
       +      f_t = -1.0f * f_t_visc * vel_t/vel_t_length;
       +
       +    } else { // Dynamic friction, friction failure
       +      f_t = -1.0f * f_t_limit * vel_t/vel_t_length;
       +      
       +      // Shear energy production rate [W]
       +      //*es_dot += -dot(vel_t, f_t);
       +    }
          }
        
       -  if (angvel_length > 0.f) {
       +/*  if (angvel_length > 0.f) {
            // Apply rolling resistance (Zhou et al. 1999)
            //T_res = -angvel_a/angvel_length * devC_mu_r * radius_a * f_n_length;
        
       t@@ -218,13 +224,14 @@ __device__ float contactLinear_wall(float3* N, float3* T, float* es_dot, float* 
            T_res = -1.0f * fmin(devC_gamma_r * radius_a * angvel_length,
                                 devC_mu_r * radius_a * f_n_length)
                    * angvel_a/angvel_length;
       -  }
       +  }*/
        
          // Total force from wall
       -  *N += f_n + f_s;
       +  *N += f_n + f_t;
       +//  *N += f_n;
        
          // Total torque from wall
       -  *T += -radius_a * cross(n, f_s) + T_res;
       +  *T += -radius_a * cross(n, f_t) + T_res;
        
          // Pressure excerted onto particle from this contact
          *p += f_n_length / (4.0f * PI * radius_a*radius_a);
       t@@ -247,49 +254,49 @@ __device__ void contactLinearViscous(float3* N, float3* T, float* es_dot, float*
        {
        
          // Allocate variables and fetch missing time=t values for particle A and B
       -  float4 vel_a     = dev_vel_sorted[idx_a];
       -  float4 vel_b     = dev_vel_sorted[idx_b];
       -  float4 angvel4_a = dev_angvel_sorted[idx_a];
       -  float4 angvel4_b = dev_angvel_sorted[idx_b];
       +  const float4 vel_a     = dev_vel_sorted[idx_a];
       +  const float4 vel_b     = dev_vel_sorted[idx_b];
       +  const float4 angvel4_a = dev_angvel_sorted[idx_a];
       +  const float4 angvel4_b = dev_angvel_sorted[idx_b];
        
          // Convert to float3's
       -  float3 angvel_a = make_float3(angvel4_a.x, angvel4_a.y, angvel4_a.z);
       -  float3 angvel_b = make_float3(angvel4_b.x, angvel4_b.y, angvel4_b.z);
       +  const float3 angvel_a = make_float3(angvel4_a.x, angvel4_a.y, angvel4_a.z);
       +  const float3 angvel_b = make_float3(angvel4_b.x, angvel4_b.y, angvel4_b.z);
        
          // Force between grain pair decomposed into normal- and tangential part
       -  float3 f_n, f_s, f_c, T_res;
       +  float3 f_n, f_t, f_c, T_res;
        
          // Normal vector of contact
       -  float3 n_ab = x_ab/x_ab_length;
       +  const float3 n_ab = x_ab/x_ab_length;
        
          // Relative contact interface velocity, w/o rolling
       -  float3 vel_ab_linear = make_float3(vel_a.x - vel_b.x, 
       -                                           vel_a.y - vel_b.y, 
       -                                     vel_a.z - vel_b.z);
       +  const float3 vel_ab_linear = make_float3(vel_a.x - vel_b.x, 
       +                                                 vel_a.y - vel_b.y, 
       +                                           vel_a.z - vel_b.z);
        
          // Relative contact interface velocity of particle surfaces at
          // the contact, with rolling (Hinrichsen and Wolf 2004, eq. 13.10)
       -  float3 vel_ab = vel_ab_linear
       -                  + radius_a * cross(n_ab, angvel_a)
       -                  + radius_b * cross(n_ab, angvel_b);
       +  const float3 vel_ab = vel_ab_linear
       +                        + radius_a * cross(n_ab, angvel_a)
       +                        + radius_b * cross(n_ab, angvel_b);
        
          // Relative contact interface rolling velocity
       -  float3 angvel_ab = angvel_a - angvel_b;
       -  float  angvel_ab_length = length(angvel_ab);
       +  const float3 angvel_ab = angvel_a - angvel_b;
       +  const float  angvel_ab_length = length(angvel_ab);
        
          // Normal component of the relative contact interface velocity
       -  float vel_n_ab = dot(vel_ab_linear, n_ab);
       +  const float vel_n_ab = dot(vel_ab_linear, n_ab);
        
          // Tangential component of the relative contact interface velocity
          // Hinrichsen and Wolf 2004, eq. 13.9
       -  float3 vel_t_ab = vel_ab - (n_ab * dot(vel_ab, n_ab));
       -  float  vel_t_ab_length = length(vel_t_ab);
       +  const float3 vel_t_ab = vel_ab - (n_ab * dot(vel_ab, n_ab));
       +  const float  vel_t_ab_length = length(vel_t_ab);
        
          // Compute the normal stiffness of the contact
          //float k_n_ab = k_n_a * k_n_b / (k_n_a + k_n_b);
        
          // Calculate rolling radius
       -  float R_bar = (radius_a + radius_b) / 2.0f;
       +  const float R_bar = (radius_a + radius_b) / 2.0f;
        
          // Normal force component: linear-elastic approximation (Augier 2009, eq. 3)
          // with velocity dependant damping
       t@@ -311,20 +318,20 @@ __device__ void contactLinearViscous(float3* N, float3* T, float* es_dot, float*
          //f_n = -k_n_ab * delta_ab * n_ab;
        
          // Normal force component: Elastic - viscous damping
       -  f_n = (-devC_k_n * delta_ab - devC_nu * vel_n_ab) * n_ab;
       +  f_n = (-devC_k_n * delta_ab - devC_gamma_n * vel_n_ab) * n_ab;
        
          // Make sure the viscous damping doesn't exceed the elastic component,
          // i.e. the damping factor doesn't exceed the critical damping, 2*sqrt(m*k_n)
          if (dot(f_n, n_ab) < 0.0f)
            f_n = make_float3(0.0f, 0.0f, 0.0f);
        
       -  float f_n_length = length(f_n);
       +  const float f_n_length = length(f_n);
        
          // Add max. capillary force
          f_c = -kappa * sqrtf(radius_a * radius_b) * n_ab;
        
          // Initialize force vectors to zero
       -  f_s   = make_float3(0.0f, 0.0f, 0.0f);
       +  f_t   = make_float3(0.0f, 0.0f, 0.0f);
          T_res = make_float3(0.0f, 0.0f, 0.0f);
        
          // Shear force component: Nonlinear relation
       t@@ -332,16 +339,31 @@ __device__ void contactLinearViscous(float3* N, float3* T, float* es_dot, float*
          // to the normal force
          if (vel_t_ab_length > 0.f) {
        
       -    // Shear force
       -    f_s = -1.0f * fmin(devC_gamma_s * vel_t_ab_length, 
       -                       devC_mu_s * length(f_n-f_c)) 
       -          * vel_t_ab/vel_t_ab_length;
       +    // Tangential force by viscous model
       +    const float f_t_visc  = devC_gamma_s * vel_t_ab_length;
       +
       +    // Determine max. friction
       +    float f_t_limit;
       +    if (vel_t_ab_length > 0.001f) { // Dynamic
       +      f_t_limit = devC_mu_d * length(f_n-f_c);
       +    } else { // Static
       +      f_t_limit = devC_mu_s * length(f_n-f_c);
       +    }
       +
       +    // If the shear force component exceeds the friction,
       +    // the particle slips and energy is dissipated
       +    if (f_t_visc < f_t_limit) { // Static
       +      f_t = -1.0f * f_t_visc * vel_t_ab/vel_t_ab_length;
       +
       +    } else { // Dynamic, friction failure
       +      f_t = -1.0f * f_t_limit * vel_t_ab/vel_t_ab_length;
        
       -    // Shear friction production rate [W]
       -    *es_dot += -dot(vel_t_ab, f_s);
       +      // Shear friction production rate [W]
       +      //*es_dot += -dot(vel_t_ab, f_t);
       +    }
          }
        
       -  if (angvel_ab_length > 0.f) {
       +/*  if (angvel_ab_length > 0.f) {
            // Apply rolling resistance (Zhou et al. 1999)
            //T_res = -angvel_ab/angvel_ab_length * devC_mu_r * R_bar * length(f_n);
        
       t@@ -350,11 +372,11 @@ __device__ void contactLinearViscous(float3* N, float3* T, float* es_dot, float*
                                 devC_mu_r * R_bar * f_n_length)
                    * angvel_ab/angvel_ab_length;
          }
       -
       +*/
        
          // Add force components from this collision to total force for particle
       -  *N += f_n + f_s + f_c; 
       -  *T += -R_bar * cross(n_ab, f_s) + T_res;
       +  *N += f_n + f_t + f_c; 
       +  *T += -R_bar * cross(n_ab, f_t) + T_res;
        
          // Pressure excerted onto the particle from this contact
          *p += f_n_length / (4.0f * PI * radius_a*radius_a);
       t@@ -378,88 +400,133 @@ __device__ void contactLinear(float3* N, float3* T,
        {
        
          // Allocate variables and fetch missing time=t values for particle A and B
       -  float4 vel_b     = dev_vel[idx_b_orig];
       -  float4 angvel4_b = dev_angvel[idx_b_orig];
       +  const float4 vel_b     = dev_vel[idx_b_orig];
       +  const float4 angvel4_b = dev_angvel[idx_b_orig];
        
          // Fetch previous sum of shear displacement for the contact pair
       -  float4 delta_t0  = dev_delta_t[mempos];
       +  const float4 delta_t0_4 = dev_delta_t[mempos];
       +
       +  const float3 delta_t0_uncor = make_float3(delta_t0_4.x,
       +                                                  delta_t0_4.y,
       +                                            delta_t0_4.z);
        
          // Convert to float3
       -  float3 angvel_b = make_float3(angvel4_b.x, angvel4_b.y, angvel4_b.z);
       +  const float3 angvel_b = make_float3(angvel4_b.x, angvel4_b.y, angvel4_b.z);
        
          // Force between grain pair decomposed into normal- and tangential part
       -  float3 f_n, f_s, f_c, T_res;
       +  float3 f_n, f_t, f_c, T_res;
        
          // Normal vector of contact
       -  float3 n_ab = x_ab/x_ab_length;
       +  const float3 n_ab = x_ab/x_ab_length;
        
          // Relative contact interface velocity, w/o rolling
       -  float3 vel_ab_linear = make_float3(vel_a.x - vel_b.x, 
       -                                           vel_a.y - vel_b.y, 
       -                                     vel_a.z - vel_b.z);
       +  const float3 vel_ab_linear = make_float3(vel_a.x - vel_b.x, 
       +                                                 vel_a.y - vel_b.y, 
       +                                           vel_a.z - vel_b.z);
        
          // Relative contact interface velocity of particle surfaces at
          // the contact, with rolling (Hinrichsen and Wolf 2004, eq. 13.10)
       -  float3 vel_ab = vel_ab_linear
       -                  + radius_a * cross(n_ab, angvel_a)
       -                  + radius_b * cross(n_ab, angvel_b);
       +  const float3 vel_ab = vel_ab_linear
       +                        + radius_a * cross(n_ab, angvel_a)
       +                        + radius_b * cross(n_ab, angvel_b);
        
          // Relative contact interface rolling velocity
       -  float3 angvel_ab = angvel_a - angvel_b;
       -  float  angvel_ab_length = length(angvel_ab);
       +  const float3 angvel_ab = angvel_a - angvel_b;
       +  const float  angvel_ab_length = length(angvel_ab);
        
          // Normal component of the relative contact interface velocity
       -  float vel_n_ab = dot(vel_ab_linear, n_ab);
       +  const float vel_n_ab = dot(vel_ab_linear, n_ab);
        
          // Tangential component of the relative contact interface velocity
          // Hinrichsen and Wolf 2004, eq. 13.9
       -  float3 vel_t_ab = vel_ab - (n_ab * dot(vel_ab, n_ab));
       +  const float3 vel_t_ab = vel_ab - (n_ab * dot(vel_ab, n_ab));
       +  const float  vel_t_ab_length = length(vel_t_ab);
        
       -  // Add tangential displacement to total tangential displacement
       -  float3 delta_t  = make_float3(delta_t0.x, delta_t0.y, delta_t0.z) + vel_t_ab * devC_dt;
       -  float  delta_t_length = length(delta_t);
       +  // Correct tangential displacement vector, which is
       +  // necessary if the tangential plane rotated
       +  const float3 delta_t0 = delta_t0_uncor - (n_ab * dot(delta_t0_uncor, n_ab));
       +  const float  delta_t0_length = length(delta_t0);
       +
       +  // New tangential displacement vector
       +  float3 delta_t;
        
          // Compute the normal stiffness of the contact
          //float k_n_ab = k_n_a * k_n_b / (k_n_a + k_n_b);
        
          // Calculate rolling radius
       -  float R_bar = (radius_a + radius_b) / 2.0f;
       +  const float R_bar = (radius_a + radius_b) / 2.0f;
        
          // Normal force component: Elastic
          //f_n = -devC_k_n * delta_ab * n_ab;
        
          // Normal force component: Elastic - viscous damping
       -  f_n = (-devC_k_n * delta_ab - devC_nu * vel_n_ab) * n_ab;
       +  f_n = (-devC_k_n * delta_ab - devC_gamma_n * vel_n_ab) * n_ab;
        
          // Make sure the viscous damping doesn't exceed the elastic component,
          // i.e. the damping factor doesn't exceed the critical damping, 2*sqrt(m*k_n)
          if (dot(f_n, n_ab) < 0.0f)
            f_n = make_float3(0.0f, 0.0f, 0.0f);
        
       -  float f_n_length = length(f_n);
       +  const float f_n_length = length(f_n);
        
          // Add max. capillary force
          f_c = -devC_kappa * sqrtf(radius_a * radius_b) * n_ab;
        
          // Initialize force vectors to zero
       -  f_s   = make_float3(0.0f, 0.0f, 0.0f);
       +  f_t   = make_float3(0.0f, 0.0f, 0.0f);
          T_res = make_float3(0.0f, 0.0f, 0.0f);
        
       -  // Shear force component: Nonlinear relation
       -  // Coulomb's law of friction limits the tangential force to less or equal
       -  // to the normal force
       -  if (delta_t_length > 0.f) {
       +  // Apply a tangential force if the previous tangential displacement
       +  // is non-zero, or the current sliding velocity is non-zero.
       +  if (delta_t0_length > 0.f || vel_t_ab_length > 0.f) {
       +
       +    // Shear force: Visco-Elastic, limited by Coulomb friction
       +    float3 f_t_elast = -1.0f * devC_k_s * delta_t0;
       +    float3 f_t_visc  = -1.0f * devC_gamma_s * vel_t_ab;
       +
       +    float f_t_limit;
       +    
       +    if (vel_t_ab_length > 0.001f) { // Dynamic friciton
       +      f_t_limit = devC_mu_d * length(f_n-f_c);
       +    } else { // Static friction
       +      f_t_limit = devC_mu_s * length(f_n-f_c);
       +    }
       +
       +    // Tangential force before friction limit correction
       +    f_t = f_t_elast + f_t_visc;
       +    float f_t_length = length(f_t);
       +
       +    // If failure criterion is not met, contact is viscous-linear elastic.
       +    // If failure criterion is met, contact force is limited, 
       +    // resulting in a slip and energy dissipation
       +    if (f_t_length > f_t_limit) { // Dynamic case
       +      
       +      // Frictional force is reduced to equal the limit
       +      f_t *= f_t_limit/f_t_length;
        
       -    // Shear force: Elastic, limited by Mohr-Coulomb
       -    f_s = -1.0f * fmin(devC_k_s * delta_t_length, 
       -                       devC_mu_s * length(f_n-f_c)) 
       -          * delta_t/delta_t_length;
       +      // A slip event zeros the displacement vector
       +      //delta_t = make_float3(0.0f, 0.0f, 0.0f);
        
       -    // Shear friction production rate [W]
       -    *es_dot += -dot(vel_t_ab, f_s);
       +      // In a slip event, the tangential spring is adjusted to a 
       +      // length which is consistent with Coulomb's equation
       +      // (Hinrichsen and Wolf, 2004)
       +      delta_t = -1.0f/devC_k_s * f_t + devC_gamma_s * vel_t_ab;
       +
       +      // Shear friction heat production rate: 
       +      // The energy lost from the tangential spring is dissipated as heat
       +      //*es_dot += -dot(vel_t_ab, f_t);
       +      *es_dot += length(delta_t0 - delta_t) * devC_k_s / devC_dt; // Seen in EsyS-Particle
       +
       +    } else { // Static case
       +
       +      // No correction of f_t is required
       +
       +      // Add tangential displacement to total tangential displacement
       +      delta_t = delta_t0 + vel_t_ab * devC_dt;
       +    }
          }
        
       -  /*if (angvel_ab_length > 0.f) {
       +  if (angvel_ab_length > 0.f) {
            // Apply rolling resistance (Zhou et al. 1999)
            //T_res = -angvel_ab/angvel_ab_length * devC_mu_r * R_bar * length(f_n);
        
       t@@ -467,11 +534,11 @@ __device__ void contactLinear(float3* N, float3* T,
            T_res = -1.0f * fmin(devC_gamma_r * R_bar * angvel_ab_length,
                                 devC_mu_r * R_bar * f_n_length)
                    * angvel_ab/angvel_ab_length;
       -  }*/
       +  }
        
          // Add force components from this collision to total force for particle
       -  *N += f_n + f_s + f_c; 
       -  *T += -R_bar * cross(n_ab, f_s) + T_res;
       +  *N += f_n + f_t + f_c; 
       +  *T += -R_bar * cross(n_ab, f_t) + T_res;
        
          // Pressure excerted onto the particle from this contact
          *p += f_n_length / (4.0f * PI * radius_a*radius_a);
       t@@ -533,7 +600,7 @@ __device__ void bondLinear(float3* N, float3* T, float* es_dot, float* p,
            //float  vel_t_ab_length = length(vel_t_ab);
        
            float3 f_n = make_float3(0.0f, 0.0f, 0.0f);
       -    float3 f_s = make_float3(0.0f, 0.0f, 0.0f);
       +    float3 f_t = make_float3(0.0f, 0.0f, 0.0f);
        
            // Mean radius
            float R_bar = (radius_a + radius_b)/2.0f;
       t@@ -543,15 +610,15 @@ __device__ void bondLinear(float3* N, float3* T, float* es_dot, float* p,
        
            if (length(vel_t_ab) > 0.f) {
              // Shear force component: Viscous
       -      f_s = -1.0f * devC_gamma_s * vel_t_ab;
       +      f_t = -1.0f * devC_gamma_s * vel_t_ab;
        
              // Shear friction production rate [W]
       -      *es_dot += -dot(vel_t_ab, f_s);
       +      //*es_dot += -dot(vel_t_ab, f_t);
            }
        
            // Add force components from this bond to total force for particle
       -    *N += f_n + f_s;
       -    *T += -R_bar * cross(n_ab, f_s);
       +    *N += f_n + f_t;
       +    *T += -R_bar * cross(n_ab, f_t);
        
            // Pressure excerted onto the particle from this bond
            *p += length(f_n) / (4.0f * PI * radius_a*radius_a);
       t@@ -763,7 +830,8 @@ __device__ void findContactsInCell(int3 targetCell,
                                           unsigned int* dev_cellEnd,
                                           unsigned int* dev_gridParticleIndex,
                                           int* nc,
       -                                   unsigned int* dev_contacts)
       +                                   unsigned int* dev_contacts,
       +                                   float4* dev_distmod)
        {
          // Variable containing modifier for interparticle
          // vector, if it crosses a periodic boundary
       t@@ -897,11 +965,15 @@ __device__ void findContactsInCell(int3 targetCell,
        
                  // Write the interparticle vector and radius of particle B
                 //dev_x_ab_r_b[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_float4(x_ab, radius_b);
       +          dev_distmod[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_float4(distmod, radius_b);
                  
                  // Increment contact counter
                  ++*nc;
                }
        
       +        // Write the inter-particle position vector correction and radius of particle B
       +        //dev_distmod[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_float4(distmod, radius_b);
       +
                // Check wether particles are bonded together
                /*if (bonds.x == idx_b || bonds.y == idx_b ||
                    bonds.z == idx_b || bonds.w == idx_b) {
       t@@ -927,7 +999,8 @@ __global__ void topology(unsigned int* dev_cellStart,
                                     unsigned int* dev_cellEnd, // Input: Particles in cell 
                                 unsigned int* dev_gridParticleIndex, // Input: Unsorted-sorted key
                                 float4* dev_x_sorted, float* dev_radius_sorted, 
       -                         unsigned int* dev_contacts)
       +                         unsigned int* dev_contacts,
       +                         float4* dev_distmod)
        {
          // Thread index equals index of particle A
          unsigned int idx_a = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
       t@@ -958,7 +1031,7 @@ __global__ void topology(unsigned int* dev_cellStart,
                                            dev_x_sorted, dev_radius_sorted,
                                     dev_cellStart, dev_cellEnd,
                                     dev_gridParticleIndex,
       -                                 &nc, dev_contacts);
       +                                 &nc, dev_contacts, dev_distmod);
                }
              }
            }
       t@@ -983,6 +1056,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
                                 float4* dev_w_nx, float4* dev_w_mvfd, 
                                 float* dev_w_force, //uint4* dev_bonds_sorted,
                                 unsigned int* dev_contacts, 
       +                         float4* dev_distmod,
                                 float4* dev_delta_t)
        {
          // Thread index equals index of particle A
       t@@ -1027,6 +1101,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
              unsigned int idx_b_orig, mempos;
              float delta_n, x_ab_length;
              float4 x_b;
       +      float4 distmod;
              float  radius_b;
              float3 x_ab;
              float4 vel_a     = dev_vel_sorted[idx_a];
       t@@ -1039,11 +1114,16 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
                mempos = (unsigned int)(idx_a_orig * devC_nc + i);
                __syncthreads();
                idx_b_orig = dev_contacts[mempos];
       +        distmod    = dev_distmod[mempos];
                x_b        = dev_x[idx_b_orig];
       -        radius_b   = dev_radius[idx_b_orig];
       -        x_ab = make_float3(x_a.x - x_b.x,
       -                               x_a.y - x_b.y,
       -                           x_a.z - x_b.z);
       +        //radius_b   = dev_radius[idx_b_orig];
       +        radius_b   = distmod.w;
       +
       +        // Inter-particle vector, corrected for periodic boundaries
       +        x_ab = make_float3(x_a.x - x_b.x + distmod.x,
       +                               x_a.y - x_b.y + distmod.y,
       +                           x_a.z - x_b.z + distmod.z);
       +
                x_ab_length = length(x_ab);
                delta_n = x_ab_length - (radius_a + radius_b);
        
       t@@ -1055,16 +1135,16 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
                    //cuPrintf("\nProcessing contact, idx_a_orig = %u, idx_b_orig = %u, contact = %d, delta_n = %f\n",
                    //  idx_a_orig, idx_b_orig, i, delta_n);
                    contactLinear(&N, &T, &es_dot, &p, 
       -                idx_a_orig,
       -                idx_b_orig,
       -                vel_a,
       -                dev_vel,
       -                angvel_a,
       -                dev_angvel,
       -                radius_a, radius_b, 
       -                x_ab, x_ab_length,
       -                delta_n, dev_delta_t, 
       -                mempos);
       +                          idx_a_orig,
       +                          idx_b_orig,
       +                          vel_a,
       +                          dev_vel,
       +                          angvel_a,
       +                          dev_angvel,
       +                          radius_a, radius_b, 
       +                          x_ab, x_ab_length,
       +                          delta_n, dev_delta_t, 
       +                          mempos);
                  } else {
                    __syncthreads();
                    // Remove this contact (there is no particle with index=np)
       t@@ -1074,7 +1154,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
                  }
                } else {
                  __syncthreads();
       -          dev_delta_t[mempos]  = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
       +          dev_delta_t[mempos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
                }
              } // Contact loop end
        
       t@@ -1266,18 +1346,24 @@ __global__ void integrate(float4* dev_x_sorted, float4* dev_vel_sorted, // Input
            acc.z += devC_g[2];
            //acc.z -= 9.82f;
        
       -    // Only update velocity (and position) if the horizontal velocity is not fixed
       +    // Update angular velocity
       +    angvel.x += angacc.x * dt;
       +    angvel.y += angacc.y * dt;
       +    angvel.z += angacc.z * dt;
       +
       +    // Check if particle has a fixed horizontal velocity
            if (vel.w > 0.0f) {
        
       -      // Zero horizontal acceleration
       +      // Zero horizontal acceleration and disable
       +      // gravity to counteract segregation.
       +      // Particles may move in the z-dimension,
       +      // to allow for dilation.
              acc.x = 0.0f;
              acc.y = 0.0f;
       -
       -      // Update vertical linear velocity
       -      vel.z += acc.z * dt;
       +      acc.z -= devC_g[2];
        
              // Zero the angular acceleration and -velocity
       -      angacc = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
       +      angvel = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
            } 
        
            // Update linear velocity
       t@@ -1285,11 +1371,6 @@ __global__ void integrate(float4* dev_x_sorted, float4* dev_vel_sorted, // Input
            vel.y += acc.y * dt;
            vel.z += acc.z * dt;
        
       -    // Update angular velocity
       -    angvel.x += angacc.x * dt;
       -    angvel.y += angacc.y * dt;
       -    angvel.z += angacc.z * dt;
       -
            // Update position. First-order Euler's scheme:
            //x.x += vel.x * dt;
            //x.y += vel.y * dt;
       t@@ -1490,27 +1571,23 @@ __host__ void transferToConstantMemory(Particles* p,
            params->k_n     = p->k_n[0];
            params->k_s            = p->k_s[0];
            params->k_r            = p->k_r[0];
       +    params->gamma_n = p->gamma_n[0];
            params->gamma_s = p->gamma_s[0];
            params->gamma_r = p->gamma_r[0];
            params->mu_s    = p->mu_s[0];
       +    params->mu_d    = p->mu_d[0];
            params->mu_r    = p->mu_r[0];
       -    params->C       = p->C[0];
            params->rho     = p->rho[0];
       -    params->E       = p->E[0];
       -    params->K       = p->K[0];
       -    params->nu      = p->nu[0];
            cudaMemcpyToSymbol("devC_k_n", &params->k_n, sizeof(float));
            cudaMemcpyToSymbol("devC_k_s", &params->k_s, sizeof(float));
            cudaMemcpyToSymbol("devC_k_r", &params->k_r, sizeof(float));
       +    cudaMemcpyToSymbol("devC_gamma_n", &params->gamma_n, sizeof(float));
            cudaMemcpyToSymbol("devC_gamma_s", &params->gamma_s, sizeof(float));
            cudaMemcpyToSymbol("devC_gamma_r", &params->gamma_r, sizeof(float));
            cudaMemcpyToSymbol("devC_mu_s", &params->mu_s, sizeof(float));
       +    cudaMemcpyToSymbol("devC_mu_d", &params->mu_d, sizeof(float));
            cudaMemcpyToSymbol("devC_mu_r", &params->mu_r, sizeof(float));
       -    cudaMemcpyToSymbol("devC_C", &params->C, sizeof(float));
            cudaMemcpyToSymbol("devC_rho", &params->rho, sizeof(float));
       -    cudaMemcpyToSymbol("devC_E", &params->E, sizeof(float));
       -    cudaMemcpyToSymbol("devC_K", &params->K, sizeof(float));
       -    cudaMemcpyToSymbol("devC_nu", &params->nu, sizeof(float));
            cudaMemcpyToSymbol("devC_kappa", &params->kappa, sizeof(float));
            cudaMemcpyToSymbol("devC_db", &params->db, sizeof(float));
            cudaMemcpyToSymbol("devC_V_b", &params->V_b, sizeof(float));
       t@@ -1591,6 +1668,8 @@ __host__ void gpuMain(float4* host_x,
        
          // Particle contact bookkeeping
          unsigned int* dev_contacts;
       +  // Particle pair distance correction across periodic boundaries
       +  float4* dev_distmod;
          // x,y,z contains the interparticle vector, corrected if contact 
          // is across a periodic boundary. 
          float4* dev_delta_t; // Accumulated shear distance of contact
       t@@ -1630,6 +1709,7 @@ __host__ void gpuMain(float4* host_x,
        
          // Particle contact bookkeeping arrays
          cudaMalloc((void**)&dev_contacts, sizeof(unsigned int)*p->np*NC); // Max NC contacts per particle
       +  cudaMalloc((void**)&dev_distmod, sizeof(float4)*p->np*NC);
          cudaMalloc((void**)&dev_delta_t, sizeof(float4)*p->np*NC);
        
          // Wall arrays
       t@@ -1668,10 +1748,12 @@ __host__ void gpuMain(float4* host_x,
          cudaMemcpy(dev_contacts, npu, sizeof(unsigned int)*p->np*NC, cudaMemcpyHostToDevice);
          delete[] npu;
        
       -  // Create array of 0.0 values on the host and transfer these to the shear displacement array
       +  // Create array of 0.0 values on the host and transfer these to the distance 
       +  // modifier and shear displacement arrays
          float4* zerosf4 = new float4[p->np*NC];
          for (unsigned int i=0; i<(p->np*NC); ++i)
            zerosf4[i] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
       +  cudaMemcpy(dev_distmod, zerosf4, sizeof(float4)*p->np*NC, cudaMemcpyHostToDevice);
          cudaMemcpy(dev_delta_t, zerosf4, sizeof(float4)*p->np*NC, cudaMemcpyHostToDevice);
          delete[] zerosf4;
        
       t@@ -1827,7 +1909,8 @@ __host__ void gpuMain(float4* host_x,
                                              dev_gridParticleIndex,
                                              dev_x_sorted, 
                                              dev_radius_sorted, 
       -                                      dev_contacts);
       +                                      dev_contacts,
       +                                      dev_distmod);
        
              // Empty cuPrintf() buffer to console
              //cudaThreadSynchronize();
       t@@ -1835,7 +1918,7 @@ __host__ void gpuMain(float4* host_x,
        
              // Synchronization point
              cudaThreadSynchronize();
       -      checkForCudaErrors("Post topology. Possibly caused by numerical instability. Is the computational time step too large?", iter);
       +      checkForCudaErrors("Post topology: One or more particles moved outside the grid.\nThis could possibly be caused by a numerical instability.\nIs the computational time step too large?", iter);
            }
        
        
       t@@ -1851,7 +1934,8 @@ __host__ void gpuMain(float4* host_x,
                                            dev_es_dot, dev_es, dev_p,
                                            dev_w_nx, dev_w_mvfd, dev_w_force,
                                            //dev_bonds_sorted,
       -                                    dev_contacts, 
       +                                    dev_contacts,
       +                                    dev_distmod,
                                            dev_delta_t);
        
            // Empty cuPrintf() buffer to console
       t@@ -1886,6 +1970,7 @@ __host__ void gpuMain(float4* host_x,
            cudaThreadSynchronize();
            checkForCudaErrors("Post integrateWalls");
        
       +
            // Update timers and counters
            time->current    += time->dt;
            filetimeclock    += time->dt;
       t@@ -1997,6 +2082,7 @@ __host__ void gpuMain(float4* host_x,
          //cudaFree(dev_bonds);
          //cudaFree(dev_bonds_sorted);
          cudaFree(dev_contacts);
       +  cudaFree(dev_distmod);
          cudaFree(dev_delta_t);
        
          // Cell-related arrays
 (DIR) diff --git a/src/file_io.cpp b/src/file_io.cpp
       t@@ -85,14 +85,12 @@ int fwritebin(char *target, Particles *p, float4 *host_x, float4 *host_vel, floa
            fwrite(&p->k_n[j], sizeof(p->k_n[j]), 1, fp);
            fwrite(&p->k_s[j], sizeof(p->k_s[j]), 1, fp);
            fwrite(&p->k_r[j], sizeof(p->k_r[j]), 1, fp);
       +    fwrite(&p->gamma_n[j], sizeof(p->gamma_n[j]), 1, fp);
            fwrite(&p->gamma_s[j], sizeof(p->gamma_s[j]), 1, fp);
            fwrite(&p->gamma_r[j], sizeof(p->gamma_r[j]), 1, fp);
            fwrite(&p->mu_s[j], sizeof(p->mu_s[j]), 1, fp);
       +    fwrite(&p->mu_d[j], sizeof(p->mu_d[j]), 1, fp);
            fwrite(&p->mu_r[j], sizeof(p->mu_r[j]), 1, fp);
       -    fwrite(&p->C[j], sizeof(p->C[j]), 1, fp);
       -    fwrite(&p->E[j], sizeof(p->E[j]), 1, fp);
       -    fwrite(&p->K[j], sizeof(p->K[j]), 1, fp);
       -    fwrite(&p->nu[j], sizeof(p->nu[j]), 1, fp);
            fwrite(&p->es_dot[j], sizeof(p->es_dot[j]), 1, fp);
            fwrite(&p->es[j], sizeof(p->es[j]), 1, fp);
            fwrite(&p->p[j], sizeof(p->p[j]), 1, fp);
 (DIR) diff --git a/src/main.cpp b/src/main.cpp
       t@@ -156,14 +156,12 @@ int main(int argc, char *argv[])
          p.k_n      = new float[p.np];      // Particle normal stiffnesses
          p.k_s             = new float[p.np];             // Particle shear stiffnesses
          p.k_r             = new float[p.np];             // Particle rolling stiffnesses
       +  p.gamma_n  = new float[p.np];             // Particle normal viscosity
          p.gamma_s  = new float[p.np];      // Particle shear viscosity
          p.gamma_r  = new float[p.np];             // Particle rolling viscosity
       -  p.mu_s     = new float[p.np];      // Inter-particle shear contact friction coefficients
       +  p.mu_s     = new float[p.np];      // Inter-particle static shear contact friction coefficients
       +  p.mu_d     = new float[p.np];             // Inter-particle dynamic shear contact friction coefficients
          p.mu_r     = new float[p.np];             // Inter-particle rolling contact friction coefficients
       -  p.C        = new float[p.np];      // Inter-particle tensile strengths (cohesion)
       -  p.E        = new float[p.np];      // Youngs modulus for each particle
       -  p.K        = new float[p.np];      // Bulk modulus for each particle
       -  p.nu       = new float[p.np];      // Poisson's ratio for each particle
          p.es_dot   = new float[p.np];             // Rate of shear energy dissipation
          p.es       = new float[p.np];             // Total shear energy dissipation
          p.p             = new float[p.np];             // Pressure excerted onto particle
       t@@ -247,21 +245,17 @@ int main(int argc, char *argv[])
              return 1;
            if (fread(&p.k_r[j], sizeof(p.k_r[j]), 1, fp) != 1)
              return 1;
       +    if (fread(&p.gamma_n[j], sizeof(p.gamma_n[j]), 1, fp) != 1)
       +      return 1;
            if (fread(&p.gamma_s[j], sizeof(p.gamma_s[j]), 1, fp) != 1)
              return 1;
            if (fread(&p.gamma_r[j], sizeof(p.gamma_r[j]), 1, fp) != 1)
              return 1;
            if (fread(&p.mu_s[j], sizeof(p.mu_s[j]), 1, fp) != 1)
              return 1;
       -    if (fread(&p.mu_r[j], sizeof(p.mu_r[j]), 1, fp) != 1)
       -      return 1;
       -    if (fread(&p.C[j], sizeof(p.C[j]), 1, fp) != 1)
       +    if (fread(&p.mu_d[j], sizeof(p.mu_d[j]), 1, fp) != 1)
              return 1;
       -    if (fread(&p.E[j], sizeof(p.E[j]), 1, fp) != 1)
       -      return 1;
       -    if (fread(&p.K[j], sizeof(p.K[j]), 1, fp) != 1)
       -      return 1;
       -    if (fread(&p.nu[j], sizeof(p.nu[j]), 1, fp) != 1)
       +    if (fread(&p.mu_r[j], sizeof(p.mu_r[j]), 1, fp) != 1)
              return 1;
            if (fread(&p.es_dot[j], sizeof(p.es_dot[j]), 1, fp) != 1)
              return 1;
       t@@ -431,15 +425,13 @@ int main(int argc, char *argv[])
          delete[] p.k_n;
          delete[] p.k_s;
          delete[] p.k_r;
       +  delete[] p.gamma_n;
          delete[] p.gamma_s;
          delete[] p.gamma_r;
          delete[] p.mu_s;
       +  delete[] p.mu_d;
          delete[] p.mu_r;
       -  delete[] p.C;
          delete[] p.rho;
       -  delete[] p.E;
       -  delete[] p.K;
       -  delete[] p.nu;
          delete[] p.es_dot;
          delete[] p.es;
          delete[] p.p;