tBig restructuring, moving towards OO form - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 3a0858de8254872921ec35fc73d9b7a2e6e42e14
 (DIR) parent 42825a06bb016be6ea4ee40eede9c20ebb1649bc
 (HTM) Author: Anders Damsgaard <adc@geo.au.dk>
       Date:   Fri, 26 Oct 2012 16:01:28 +0200
       
       Big restructuring, moving towards OO form
       
       Diffstat:
         M src/constants.cuh                   |      10 +++++++++-
         A src/constants.h                     |      26 ++++++++++++++++++++++++++
         A src/cuPrintf.cuh                    |     130 +++++++++++++++++++++++++++++++
         M src/datatypes.h                     |     213 +++++++++----------------------
         A src/debug.h                         |      14 ++++++++++++++
         M src/file_io.cpp                     |     660 +++++++++++++++----------------
         M src/main.cpp                        |     505 ++-----------------------------
         A src/sphere.cpp                      |     180 +++++++++++++++++++++++++++++++
         A src/sphere.h                        |      97 ++++++++++++++++++++++++++++++
         A src/typedefs.h                      |      34 +++++++++++++++++++++++++++++++
         A src/utility.cuh                     |       9 +++++++++
       
       11 files changed, 915 insertions(+), 963 deletions(-)
       ---
 (DIR) diff --git a/src/constants.cuh b/src/constants.cuh
       t@@ -1,10 +1,18 @@
        #ifndef CONSTANTS_CUH_
        #define CONSTANTS_CUH_
        
       +#include "datatypes.h"
       +
        // Most constant memory variables are stored in
        // structures, see datatypes.cuh
        
        // Constant memory size: 64 kb
       -__constant__ int          devC_nc;           // Max. number of contacts a particle can have
       +__constant__ unsigned int devC_nd;   // Number of dimensions
       +__constant__ unsigned int devC_np;   // Number of particles
       +__constant__ int          devC_nc;   // Max. number of contacts a particle can have
        
       +// Device constant memory structures
       +__constant__ Params devC_params;
       +__constant__ Grid   devC_grid;
       + 
        #endif
 (DIR) diff --git a/src/constants.h b/src/constants.h
       t@@ -0,0 +1,26 @@
       +#ifndef CONSTANTS_H_
       +#define CONSTANTS_H_
       +
       +#include "typedefs.h"
       +
       +////////////////////////
       +// SYMBOLIC CONSTANTS //
       +////////////////////////
       +
       +// Define the max. number of walls
       +#define MAXWALLS 6
       +
       +
       +const Float PI = 3.14159265358979;
       +
       +// Number of dimensions (1 and 2 NOT functional)
       +const unsigned int ND = 3;
       +
       +// Define source code version
       +const Float VERS = 0.35;
       +
       +// Max. number of contacts per particle
       +//const int NC = 16;
       +const int NC = 32;
       +
       +#endif
 (DIR) diff --git a/src/cuPrintf.cuh b/src/cuPrintf.cuh
       t@@ -0,0 +1,130 @@
       +/*
       + * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
       + *
       + * Please refer to the NVIDIA end user license agreement (EULA) associated
       + * with this source code for terms and conditions that govern your use of
       + * this software. Any use, reproduction, disclosure, or distribution of
       + * this software and related documentation outside the terms of the EULA
       + * is strictly prohibited.
       + *
       + */
       +
       +#ifndef CUPRINTF_H
       +#define CUPRINTF_H
       +
       +/*
       + *      This is the header file supporting cuPrintf.cu and defining both
       + *      the host and device-side interfaces. See that file for some more
       + *      explanation and sample use code. See also below for details of the
       + *      host-side interfaces.
       + *
       + *  Quick sample code:
       + *
       +        #include "cuPrintf.cu"
       +        
       +        __global__ void testKernel(int val)
       +        {
       +                cuPrintf("Value is: %d\n", val);
       +        }
       +
       +        int main()
       +        {
       +                cudaPrintfInit();
       +                testKernel<<< 2, 3 >>>(10);
       +                cudaPrintfDisplay(stdout, true);
       +                cudaPrintfEnd();
       +        return 0;
       +        }
       + */
       +
       +///////////////////////////////////////////////////////////////////////////////
       +// DEVICE SIDE
       +// External function definitions for device-side code
       +
       +// Abuse of templates to simulate varargs
       +__device__ int cuPrintf(const char *fmt);
       +template <typename T1> __device__ int cuPrintf(const char *fmt, T1 arg1);
       +template <typename T1, typename T2> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2);
       +template <typename T1, typename T2, typename T3> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3);
       +template <typename T1, typename T2, typename T3, typename T4> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4);
       +template <typename T1, typename T2, typename T3, typename T4, typename T5> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5);
       +template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6);
       +template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7);
       +template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8);
       +template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9);
       +template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename T10> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10);
       +
       +
       +//
       +//      cuPrintfRestrict
       +//
       +//      Called to restrict output to a given thread/block. Pass
       +//      the constant CUPRINTF_UNRESTRICTED to unrestrict output
       +//      for thread/block IDs. Note you can therefore allow
       +//      "all printfs from block 3" or "printfs from thread 2
       +//      on all blocks", or "printfs only from block 1, thread 5".
       +//
       +//      Arguments:
       +//              threadid - Thread ID to allow printfs from
       +//              blockid - Block ID to allow printfs from
       +//
       +//      NOTE: Restrictions last between invocations of
       +//      kernels unless cudaPrintfInit() is called again.
       +//
       +#define CUPRINTF_UNRESTRICTED   -1
       +__device__ void cuPrintfRestrict(int threadid, int blockid);
       +
       +
       +
       +///////////////////////////////////////////////////////////////////////////////
       +// HOST SIDE
       +// External function definitions for host-side code
       +
       +//
       +//      cudaPrintfInit
       +//
       +//      Call this once to initialise the printf system. If the output
       +//      file or buffer size needs to be changed, call cudaPrintfEnd()
       +//      before re-calling cudaPrintfInit().
       +//
       +//      The default size for the buffer is 1 megabyte. For CUDA
       +//      architecture 1.1 and above, the buffer is filled linearly and
       +//      is completely used;     however for architecture 1.0, the buffer
       +//      is divided into as many segments are there are threads, even
       +//      if some threads do not call cuPrintf().
       +//
       +//      Arguments:
       +//              bufferLen - Length, in bytes, of total space to reserve
       +//                          (in device global memory) for output.
       +//
       +//      Returns:
       +//              cudaSuccess if all is well.
       +//
       +extern "C" cudaError_t cudaPrintfInit(size_t bufferLen=1048576);   // 1-meg - that's enough for 4096 printfs by all threads put together
       +
       +//
       +//      cudaPrintfEnd
       +//
       +//      Cleans up all memories allocated by cudaPrintfInit().
       +//      Call this at exit, or before calling cudaPrintfInit() again.
       +//
       +extern "C" void cudaPrintfEnd();
       +
       +//
       +//      cudaPrintfDisplay
       +//
       +//      Dumps the contents of the output buffer to the specified
       +//      file pointer. If the output pointer is not specified,
       +//      the default "stdout" is used.
       +//
       +//      Arguments:
       +//              outputFP     - A file pointer to an output stream.
       +//              showThreadID - If "true", output strings are prefixed
       +//                             by "[blockid, threadid] " at output.
       +//
       +//      Returns:
       +//              cudaSuccess if all is well.
       +//
       +extern "C" cudaError_t cudaPrintfDisplay(void *outputFP=NULL, bool showThreadID=false);
       +
       +#endif  // CUPRINTF_H
 (DIR) diff --git a/src/datatypes.h b/src/datatypes.h
       t@@ -1,5 +1,3 @@
       -// datatypes.h -- Structure templates and function prototypes
       -
        // Avoiding multiple inclusions of header file
        #ifndef DATATYPES_H_
        #define DATATYPES_H_
       t@@ -7,171 +5,80 @@
        #include <math.h>
        #include "vector_functions.h"
        //#include "vector_arithmetic.h"
       +#include "typedefs.h"
       +#include "constants.h"
       +
       +
       +////////////////////////////
       +// STRUCTURE DECLARATIONS //
       +////////////////////////////
       +
       +// Structure containing kinematic particle values
       +struct Kinematics {
       +  Float4 *x;                // Positions + radii (w)
       +  Float2 *xysum;        // Horizontal distance traveled
       +  Float4 *vel;                // Translational velocities + fixvels (w)
       +  Float4 *force;        // Sums of forces
       +  Float4 *angpos;        // Angular positions
       +  Float4 *angvel;        // Angular velocities
       +  Float4 *torque;        // Sums of torques
       +};
        
       -
       -// Enable profiling of kernel runtimes?
       -// 0: No (default)
       -// 1: Yes
       -#define PROFILING 1
       -
       -// Output information about contacts to stdout?
       -// 0: No (default)
       -// 1: Yes
       -#define CONTACTINFO 0
       -
       -
       -//////////////////////
       -// TYPE DEFINITIONS //
       -//////////////////////
       -
       -// REMEMBER: When changing the precision below,
       -// change values in typedefs.h accordingly.
       -
       -// Uncomment all five lines below for single precision
       -/*
       -typedef Float Float;
       -typedef Float3 Float3;
       -typedef Float4 Float4;
       -#define MAKE_FLOAT3(x, y, z) make_Float3(x, y, z)
       -#define MAKE_FLOAT4(x, y, z, w) make_Float4(x, y, z, w)
       -*/
       -
       -
       -// Uncomment all five lines below for double precision
       -///*
       -typedef double Float;
       -typedef double2 Float2;
       -typedef double3 Float3;
       -typedef double4 Float4;
       -#define MAKE_FLOAT3(x, y, z) make_double3(x, y, z)
       -#define MAKE_FLOAT4(x, y, z, w) make_double4(x, y, z, w)
       -//*/
       -
       -
       -////////////////////////
       -// SYMBOLIC CONSTANTS //
       -////////////////////////
       -
       -// Define the max. number of walls
       -#define MAXWALLS 6
       -
       -
       -const Float PI = 3.14159265358979;
       -
       -// Number of dimensions (1 and 2 NOT functional)
       -const unsigned int ND = 3;
       -
       -// Define source code version
       -const Float VERS = 0.25;
       -
       -// Max. number of contacts per particle
       -//const int NC = 16;
       -const int NC = 32;
       -
       -
       -///////////////////////////
       -// STRUCTURE DECLARATION //
       -///////////////////////////
       -
       -// Structure containing variable particle parameters
       -struct Particles {
       -  Float *radius;
       -  Float *k_n;
       -  Float *k_t;
       -  Float *k_r;
       -  Float *gamma_n;
       -  Float *gamma_t;
       -  Float *gamma_r;
       -  Float *mu_s;
       -  Float *mu_d;
       -  Float *mu_r;
       -  Float *rho;
       -  Float *es_dot;
       -  Float *ev_dot;
       -  Float *es;
       -  Float *ev;
       -  Float *p;
       -  Float *m;
       -  Float *I;
       -  unsigned int np;
       +// Structure containing individual physical particle parameters
       +struct Energies {
       +  Float *es_dot;        // Frictional dissipation rates
       +  Float *es;                // Frictional dissipations
       +  Float *ev_dot;        // Viscous dissipation rates
       +  Float *ev;                // Viscous dissipations
       +  Float *p;                // Pressures
       +  //uint4 *bonds;                // Cohesive bonds
        };
        
        // Structure containing grid parameters
        struct Grid {
       -  unsigned int nd;
       -  Float origo[ND];
       -  Float L[ND];
       -  unsigned int num[ND];
       +  Float origo[ND];        // World coordinate system origo
       +  Float L[ND];                // World dimensions
       +  unsigned int num[ND];        // Neighbor-search cells along each axis
       +  int periodic;                // Behavior of boundaries at 1st and 2nd world edge
        };
        
        // Structure containing time parameters
        struct Time {
       -  Float dt;
       -  double current;
       -  double total;
       -  Float file_dt;
       -  unsigned int step_count;
       +  Float dt;                // Computational time step length
       +  double current;        // Current time
       +  double total;                // Total time (at the end of experiment)
       +  Float file_dt;        // Time between output files
       +  unsigned int step_count; // Number of steps taken at current time
        };
        
        // Structure containing constant, global physical parameters
        struct Params {
       -  int global;
       -  Float g[ND];
       -  Float dt;
       -  unsigned int np;
       -  unsigned int nw;
       -  int wmode[MAXWALLS];
       -  Float k_n;
       -  Float k_t;
       -  Float k_r;
       -  Float gamma_n;
       -  Float gamma_t;
       -  Float gamma_r;
       -  Float gamma_wn;
       -  Float gamma_wt;
       -  Float gamma_wr;
       -  Float mu_s; 
       -  Float mu_d;
       -  Float mu_r;
       -  Float rho;
       -  Float kappa;
       -  Float db;
       -  Float V_b;
       -  int periodic;
       -  unsigned int shearmodel;
       +  Float g[ND];                // Gravitational acceleration
       +  Float k_n;                // Normal stiffness
       +  Float k_t;                // Tangential stiffness
       +  Float k_r;                // Rotational stiffness
       +  Float gamma_n;        // Normal viscosity
       +  Float gamma_t;        // Tangential viscosity
       +  Float gamma_r;        // Rotational viscosity
       +  Float mu_s;                 // Static friction coefficient
       +  Float mu_d;                // Dynamic friction coefficient
       +  Float mu_r;                // Rotational friction coefficient
       +  Float rho;                // Material density
       +  unsigned int contactmodel; // Inter-particle contact model
       +  Float kappa;                // Capillary bond prefactor
       +  Float db;                // Capillary bond debonding distance
       +  Float V_b;                // Volume of fluid in capillary bond
        };
        
       -
       -/////////////////////////
       -// PROTOTYPE FUNCTIONS //
       -/////////////////////////
       -int fwritebin(char *target, Particles *p, 
       -                  Float4 *host_x, Float4 *host_vel, 
       -              Float4 *host_angvel, Float4 *host_force, 
       -              Float4 *host_torque, Float4 *host_angpos, 
       -              uint4 *host_bonds,
       -              Grid *grid, Time *time, Params *params,
       -              Float4 *host_w_nx, Float4 *host_w_mvfd);
       -
       -// device.cu
       -//extern "C"
       -void initializeGPU(void);
       -
       -//extern "C"
       -void gpuMain(Float4* host_x,
       -                 Float4* host_vel,
       -             Float4* host_acc,
       -             Float4* host_angvel,
       -             Float4* host_angacc,
       -             Float4* host_force,
       -             Float4* host_torque,
       -             Float4* host_angpos,
       -             uint4*  host_bonds,
       -             Particles p, Grid grid,
       -             Time time, Params params,
       -             Float4* host_w_nx,
       -             Float4* host_w_mvfd,
       -             const char* cwd,
       -             const char* inputbin);
       +// Structure containing wall parameters
       +struct Walls {
       +  unsigned int nw;        // Number of walls (<= MAXWALLS)
       +  int wmode[MAXWALLS];        // Wall modes
       +  Float4* nx;                // Wall normal and position
       +  Float4* mvfd;                // Wall mass, velocity, force and dev. stress
       +  Float gamma_wn;        // Wall normal viscosity
       +  Float gamma_wt;        // Wall tangential viscosity
       +  Float gamma_wr;        // Wall rolling viscosity
       +};
        
        #endif
 (DIR) diff --git a/src/debug.h b/src/debug.h
       t@@ -0,0 +1,14 @@
       +#ifndef DEBUG_H_
       +#define DEBUG_H_
       +
       +// Enable profiling of kernel runtimes?
       +// 0: No (default)
       +// 1: Yes
       +#define PROFILING 1
       +
       +// Output information about contacts to stdout?
       +// 0: No (default)
       +// 1: Yes
       +#define CONTACTINFO 0
       +
       +#endif
 (DIR) diff --git a/src/file_io.cpp b/src/file_io.cpp
       t@@ -1,348 +1,344 @@
       -#include <stdio.h>  // Standard library functions for file input and output
       -#include <stdlib.h> // Functions involving memory allocation, process control, conversions and others
       -#include <unistd.h> // UNIX only: For getcwd
       -#include <string.h> // For strerror and strcmp
       -#include <cuda.h>
       -#include "datatypes.h"
       +#include <iostream>
       +#include <cstdio>
       +#include <cstdlib>
        
       +#include "typedefs.h"
       +#include "datatypes.h"
       +#include "constants.h"
       +#include "sphere.h"
        
       -// Write host variables to target binary file
       -// The output format should ALWAYS be double precision,
       -// so this function will typecast the data before write
       -// if it is single precision.
       -int fwritebin(char *target, 
       -    Particles *p, 
       -    Float4 *host_x, 
       -    Float4 *host_vel, 
       -    Float4 *host_angvel, 
       -    Float4 *host_force, 
       -    Float4 *host_torque, 
       -    Float4 *host_angpos, 
       -    uint4 *host_bonds,
       -    Grid *grid, 
       -    Time *time, 
       -    Params *params,
       -    Float4 *host_w_nx, 
       -    Float4 *host_w_mvfd)
       +// Read DEM data from binary file
       +void DEM::readbin(const char *target)
        {
        
       -  FILE *fp;
       -  unsigned int u;
       -  unsigned int j;
       +  if (verbose == 1)
       +    std::cout << "reading binary: " << target << '\n';
       +
       +  int err = 0;
        
       -  if ((fp = fopen(target,"wb")) == NULL) {
       -    printf("Could create output binary file. Bye.\n");
       -    return 1; // Return unsuccessful exit status
       +  // Open input file
       +  FILE *fp;
       +  if ((fp = fopen(target, "rb")) == NULL) {
       +    std::cerr << "Could not read input binary file '"
       +      << target << "'\n";
       +    exit(++err);
          }
        
       -  // If double precision: Values can be written directly
       -  if (sizeof(Float) == sizeof(double)) {
       +  // Read data
       +  if(fread(&nd, sizeof(nd), 1, fp) != 1)
       +    exit(++err); // Return unsuccessful exit status
        
       -    // World dimensions
       -    fwrite(&grid->nd, sizeof(grid->nd), 1, fp);
       -
       -    // Number of particles
       -    fwrite(&p->np, sizeof(p->np), 1, fp);
       -
       -    // Temporal parameters
       -    fwrite(&time->dt, sizeof(time->dt), 1, fp);
       -    fwrite(&time->current, sizeof(time->current), 1, fp);
       -    fwrite(&time->total, sizeof(time->total), 1, fp);
       -    fwrite(&time->file_dt, sizeof(time->file_dt), 1, fp);
       -    fwrite(&time->step_count, sizeof(time->step_count), 1, fp);
       -
       -    // World coordinate system origo
       -    for (u=0; u<grid->nd; ++u) {
       -      fwrite(&grid->origo[u], sizeof(grid->origo[u]), 1, fp);
       -    }
       -
       -    // World dimensions
       -    for (u=0; u<grid->nd; ++u) {
       -      fwrite(&grid->L[u], sizeof(grid->L[u]), 1, fp);
       -    }
       -
       -    // Grid cells along each dimension
       -    for (u=0; u<grid->nd; ++u) {
       -      fwrite(&grid->num[u], sizeof(grid->num[u]), 1, fp);
       -    }
       -
       -    // Particle vectors
       -    for (j=0; j<p->np; ++j) {
       -      // x-axis
       -      fwrite(&host_x[j].x, sizeof(Float), 1, fp);
       -      fwrite(&host_vel[j].x, sizeof(Float), 1, fp);
       -      fwrite(&host_angvel[j].x, sizeof(Float), 1, fp);
       -      fwrite(&host_force[j].x, sizeof(Float), 1, fp);
       -      fwrite(&host_torque[j].x, sizeof(Float), 1, fp);
       -      fwrite(&host_angpos[j].x, sizeof(Float), 1, fp);
       -
       -      // y-axis
       -      fwrite(&host_x[j].y, sizeof(Float), 1, fp);
       -      fwrite(&host_vel[j].y, sizeof(Float), 1, fp);
       -      fwrite(&host_angvel[j].y, sizeof(Float), 1, fp);
       -      fwrite(&host_force[j].y, sizeof(Float), 1, fp);
       -      fwrite(&host_torque[j].y, sizeof(Float), 1, fp);
       -      fwrite(&host_angpos[j].y, sizeof(Float), 1, fp);
       -
       -      // z-axis
       -      fwrite(&host_x[j].z, sizeof(Float), 1, fp);
       -      fwrite(&host_vel[j].z, sizeof(Float), 1, fp);
       -      fwrite(&host_angvel[j].z, sizeof(Float), 1, fp);
       -      fwrite(&host_force[j].z, sizeof(Float), 1, fp);
       -      fwrite(&host_torque[j].z, sizeof(Float), 1, fp);
       -      fwrite(&host_angpos[j].z, sizeof(Float), 1, fp);
       -    } 
       -
       -    // Individual particle values
       -    for (j=0; j<p->np; ++j) {
       -      fwrite(&host_vel[j].w, sizeof(Float), 1, fp);
       -      fwrite(&host_x[j].w, sizeof(Float), 1, fp);
       -      fwrite(&p->radius[j], sizeof(p->radius[j]), 1, fp);
       -      fwrite(&p->rho[j], sizeof(p->rho[j]), 1, fp);
       -      fwrite(&p->k_n[j], sizeof(p->k_n[j]), 1, fp);
       -      fwrite(&p->k_t[j], sizeof(p->k_t[j]), 1, fp);
       -      fwrite(&p->k_r[j], sizeof(p->k_r[j]), 1, fp);
       -      fwrite(&p->gamma_n[j], sizeof(p->gamma_n[j]), 1, fp);
       -      fwrite(&p->gamma_t[j], sizeof(p->gamma_t[j]), 1, fp);
       -      fwrite(&p->gamma_r[j], sizeof(p->gamma_r[j]), 1, fp);
       -      fwrite(&p->mu_s[j], sizeof(p->mu_s[j]), 1, fp);
       -      fwrite(&p->mu_d[j], sizeof(p->mu_d[j]), 1, fp);
       -      fwrite(&p->mu_r[j], sizeof(p->mu_r[j]), 1, fp);
       -      fwrite(&p->es_dot[j], sizeof(p->es_dot[j]), 1, fp);
       -      fwrite(&p->ev_dot[j], sizeof(p->ev_dot[j]), 1, fp);
       -      fwrite(&p->es[j], sizeof(p->es[j]), 1, fp);
       -      fwrite(&p->ev[j], sizeof(p->ev[j]), 1, fp);
       -      fwrite(&p->p[j], sizeof(p->p[j]), 1, fp);
       -    }
       -
       -    // Singular parameters
       -    fwrite(&params->global, sizeof(params->global), 1, fp);
       -    for (u=0; u<grid->nd; ++u) {
       -      fwrite(&params->g[u], sizeof(params->g[u]), 1, fp);
       -    }
       -    fwrite(&params->kappa, sizeof(params->kappa), 1, fp);
       -    fwrite(&params->db, sizeof(params->db), 1, fp);
       -    fwrite(&params->V_b, sizeof(params->V_b), 1, fp);
       -    fwrite(&params->shearmodel, sizeof(params->shearmodel), 1, fp);
       -
       -    // Walls
       -    fwrite(&params->nw, sizeof(params->nw), 1, fp); // No. of walls
       -    for (j=0; j<params->nw; ++j) {
       -      fwrite(&params->wmode[j], sizeof(params->wmode[j]), 1, fp);
       -      // Wall normal
       -      fwrite(&host_w_nx[j].x, sizeof(Float), 1, fp);
       -      fwrite(&host_w_nx[j].y, sizeof(Float), 1, fp);
       -      fwrite(&host_w_nx[j].z, sizeof(Float), 1, fp);
       -
       -      fwrite(&host_w_nx[j].w, sizeof(Float), 1, fp);   // Wall position
       -      fwrite(&host_w_mvfd[j].x, sizeof(Float), 1, fp); // Wall mass
       -      fwrite(&host_w_mvfd[j].y, sizeof(Float), 1, fp); // Wall velocity
       -      fwrite(&host_w_mvfd[j].z, sizeof(Float), 1, fp); // Wall force
       -      fwrite(&host_w_mvfd[j].w, sizeof(Float), 1, fp); // Wall deviatoric stress
       -    }
       -    fwrite(&params->periodic, sizeof(params->periodic), 1, fp);
       -    fwrite(&params->gamma_wn, sizeof(params->gamma_wn), 1, fp);
       -    fwrite(&params->gamma_wt, sizeof(params->gamma_wt), 1, fp);
       -    fwrite(&params->gamma_wr, sizeof(params->gamma_wr), 1, fp);
       -
       -
       -    // Write bond pair values
       -    for (j=0; j<p->np; ++j) {
       -      fwrite(&host_bonds[j].x, sizeof(unsigned int), 1, fp);
       -      fwrite(&host_bonds[j].y, sizeof(unsigned int), 1, fp);
       -      fwrite(&host_bonds[j].z, sizeof(unsigned int), 1, fp);
       -      fwrite(&host_bonds[j].w, sizeof(unsigned int), 1, fp);
       -    }
       -
       -  } else if (sizeof(Float) == sizeof(float)) {
       -    // Single precision: Type conversion required
       -
       -    double d; // Double precision placeholder
       -
       -    // World dimensions
       -    fwrite(&grid->nd, sizeof(grid->nd), 1, fp);
       -
       -    // Number of particles
       -    fwrite(&p->np, sizeof(p->np), 1, fp);
       -
       -    // Temporal parameters
       -    d = (double)time->dt;
       -    fwrite(&d, sizeof(d), 1, fp);
       -    fwrite(&time->current, sizeof(time->current), 1, fp);
       -    fwrite(&time->total, sizeof(time->total), 1, fp);
       -    d = (double)time->file_dt;
       -    fwrite(&d, sizeof(d), 1, fp);
       -    fwrite(&time->step_count, sizeof(time->step_count), 1, fp);
       -
       -    // World coordinate system origo
       -    for (u=0; u<grid->nd; ++u) {
       -      d = (double)grid->origo[u];
       -      fwrite(&d, sizeof(d), 1, fp);
       -    }
       -
       -    // World dimensions
       -    for (u=0; u<grid->nd; ++u) {
       -      d = (double)grid->L[u];
       -      fwrite(&d, sizeof(d), 1, fp);
       -    }
       -
       -    // Grid cells along each dimension
       -    for (u=0; u<grid->nd; ++u) {
       -      fwrite(&grid->num[u], sizeof(grid->num[u]), 1, fp);
       -    }
       -
       -    // Particle vectors
       -    for (j=0; j<p->np; ++j) {
       -      // x-axis
       -      d = (double)host_x[j].x;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_vel[j].x;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_angvel[j].x;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_force[j].x;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_torque[j].x;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_angpos[j].x;
       -      fwrite(&d, sizeof(d), 1, fp);
       -
       -      // y-axis
       -      d = (double)host_x[j].y;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_vel[j].y;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_angvel[j].y;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_force[j].y;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_torque[j].y;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_angpos[j].y;
       -      fwrite(&d, sizeof(d), 1, fp);
       -
       -      // z-axis
       -      d = (double)host_x[j].z;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_vel[j].z;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_angvel[j].z;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_force[j].z;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_torque[j].z;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_angpos[j].z;
       -      fwrite(&d, sizeof(d), 1, fp);
       -    } 
       -
       -    // Individual particle values
       -    for (j=0; j<p->np; ++j) {
       -      d = (double)host_vel[j].w;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_x[j].w;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->radius[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->rho[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->k_n[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->k_t[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->k_r[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->gamma_n[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->gamma_t[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->gamma_r[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->mu_s[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->mu_d[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->mu_r[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->es_dot[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->ev_dot[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->es[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->ev[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)p->p[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -    }
       -
       -    // Singular parameters
       -    fwrite(&params->global, sizeof(params->global), 1, fp);
       -    for (u=0; u<grid->nd; ++u) {
       -      d = (double)params->g[u];
       -      fwrite(&d, sizeof(d), 1, fp);
       -    }
       -    d = (double)params->kappa;
       -    fwrite(&d, sizeof(d), 1, fp);
       -    d = (double)params->db;
       -    fwrite(&d, sizeof(d), 1, fp);
       -    d = (double)params->V_b;
       -    fwrite(&d, sizeof(d), 1, fp);
       -    fwrite(&params->shearmodel, sizeof(params->shearmodel), 1, fp);
       -
       -    // Walls
       -    fwrite(&params->nw, sizeof(params->nw), 1, fp); // No. of walls
       -    for (j=0; j<params->nw; ++j) {
       -
       -      // Wall mode
       -      d = (double)params->wmode[j];
       -      fwrite(&d, sizeof(d), 1, fp);
       -
       -      // Wall normal
       -      d = (double)host_w_nx[j].x;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_w_nx[j].y;
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_w_nx[j].z;
       -      fwrite(&d, sizeof(d), 1, fp);
       -
       -      d = (double)host_w_nx[j].w;         // Wall position
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_w_mvfd[j].x;        // Wall mass
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_w_mvfd[j].y;        // Wall velocity
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_w_mvfd[j].z;        // Wall force
       -      fwrite(&d, sizeof(d), 1, fp);
       -      d = (double)host_w_mvfd[j].w;        // Wall deviatoric stress
       -      fwrite(&d, sizeof(d), 1, fp);
       -    }
       -    fwrite(&params->periodic, sizeof(params->periodic), 1, fp);
       -    d = (double)params->gamma_wn;
       -    fwrite(&d, sizeof(d), 1, fp);
       -    d = (double)params->gamma_wt;
       -    fwrite(&d, sizeof(d), 1, fp);
       -    d = (double)params->gamma_wr;
       -    fwrite(&d, sizeof(d), 1, fp);
       -
       -    // Write bond pair values
       -    for (j=0; j<p->np; ++j) {
       -      fwrite(&host_bonds[j].x, sizeof(unsigned int), 1, fp);
       -      fwrite(&host_bonds[j].y, sizeof(unsigned int), 1, fp);
       -      fwrite(&host_bonds[j].z, sizeof(unsigned int), 1, fp);
       -      fwrite(&host_bonds[j].w, sizeof(unsigned int), 1, fp);
       -    }
       +  if (fread(&np, sizeof(np), 1, fp) != 1)
       +    exit(++err); // Return unsuccessful exit status
       +  if (verbose == 1) {
       +    std::cout << "  - Number of dimensions: nd = " << nd << "\n"
       +      << "  - Number of particles:  np = " << np << "\n";
       +  }
       +
       +  if (nd != ND) {
       +    std::cerr << "Dimensionality mismatch between dataset and this SPHERE program.\n"
       +      << "The dataset is " << nd 
       +      << "D, this SPHERE binary is " << ND << "D.\n"
       +      << "This execution is terminating.\n";
       +    exit(++err); // Return unsuccessful exit status
       +  }
        
       +  // Check precision choice
       +  if (verbose == 1)
       +    std::cout << "  - Compiled for ";
       +  if (sizeof(Float) == sizeof(float)) {
       +    if (verbose == 1)
       +      std::cout << "single";
       +  } else if (sizeof(Float) == sizeof(double)) {
       +    if (verbose == 1)
       +      std::cout << "double";
          } else {
       -    fprintf(stderr, "Error: Chosen floating-point precision is incompatible with the data file format.\n");
       -    exit(1);
       +    std::cerr << "Error! Chosen precision not available. Check datatypes.h\n";
       +    exit(++err);
       +  }
       +  if (verbose == 1)
       +    std::cout << " precision\n";
       +
       +  // Read time parameters
       +  if (fread(&time.dt, sizeof(time.dt), 1, fp) != 1)
       +    exit(++err); // Return unsuccessful exit status
       +  if (fread(&time.current, sizeof(time.current), 1, fp) != 1)
       +    exit(++err); 
       +  if (fread(&time.total, sizeof(time.total), 1, fp) != 1)
       +    exit(++err); 
       +  if (fread(&time.file_dt, sizeof(time.file_dt), 1, fp) != 1)
       +    exit(++err); 
       +  if (fread(&time.step_count, sizeof(time.step_count), 1, fp) != 1)
       +    exit(++err); 
       +
       +  // Output display parameters to screen
       +  if (verbose == 1) {
       +    std::cout << "  - Timestep length:      time.dt         = " 
       +      << time.dt << " s\n"
       +      << "  - Start at time:        time.current    = " 
       +      << time.current << " s\n"
       +      << "  - Total sim. time:      time.total      = " 
       +      << time.total << " s\n"
       +      << "  - File output interval: time.file_dt    = " 
       +      << time.file_dt << " s\n"
       +      << "  - Start at step count:  time.step_count = " 
       +      << time.step_count << "\n";
       +  }
       +
       +  // For spatial vectors an array of Float4 vectors is chosen for best fit with 
       +  // GPU memory handling. Vector variable structure: ( x, y, z, <empty>).
       +  // Indexing starts from 0.
       +
       +  // Allocate host arrays
       +  if (verbose == 1)
       +    std::cout << "\n  Allocating host memory:                         ";
       +  // Allocate more host arrays
       +  k.x           = new Float4[np];
       +  k.xysum  = new Float2[np];
       +  k.vel           = new Float4[np];
       +  k.force  = new Float4[np];
       +  k.angpos = new Float4[np];
       +  k.angvel = new Float4[np];
       +  k.torque = new Float4[np];
       +
       +  e.es_dot = new Float[np];
       +  e.es     = new Float[np];
       +  e.ev_dot = new Float[np];
       +  e.ev     = new Float[np];
       +  e.p           = new Float[np];
       +
       +  if (verbose == 1)
       +    std::cout << "Done\n";
       +
       +  if (verbose == 1)
       +    std::cout << "  Reading remaining data from input binary:       ";
       +
       +  // Read grid parameters
       +  if (fread(&grid.origo, sizeof(grid.origo[0]), nd, fp) != nd)
       +    exit(++err); // Return unsuccessful exit status
       +  if (fread(&grid.L, sizeof(grid.L[0]), nd, fp) != nd)
       +    exit(++err);
       +  if (fread(&grid.num, sizeof(grid.num[0]), nd, fp) != nd)
       +    exit(++err);
       +  if (fread(&grid.periodic, sizeof(grid.periodic), 1, fp) != 1)
       +    exit(++err);
       +
       +  // Read kinematic values
       +  if (fread(&k.x, sizeof(Float4), np, fp) != np)
       +    exit(++err);
       +  if (fread(&k.xysum, sizeof(Float2), np, fp) != np)
       +    exit(++err);
       +  if (fread(&k.vel, sizeof(Float4), np, fp) != np)
       +    exit(++err);
       +  if (fread(&k.force, sizeof(Float4), np, fp) != np)
       +    exit(++err);
       +  if (fread(&k.angpos, sizeof(Float4), np, fp) != np)
       +    exit(++err);
       +  if (fread(&k.angvel, sizeof(Float4), np, fp) != np)
       +    exit(++err);
       +  if (fread(&k.torque, sizeof(Float4), np, fp) != np)
       +    exit(++err);
       +  // mass (m) and inertia (I) are calculated on device
       +
       +  // Read energies
       +  if (fread(&e.es_dot, sizeof(e.es_dot[0]), np, fp) != np)
       +    exit(++err);
       +  if (fread(&e.es, sizeof(e.es[0]), np, fp) != np)
       +    exit(++err);
       +  if (fread(&e.ev_dot, sizeof(e.ev_dot[0]), np, fp) != np)
       +    exit(++err);
       +  if (fread(&e.ev, sizeof(e.ev[0]), np, fp) != np)
       +    exit(++err);
       +  if (fread(&e.p, sizeof(e.p[0]), np, fp) != np)
       +    exit(++err);
       +
       +  // Read constant, global physical parameters
       +  if (fread(&params.g, sizeof(params.g[0]), nd, fp) != nd)
       +    exit(++err);
       +  if (fread(&params.k_n, sizeof(params.k_n), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.k_t, sizeof(params.k_t), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.k_r, sizeof(params.k_r), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.gamma_n, sizeof(params.gamma_n), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.gamma_t, sizeof(params.gamma_t), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.gamma_r, sizeof(params.gamma_r), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.mu_s, sizeof(params.mu_s), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.mu_d, sizeof(params.mu_d), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.mu_r, sizeof(params.mu_r), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.rho, sizeof(params.rho), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.contactmodel, sizeof(params.contactmodel), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.kappa, sizeof(params.kappa), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&params.db, sizeof(params.db), 1, fp) != 1)
       +    exit(++err); 
       +  if (fread(&params.V_b, sizeof(params.V_b), 1, fp) != 1)
       +    exit(++err); 
       +
       +  // Read wall parameters
       +  if (fread(&walls.nw, sizeof(walls.nw), 1, fp) != 1)
       +    exit(++err); 
       +  // Allocate host memory for walls
       +  // Wall normal (x,y,z), w: wall position on axis parallel to wall normal
       +  // Wall mass (x), velocity (y), force (z), and deviatoric stress (w)
       +  walls.nx   = new Float4[walls.nw];
       +  walls.mvfd = new Float4[walls.nw]; 
       +
       +  if (fread(&walls.wmode, sizeof(walls.wmode[0]), walls.nw, fp) != walls.nw)
       +    exit(++err);
       +  if (fread(&walls.nx, sizeof(Float4), walls.nw, fp) != 1)
       +    exit(++err);
       +  if (fread(&walls.mvfd, sizeof(Float4), walls.nw, fp) != 1)
       +    exit(++err);
       +  if (fread(&walls.gamma_wn, sizeof(walls.gamma_wn), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&walls.gamma_wt, sizeof(walls.gamma_wt), 1, fp) != 1)
       +    exit(++err);
       +  if (fread(&walls.gamma_wr, sizeof(walls.gamma_wr), 1, fp) != 1)
       +    exit(++err);
       +
       +
       +  if (walls.nw > MAXWALLS) {
       +    std::cerr << "Error; MAXWALLS (" << MAXWALLS << ") in datatypes.h "
       +      << "is smaller than the number of walls specified in the "
       +      << "input file (" << walls.nw << ").\n";
          }
        
          fclose(fp);
        
       -  // This function returns 0 if it ended without problems,
       -  // and 1 if there were problems opening the target file.
       -  return 0;
       -} // End of fwritebin(...)
       +  if (verbose == 1)
       +    std::cout << "Done\n";
        
       +}
       +
       +// Write DEM data to binary file
       +void DEM::writebin(const char *target)
       +{
       +  int err = 0;
       +
       +  // Open output file
       +  FILE *fp;
       +  if ((fp = fopen(target, "wb")) == NULL) {
       +    std::cerr << "could create output binary file '"
       +      << target << "'.\n";
       +    exit(++err); // Return unsuccessful exit status
       +  }
       +
       +  // If double precision: Values can be written directly
       +  if (sizeof(Float) == sizeof(double)) {
       +
       +    fwrite(&nd, sizeof(nd), 1, fp);
       +    fwrite(&np, sizeof(np), 1, fp);
       +
       +    // Write temporal parameters
       +    if (fwrite(&time.dt, sizeof(time.dt), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&time.current, sizeof(time.current), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&time.total, sizeof(time.total), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&time.file_dt, sizeof(time.file_dt), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&time.step_count, sizeof(time.step_count), 1, fp) != 1)
       +      exit(++err);
       +
       +    // Write grid parameters
       +    if (fwrite(&grid.origo, sizeof(grid.origo[0]), nd, fp) != nd)
       +      exit(++err);
       +    if (fwrite(&grid.L, sizeof(grid.L[0]), nd, fp) != nd)
       +      exit(++err);
       +    if (fwrite(&grid.num, sizeof(grid.num[0]), nd, fp) != nd)
       +      exit(++err);
       +    if (fwrite(&grid.periodic, sizeof(grid.periodic), 1, fp) != 1)
       +      exit(++err);
       +
       +    // Write kinematic values
       +    if (fwrite(&k.x, sizeof(Float4), np, fp) != np)
       +      exit(++err);
       +    if (fwrite(&k.xysum, sizeof(Float2), np, fp) != np)
       +      exit(++err);
       +    if (fwrite(&k.vel, sizeof(Float4), np, fp) != np)
       +      exit(++err);
       +    if (fwrite(&k.force, sizeof(Float4), np, fp) != np)
       +      exit(++err);
       +    if (fwrite(&k.angpos, sizeof(Float4), np, fp) != np)
       +      exit(++err);
       +    if (fwrite(&k.angvel, sizeof(Float4), np, fp) != np)
       +      exit(++err);
       +    if (fwrite(&k.torque, sizeof(Float4), np, fp) != np)
       +      exit(++err);
       +
       +    // Write energies
       +    if (fwrite(&e.es_dot, sizeof(e.es_dot[0]), np, fp) != np)
       +      exit(++err);
       +    if (fwrite(&e.es, sizeof(e.es[0]), np, fp) != np)
       +      exit(++err);
       +    if (fwrite(&e.ev_dot, sizeof(e.ev_dot[0]), np, fp) != np)
       +      exit(++err);
       +    if (fwrite(&e.ev, sizeof(e.ev[0]), np, fp) != np)
       +      exit(++err);
       +    if (fwrite(&e.p, sizeof(e.p[0]), np, fp) != np)
       +      exit(++err);
       +
       +    // Write constant, global physical parameters
       +    if (fwrite(&params.g, sizeof(params.g[0]), nd, fp) != nd)
       +      exit(++err);
       +    if (fwrite(&params.k_n, sizeof(params.k_n), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.k_t, sizeof(params.k_t), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.k_r, sizeof(params.k_r), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.gamma_n, sizeof(params.gamma_n), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.gamma_t, sizeof(params.gamma_t), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.gamma_r, sizeof(params.gamma_r), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.mu_s, sizeof(params.mu_s), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.mu_d, sizeof(params.mu_d), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.mu_r, sizeof(params.mu_r), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.rho, sizeof(params.rho), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.contactmodel, sizeof(params.contactmodel), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.kappa, sizeof(params.kappa), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.db, sizeof(params.db), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&params.V_b, sizeof(params.V_b), 1, fp) != 1)
       +      exit(++err);
       +
       +    // Write walls parameters
       +    if (fwrite(&walls.nw, sizeof(walls.nw), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&walls.wmode, sizeof(walls.wmode[0]), walls.nw, fp) != walls.nw)
       +      exit(++err);
       +    if (fwrite(&walls.nx, sizeof(Float4), walls.nw, fp) != walls.nw)
       +      exit(++err);
       +    if (fwrite(&walls.mvfd, sizeof(Float4), walls.nw, fp) != walls.nw)
       +      exit(++err);
       +    if (fwrite(&walls.gamma_wn, sizeof(walls.gamma_wn), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&walls.gamma_wt, sizeof(walls.gamma_wt), 1, fp) != 1)
       +      exit(++err);
       +    if (fwrite(&walls.gamma_wr, sizeof(walls.gamma_wr), 1, fp) != 1)
       +      exit(++err);
       +
       +  } else {
       +    std::cerr << "Can't write output when in single precision mode.\n";
       +  }
       +}
        
 (DIR) diff --git a/src/main.cpp b/src/main.cpp
       t@@ -2,7 +2,8 @@
        /*  SPHERE source code by Anders Damsgaard Christensen, 2010-12,       */
        /*  a 3D Discrete Element Method algorithm with CUDA GPU acceleration. */
        /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
       -// Licence: Gnu Public License (GPL) v. 3. See license.txt.
       +
       +// Licence: GNU Public License (GPL) v. 3. See license.txt.
        // See doc/sphere-doc.pdf for full documentation.
        // Compile with GNU make by typing 'make' in the src/ directory.               
        // SPHERE is called from the command line with './sphere_<architecture> projectname' 
       t@@ -10,16 +11,11 @@
        
        // Including library files
        #include <iostream>
       -#include <cstdio>  // Standard library functions for file input and output
       -#include <stdlib.h> // Functions involving memory allocation, process control, conversions and others
       -#include <unistd.h> // UNIX only: For getcwd
       -#include <string.h> // For strerror and strcmp
       -#include <errno.h>  // For errno
       -#include <math.h>   // Basic Floating point mathematical operations
       -#include <time.h>   // Time and date functions for timer
        
        // Including user files
       +#include "constants.h"
        #include "datatypes.h"
       +#include "sphere.h"
        
        
        //////////////////
       t@@ -29,488 +25,43 @@
        // successfully, and 1 if an error occured which caused the program to crash.
        int main(int argc, char *argv[]) 
        {
       -  // Namespace declarations
       -  using std::cout;
       -  using std::cerr;
        
          // LOCAL VARIABLE DECLARATIONS
          if(!argv[1] || argc != 2) {
       -    cout << "Error: Specify simulation name from the input/ directory (input binary file), e.g. ./sphere input\n";
       -    return 1; // Return unsuccessful exit status
       -  }
       -
       -  unsigned int i,j; // Counter variables
       -  char file[1000];  // Complete path+filename variable
       -  FILE *fp;             // Declare file pointer
       -
       -  // Host particle variable structure
       -  Particles p;            
       -
       -  // Host grid structure
       -  Grid grid;
       -
       -  // Host time structure
       -  Time time;
       -
       -  // Host physical global constants structure
       -  Params params;
       -
       -  // Read path to current working directory
       -  char *cwd;
       -  cwd = getcwd(0, 0);
       -  if (!cwd) {        // Terminate program execution if path is not obtained
       -    cerr << "Error: getcwd failed: " << strerror(errno) << '\n';
       +    std::cerr << "Error: Specify input binary file, e.g. "
       +      << argv[0] << " input/test.bin\n";
            return 1; // Return unsuccessful exit status
          }
       -
          char *inputbin = argv[1]; // Input binary file read from command line argument
        
       -  // Opening cerenomy with fancy ASCII art
       -  cout << ".-------------------------------------.\n"
       -       << "|              _    Compiled for " << ND << "D   |\n" 
       -       << "|             | |                     |\n" 
       -       << "|    ___ _ __ | |__   ___ _ __ ___    |\n"
       -       << "|   / __| '_ \\| '_ \\ / _ \\ '__/ _ \\   |\n"
       -       << "|   \\__ \\ |_) | | | |  __/ | |  __/   |\n"
       -       << "|   |___/ .__/|_| |_|\\___|_|  \\___|   |\n"
       -       << "|       | |                           |\n"
       -       << "|       |_|           Version: " << VERS << "   |\n"           
       -       << "`-------------------------------------´\n"
       -       << " Simulation ID:\n"
       -       << " " << inputbin << "\n"
       -       << " -------------------------------------\n";
       -  
       -  initializeGPU();
       -
       -  // Import binary data
       -  printf("Importing initial data from input/%s.bin:\n",inputbin);
       -
       -  sprintf(file,"%s/input/%s.bin", cwd, inputbin);
       -  if ((fp = fopen(file,"rb")) == NULL) {
       -    cout << "Could not read input binary file " << cwd << "/input/" 
       -         << inputbin << ".bin. Bye.\n";
       -    exit(1); // Return unsuccessful exit status
       -  }
       -
       -  // Read the number of dimensions and particles
       -  if (fread(&grid.nd, sizeof(grid.nd), 1, fp) != 1)
       -    exit(1); // Return unsuccessful exit status
       -  if (fread(&p.np, sizeof(p.np), 1, fp) != 1)
       -    exit(1); // Return unsuccessful exit status
       -  cout << "  - Number of dimensions: grid.nd         = " << grid.nd << "\n"
       -       << "  - Number of particles:  p.np            = " << p.np << "\n";
       -
       -  if (grid.nd != ND) {
       -    cout << "Dimensionality mismatch between dataset and this SPHERE program.\n"
       -         << "The dataset is " << grid.nd 
       -         << "D, this SPHERE binary is " << ND << "D.\n"
       -         << "This execution is terminating.\n";
       -    exit(1); // Return unsuccessful exit status
       -  }
       -
       -  // Report precision choice
       -  cout << "  - Compiled for ";
       -  if (sizeof(Float) == sizeof(float))
       -    cout << "single";
       -  else if (sizeof(Float) == sizeof(double))
       -    cout << "double";
       -  else {
       -    cerr << "Error! Chosen precision not available. Check datatypes.h\n";
       -    exit(1);
       -  }
       -  cout << " precision\n";
       -
       -  // Read time parameters
       -  if (fread(&time.dt, sizeof(time.dt), 1, fp) != 1)
       -    exit(1); // Return unsuccessful exit status
       -  if (fread(&time.current, sizeof(time.current), 1, fp) != 1)
       -    exit(1); 
       -  if (fread(&time.total, sizeof(time.total), 1, fp) != 1)
       -    exit(1); 
       -  if (fread(&time.file_dt, sizeof(time.file_dt), 1, fp) != 1)
       -    exit(1); 
       -  if (fread(&time.step_count, sizeof(time.step_count), 1, fp) != 1)
       -    exit(1); 
       -
       -  // Copy timestep length to constant memory structure
       -  params.dt = time.dt;
       -
       -  // Copy number of particles to constant memory structure
       -  params.np = p.np;
       -
       -  // Output display parameters to screen
       -  cout << "  - Timestep length:      time.dt         = " 
       -       << time.dt << " s\n"
       -       << "  - Start at time:        time.current    = " 
       -       << time.current << " s\n"
       -       << "  - Total sim. time:      time.total      = " 
       -       << time.total << " s\n"
       -       << "  - File output interval: time.file_dt    = " 
       -       << time.file_dt << " s\n"
       -       << "  - Start at step count:  time.step_count = " 
       -       << time.step_count << "\n";
       -
       -
       -  // For spatial vectors an array of Float4 vectors is chosen for best fit with 
       -  // GPU memory handling. Vector variable structure: ( x, y, z, <empty>).
       -  // Indexing starts from 0.
       -
       -  // Allocate host arrays
       -  cout << "\n  Allocating host memory:                         ";
       -  //grid.origo   = new Float[ND];        // Coordinate system origo
       -  //grid.L       = new Float[ND];        // Model world dimensions
       -  //grid.num     = new unsigned int[ND]; // Number of cells in each dimension
       -  //params.g     = new Float[ND];               // Gravitational acceleration vector
       -  p.radius     = new Float[p.np];      // Particle radii
       -  p.rho        = new Float[p.np];      // Particle densities
       -  p.m          = new Float[p.np];      // Particle masses
       -  p.I          = new Float[p.np];      // Particle moment of inertia
       -  p.k_n        = new Float[p.np];      // Particle normal stiffnesses
       -  p.k_t               = new Float[p.np];      // Particle shear stiffnesses
       -  p.k_r               = new Float[p.np];      // Particle rolling stiffnesses
       -  p.gamma_n    = new Float[p.np];      // Particle normal viscosity
       -  p.gamma_t    = new Float[p.np];      // Particle shear viscosity
       -  p.gamma_r    = new Float[p.np];      // Particle rolling viscosity
       -  p.mu_s       = new Float[p.np];      // Inter-particle static shear contact friction coefficients
       -  p.mu_d       = new Float[p.np];      // Inter-particle dynamic shear contact friction coefficients
       -  p.mu_r       = new Float[p.np];      // Inter-particle rolling contact friction coefficients
       -  p.es_dot     = new Float[p.np];      // Rate of shear energy dissipation
       -  p.ev_dot     = new Float[p.np];      // Rate of viscous energy dissipation
       -  p.es         = new Float[p.np];      // Total shear energy dissipation
       -  p.ev         = new Float[p.np];      // Total viscous energy dissipation
       -  p.p               = new Float[p.np];      // Pressure excerted onto particle
       -  //params.wmode = new int[MAXWALLS];    // Wall BC's, 0: fixed, 1: devs, 2: vel
       -
       -  // Allocate Float4 host arrays
       -  Float4 *host_x      = new Float4[p.np];  // Center coordinates for each particle (x)
       -  Float4 *host_vel    = new Float4[p.np];  // Particle velocities (dotx = v)
       -  Float4 *host_acc    = new Float4[p.np];  // Particle accellerations (dotdotx = a)
       -  Float4 *host_angvel = new Float4[p.np];  // Particle angular velocity vector (omega)
       -  Float4 *host_angacc = new Float4[p.np];  // Particle angular acceleration vector (dotomega)
       -  Float4 *host_force  = new Float4[p.np];  // Particle summed force
       -  Float4 *host_torque = new Float4[p.np];  // Particle summed torque
       -  Float4 *host_angpos = new Float4[p.np];  // Particle angular position
       -
       -
       -  uint4  *host_bonds  = new uint4[p.np];   // Bonds from particle [i] to two particles
       -  cout << "Done\n";
       -
       -  cout << "  Reading remaining data from input binary:       ";
       -  // Read remaining data from input binary
       -  for (i=0; i<ND; ++i) {
       -    if (fread(&grid.origo[i], sizeof(grid.origo[i]), 1, fp) != 1)
       -      exit(1); // Return unsuccessful exit status
       -  }
       -
       -  for (i=0; i<ND; ++i) {
       -    if (fread(&grid.L[i], sizeof(grid.L[i]), 1, fp) != 1)
       -      exit(1); // Return unsuccessful exit status
       -  }
       -
       -  for (i=0; i<ND; ++i) {
       -    if (fread(&grid.num[i], sizeof(grid.num[i]), 1, fp) != 1)
       -      exit(1); // Return unsuccessful exit status
       -  }
       -
       -  for (j=0; j<p.np; ++j) {
       -    if (fread(&host_x[j].x, sizeof(Float), 1, fp) != 1)
       -      exit(1); // Return unsuccessful exit status
       -    if (fread(&host_vel[j].x, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_angvel[j].x, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_force[j].x, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_torque[j].x, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_angpos[j].x, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -
       -
       -    if (fread(&host_x[j].y, sizeof(Float), 1, fp) != 1)
       -      exit(1); // Return unsuccessful exit status
       -    if (fread(&host_vel[j].y, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_angvel[j].y, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_force[j].y, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_torque[j].y, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_angpos[j].y, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -
       -    if (fread(&host_x[j].z, sizeof(Float), 1, fp) != 1)
       -      exit(1); // Return unsuccessful exit status
       -    if (fread(&host_vel[j].z, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_angvel[j].z, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_force[j].z, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_torque[j].z, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_angpos[j].z, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -  }
       -
       -  for (j=0; j<p.np; ++j) {
       -    if (fread(&host_vel[j].w, sizeof(Float), 1, fp) != 1) // Fixvel
       -      exit(1); // Return unsuccessful exit status
       -    if (fread(&host_x[j].w, sizeof(Float), 1, fp) != 1) // xsum
       -      exit(1);
       -    if (fread(&p.radius[j], sizeof(p.radius[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.rho[j], sizeof(p.rho[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.k_n[j], sizeof(p.k_n[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.k_t[j], sizeof(p.k_t[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.k_r[j], sizeof(p.k_r[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.gamma_n[j], sizeof(p.gamma_n[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.gamma_t[j], sizeof(p.gamma_t[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.gamma_r[j], sizeof(p.gamma_r[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.mu_s[j], sizeof(p.mu_s[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.mu_d[j], sizeof(p.mu_d[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.mu_r[j], sizeof(p.mu_r[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.es_dot[j], sizeof(p.es_dot[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.ev_dot[j], sizeof(p.ev_dot[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.es[j], sizeof(p.es[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.ev[j], sizeof(p.ev[j]), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&p.p[j], sizeof(p.p[j]), 1, fp) != 1)
       -      exit(1);
       -  }
       -
       -  if (fread(&params.global, sizeof(params.global), 1, fp) != 1)
       -    exit(1); // Return unsuccessful exit status
       -  for (i=0; i<ND; ++i) {
       -    if (fread(&params.g[i], sizeof(params.g[i]), 1, fp) != 1)
       -      exit(1);
       -  }
       -
       -  if (fread(&params.kappa, sizeof(params.kappa), 1, fp) != 1)
       -    exit(1);
       -  if (fread(&params.db, sizeof(params.db), 1, fp) != 1)
       -    exit(1); 
       -  if (fread(&params.V_b, sizeof(params.V_b), 1, fp) != 1)
       -    exit(1); 
       -  if (fread(&params.shearmodel, sizeof(params.shearmodel), 1, fp) != 1)
       -    exit(1);
       -
       -
       -  // Number of dynamic walls
       -  if (fread(&params.nw, sizeof(params.nw), 1, fp) != 1)
       -    exit(1); 
       -
       -  if (params.nw > MAXWALLS) {
       -    cerr << "Error; MAXWALLS (" << MAXWALLS << ") in datatypes.h "
       -         << "is smaller than the number of walls specified in the "
       -         << "input file (" << params.nw << ").\n";
       -  }
       +  int verbose = 1;
       +  int checkVals = 1;
        
       -  // Allocate host memory for walls
       -  // Wall normal (x,y,z), w: wall position on axis parallel to wall normal
       -  // Wall mass (x), velocity (y), force (z), and deviatoric stress (w)
       -  Float4 *host_w_nx   = new Float4[params.nw];
       -  Float4 *host_w_mvfd = new Float4[params.nw]; 
       -
       -  // Read wall data
       -  for (j=0; j<params.nw; ++j) {
       -    // Wall condition mode: 0: fixed, 1: devs, 2: vel
       -    if (fread(&params.wmode[j], sizeof(params.wmode[j]), 1, fp) != 1)
       -      exit(1);
       -
       -    // Wall normal, x-dimension
       -    if (fread(&host_w_nx[j].x, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    // Wall normal, y-dimension
       -    if (fread(&host_w_nx[j].y, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -    // Wall normal, z-dimension
       -    if (fread(&host_w_nx[j].z, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -
       -    // Wall position along axis parallel to wall normal
       -    if (fread(&host_w_nx[j].w, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -
       -    // Wall mass
       -    if (fread(&host_w_mvfd[j].x, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -
       -    // Wall velocity along axis parallel to wall normal
       -    if (fread(&host_w_mvfd[j].y, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -
       -    // Wall force along axis parallel to wall normal
       -    if (fread(&host_w_mvfd[j].z, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -
       -    // Wall deviatoric stress
       -    if (fread(&host_w_mvfd[j].w, sizeof(Float), 1, fp) != 1)
       -      exit(1);
       -  }
       -  // x- and y boundary behavior.
       -  // 0: walls, 1: periodic boundaries, 2: x boundary periodic, y frictional walls
       -  if (fread(&params.periodic, sizeof(params.periodic), 1, fp) != 1) {
       -    cout << "  - x- and y boundaries: Behavior not set, assuming frictional walls.";
       -    params.periodic = 0;
       +  if (verbose == 1) {
       +    // Opening cerenomy with fancy ASCII art
       +    std::cout << ".-------------------------------------.\n"
       +      << "|              _    Compiled for " << ND << "D   |\n" 
       +      << "|             | |                     |\n" 
       +      << "|    ___ _ __ | |__   ___ _ __ ___    |\n"
       +      << "|   / __| '_ \\| '_ \\ / _ \\ '__/ _ \\   |\n"
       +      << "|   \\__ \\ |_) | | | |  __/ | |  __/   |\n"
       +      << "|   |___/ .__/|_| |_|\\___|_|  \\___|   |\n"
       +      << "|       | |                           |\n"
       +      << "|       |_|           Version: " << VERS << "   |\n"           
       +      << "`-------------------------------------´\n";
          }
        
       -  // Wall viscosities
       -  if (fread(&params.gamma_wn, sizeof(params.gamma_wn), 1, fp) != 1)
       -    exit(1);
       -  if (fread(&params.gamma_wt, sizeof(params.gamma_wt), 1, fp) != 1)
       -    exit(1);
       -  if (fread(&params.gamma_wr, sizeof(params.gamma_wr), 1, fp) != 1)
       -    exit(1);
       -
       -
       -  for (i=0; i<p.np; ++i) {
       -    if (fread(&host_bonds[i].x, sizeof(unsigned int), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_bonds[i].y, sizeof(unsigned int), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_bonds[i].z, sizeof(unsigned int), 1, fp) != 1)
       -      exit(1);
       -    if (fread(&host_bonds[i].w, sizeof(unsigned int), 1, fp) != 1)
       -      exit(1);
       -  }
       -
       -  fclose(fp);
       -
       -  cout << "Done\n";
       -
       -  if (params.shearmodel == 1)
       -    cout << "  - Shear force model: Viscous, fricional\n";
       -  else if (params.shearmodel == 2)
       -    cout << "  - Shear force model: Linear elastic, viscous, frictional\n";
       -  else if (params.shearmodel == 3)
       -    cout << "  - Shear force model: Nonlinear (Hertzian) elastic, viscous, frictional\n";
       -  else {
       -    cerr << "Error: Shear model value not understood.\n";
       -    exit(1);
       -  }
       -
       -  cout << "  - Number of dynamic walls: " << params.nw << "\n";
       -
       -  if (params.periodic == 1)
       -    cout << "  - x- and y boundaries: Periodic\n";
       -  else if (params.periodic == 2)
       -    cout << "  - x boundaries: Periodic. y boundaries: Frictional walls\n";
       -  else 
       -    cout << "  - x- and y boundaries: Frictional walls\n";
       -
       -  cout << "  - Top BC: ";
       -  if (params.wmode[0] == 0)
       -    cout << "Fixed\n";
       -  else if (params.wmode[0] == 1)
       -    cout << "Deviatoric stress\n";
       -  else if (params.wmode[0] == 2)
       -    cout << "Velocity\n";
       -  else {
       -    cerr << "Top boundary condition not recognized!\n";
       -    exit(1);
       -  }
       -
       -  if (grid.nd == 1) {
       -    cout << "  - Grid: " 
       -         << grid.num[0] << " cells (x)\n";
       -  } else if (grid.nd == 2) {
       -    cout << "  - Grid: " 
       -         << grid.num[0] << " (x) * " 
       -         << grid.num[1] << " (y) = " 
       -         << grid.num[0]*grid.num[1] << " cells\n"; 
       -  } else if (grid.nd == 3) {
       -    cout << "  - Grid: " 
       -         << grid.num[0] << " (x) * " 
       -         << grid.num[1] << " (y) * " 
       -         << grid.num[2] << " (z) = " 
       -         << grid.num[0]*grid.num[1]*grid.num[2] << " cells\n";
       -  } else {
       -    cerr << "\nError; the number of dimensions must be 1, 2 or 3, and was\n"
       -         << "defined as " << grid.nd << ". This SPHERE binary was compiled for " 
       -         << ND << "D. Bye!\n";
       -    exit(1); // Return unsuccessful exit status
       -  }
       -
       -  cout << "\nEntering the CUDA environment...\n";
       -  gpuMain(host_x,
       -          host_vel,
       -          host_acc,
       -          host_angvel,
       -          host_angacc,
       -          host_force,
       -          host_torque,
       -          host_angpos,
       -          host_bonds,
       -          p, grid, 
       -          time, params,
       -          host_w_nx,
       -          host_w_mvfd,
       -          cwd, inputbin);
       -
       -
       -  // Free host memory. Delete pointers:
       -  printf("Liberating host memory:                          ");
       -
       -  // Particle vectors
       -  delete[] host_x;
       -  delete[] host_vel;
       -  delete[] host_angvel;
       -  delete[] host_acc;
       -  delete[] host_angacc;
       -  delete[] host_force;
       -  delete[] host_torque;
       -  delete[] host_angpos;
       -
       -  // Particle bonds
       -  delete[] host_bonds;
       -
       -  // Particle single-value parameters
       -  //delete[] grid.origo;
       -  //delete[] grid.L;
       -  //delete[] grid.num;
       -  //delete[] params.g;
       -  delete[] p.radius;
       -  delete[] p.k_n;
       -  delete[] p.k_t;
       -  delete[] p.k_r;
       -  delete[] p.gamma_n;
       -  delete[] p.gamma_t;
       -  delete[] p.gamma_r;
       -  delete[] p.mu_s;
       -  delete[] p.mu_d;
       -  delete[] p.mu_r;
       -  delete[] p.rho;
       -  delete[] p.es_dot;
       -  delete[] p.ev_dot;
       -  delete[] p.es;
       -  delete[] p.ev;
       -  delete[] p.p;
       -
       -  // Wall arrays
       -  delete[] host_w_nx;
       -  delete[] host_w_mvfd;
       +  std::cout << "Input file: " << inputbin << "\n";
       +  
       +  // Create DEM class, read data from input binary, check values
       +  DEM dem(inputbin, verbose, checkVals);
        
       -  // Free other dynamic host memory
       -  free(cwd);
       -  printf("Done\n");
       +  // Start iterating through time
       +  std::cout << "\nStarting time loop...\n";
       +  dem.startTime();
        
          // Terminate execution
       -  printf("\nBye!\n");
       +  std::cout << "\nBye!\n";
          return 0; // Return successfull exit status
        } 
        // END OF FILE
 (DIR) diff --git a/src/sphere.cpp b/src/sphere.cpp
       t@@ -0,0 +1,180 @@
       +#include <iostream>
       +#include <cstdio>
       +#include <cstdlib>
       +
       +#include "typedefs.h"
       +#include "datatypes.h"
       +#include "constants.h"
       +#include "sphere.h"
       +
       +// Constructor: Reads an input binary, and optionally checks
       +// and reports the values
       +DEM::DEM(const char *inputbin, 
       +    const int verbosity,
       +    const int checkVals)
       +: verbose(verbosity)
       +{
       +  using std::cout;
       +  using std::cerr;
       +
       +  // Initialize CUDA
       +  initializeGPU();
       +
       +  // Read target input binary
       +  readbin(inputbin);
       +
       +  // Check numeric values of chosen parameters
       +  if (checkVals == 1)
       +    checkValues();
       +
       +  // Report data values
       +  if (verbose == 1) {
       +    if (params.contactmodel == 1)
       +      cout << "  - Contact model: Linear-elastic-viscous (n), visco-frictional (t)\n";
       +    if (params.contactmodel == 2)
       +      cout << "  - Contact model: Linear-elastic-visco-frictional\n";
       +    if (params.contactmodel == 3)
       +      cout << "  - Contact model: Nonlinear-elastic-visco-frictional\n";
       +    else {
       +      cerr << "Error: Contact model value not understood.\n";
       +      exit(1);
       +    }
       +
       +    cout << "  - Number of dynamic walls: " << walls.nw << '\n';
       +
       +    if (grid.periodic == 1)
       +      cout << "  - 1st and 2nd dim. boundaries: Periodic\n";
       +    else if (grid.periodic == 2)
       +      cout << "  - 1st dim. boundaries: Visco-frictional walls\n";
       +    else
       +      cout << "  - 1st and 2nd dim. boundaries: Visco-frictional walls\n";
       +
       +    cout << "  - Top BC: ";
       +    if (walls.wmode[0] == 0)
       +      cout << "Fixed\n";
       +    else if (walls.wmode[0] == 1)
       +      cout << "Deviatoric stress\n";
       +    else if (walls.wmode[0] == 2)
       +      cout << "Velocity\n";
       +    else {
       +      cerr << "Top BC not recognized!\n";
       +      exit(1);
       +    }
       +
       +    cout << "  - Grid: ";
       +    if (nd == 1)
       +      cout << grid.num[0];
       +    else if (nd == 2)
       +      cout << grid.num[0] << " * " << grid.num[1];
       +    else 
       +      cout << grid.num[0] << " * " 
       +        << grid.num[1] << " * "
       +        << grid.num[2];
       +    cout << " cells\n";
       +  }
       +}
       +
       +// Destructor: Liberates dynamically allocated host memory
       +DEM::~DEM(void)
       +{
       +  delete[] k.x;
       +  delete[] k.xysum;
       +  delete[] k.vel;
       +  delete[] k.force;
       +  delete[] k.angpos;
       +  delete[] k.angvel;
       +  delete[] k.torque;
       +  delete[] e.es_dot;
       +  delete[] e.es;
       +  delete[] e.ev_dot;
       +  delete[] e.ev;
       +  delete[] e.p;
       +  delete[] walls.nx;
       +  delete[] walls.mvfd;
       +}
       +
       +
       +// Check numeric values of selected parameters
       +void DEM::checkValues(void)
       +{
       +  using std::cerr;
       +
       +  unsigned int i;
       +
       +  // Check the number of dimensions
       +  if (nd != ND) {
       +    cerr << "Error: nd = " << nd << ", ND = " << ND << '\n';
       +    exit(1);
       +  }
       +
       +  // Check the number of possible contacts
       +  if (NC < 1) {
       +    cerr << "Error: NC = " << NC << '\n';
       +    exit(1);
       +  } else if (NC < 8) {
       +    cerr << "Warning: NC has a low value (" << NC << "). "
       +     << "Consider increasing it in 'constants.h'\n";
       +  }
       +
       +  // Check that we have a positive number of particles
       +  if (np < 1) {
       +    cerr << "Error: np = " << np << '\n';
       +    exit(1);
       +  }
       +
       +  // Check that the current time
       +  if (time.current < time.total || time.current < 0.0) {
       +    cerr << "Error: time.current = " << time.current
       +      << " s, time.total = " << time.total << " s\n";
       +    exit(1);
       +  }
       +
       +  // Check world size
       +  if (grid.L[0] <= 0.0 || grid.L[1] <= 0.0 || grid.L[2] <= 0.0) {
       +    cerr << "Error: grid.L[0] = " << grid.L[0] << " m, "
       +      << "grid.L[1] = " << grid.L[1] << " m, "
       +      << "grid.L[2] = " << grid.L[2] << " m.\n";
       +    exit(1);
       +  }
       +   
       +  // Check grid size
       +  if (grid.num[0] <= 0 || grid.num[1] <= 0 || grid.num[2] <= 0) {
       +    cerr << "Error: grid.num[0] = " << grid.num[0] << ", "
       +      << "grid.num[1] = " << grid.num[1] << ", "
       +      << "grid.num[2] = " << grid.num[2] << ".\n";
       +    exit(1);
       +  }
       +
       +  // Check grid size again
       +  if (grid.periodic == 2 && grid.num[0] < 3) {
       +    cerr << "Error: When 1st dimension boundaries are periodic, "
       +      << "there must be at least 3 cells in that dimension.";
       +    exit(1);
       +  }
       +
       +  if (grid.periodic == 1 && (grid.num[0] < 3 || grid.num[1] < 3)) {
       +    cerr << "Error: When 1st and 2nd dimension boundaries are periodic, "
       +      << "there must be at least 3 cells in each of those dimensions.";
       +    exit(1);
       +  }
       +
       +  // Check that radii are positive values
       +  for (i = 0; i < np; ++i) {
       +    if (k.x[i].w <= 0.0) {
       +      cerr << "Error: Particle " << i << " has a radius of "
       +        << k.x[i].w << " m.";
       +      exit(1);
       +    }
       +  }
       +
       +  // Check constant, global parameters
       +  if (params.k_n <= 0.0) {
       +    cerr << "Error: k_n = " << params.k_n << " N/m\n";
       +    exit(1);
       +  }
       +  
       +  if (params.rho <= 0.0) {
       +    cerr << "Error: rho = " << params.rho << " kg/m3\n";
       +    exit(1);
       +  }
       +}
 (DIR) diff --git a/src/sphere.h b/src/sphere.h
       t@@ -0,0 +1,97 @@
       +// Make sure the header is only included once
       +#ifndef SPHERE_H_
       +#define SPHERE_H_
       +
       +#include "datatypes.h"
       +
       +// DEM class
       +class DEM {
       +
       +  // Values and functions only accessible from the class internally
       +  private:
       +
       +    // Output level
       +    int verbose;
       +
       +    // Number of dimensions
       +    unsigned int nd;
       +
       +    // Number of particles
       +    unsigned int np;
       +
       +    // Structure containing individual particle kinematics
       +    Kinematics k;
       +    Kinematics dev_k;
       +
       +    // Structure containing energy values
       +    Energies e;
       +    Energies dev_e;
       +
       +    // Structure of global parameters
       +    Params params;
       +    Params dev_params;
       +
       +    // Structure containing spatial parameters
       +    Grid grid;
       +    Grid dev_grid;
       +
       +    // Structure of temporal parameters
       +    Time time;
       +    Time dev_time;
       +
       +    // Structure of wall parameters
       +    Walls walls;
       +    Walls dev_walls;
       +
       +    // Wall force arrays
       +    Float* dev_w_force; 
       +
       +    // GPU initialization, must be called before startTime()
       +    void initializeGPU(void);
       +
       +    // Copy all constant data to constant device memory
       +    void transferToConstantDeviceMemory(void);
       +
       +    // Check values stored in constant device memory
       +    void checkConstantMemory(void);
       +
       +    // Allocate global device memory to hold data
       +    void allocateGlobalDeviceMemory(void);
       +
       +    // Copy non-constant data to global GPU memory
       +    void transferToGlobalDeviceMemory(void);
       +
       +    // Copy non-constant data from global GPU memory to host RAM
       +    void transferFromGlobalDeviceMemory(void);
       +
       +
       +  // Values and functions accessible from the outside
       +  public:
       +
       +    // Constructor, some parameters with default values
       +    DEM(const char *inputbin, 
       +        const int verbosity = 1,
       +        const int checkVals = 1);
       +
       +    // Read binary input file
       +    void readbin(const char *target);
       +
       +    // Write binary output file
       +    void writebin(const char *target);
       +
       +    // Check numeric values of selected parameters
       +    void checkValues(void);
       +
       +    // Iterate through time, using temporal limits
       +    // described in "time" struct.
       +    void startTime(void);
       +
       +    // Render particles using raytracing
       +    // render(const char *target,
       +    //        Float3 lookat,
       +    //        Float3 eye,
       +    //        Float  focalLength);
       +
       +};
       +
       +#endif
 (DIR) diff --git a/src/typedefs.h b/src/typedefs.h
       t@@ -0,0 +1,34 @@
       +#ifndef TYPEDEFS_H_
       +#define TYPEDEFS_H_
       +
       +#include "vector_functions.h"
       +
       +//////////////////////
       +// TYPE DEFINITIONS //
       +//////////////////////
       +
       +
       +// Uncomment all five lines below for single precision
       +/*
       +typedef Float Float;
       +typedef Float3 Float3;
       +typedef Float4 Float4;
       +#define MAKE_FLOAT3(x, y, z) make_Float3(x, y, z)
       +#define MAKE_FLOAT4(x, y, z, w) make_Float4(x, y, z, w)
       +*/
       +
       +
       +/////// BEWARE: single precision is non-functional at the moment,
       +//        due to the input algorithm.
       +
       +// Uncomment all five lines below for double precision
       +///*
       +typedef double Float;
       +typedef double2 Float2;
       +typedef double3 Float3;
       +typedef double4 Float4;
       +#define MAKE_FLOAT3(x, y, z) make_double3(x, y, z)
       +#define MAKE_FLOAT4(x, y, z, w) make_double4(x, y, z, w)
       +//*/
       +
       +#endif
 (DIR) diff --git a/src/utility.cuh b/src/utility.cuh
       t@@ -0,0 +1,9 @@
       +// Avoiding multiple inclusions of header file
       +#ifndef UTILITY_CUH_
       +#define UTILITY_CUH_
       +
       +unsigned int iDivUp(unsigned int a, unsigned int b);
       +void checkForCudaErrors(const char* checkpoint_description);
       +void checkForCudaErrors(const char* checkpoint_description, const unsigned int iteration);
       + 
       +#endif