tAll IO routines are now working correctly - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit ae9ebb8f0258bc86ba0f8a1ad2942f5000e90d73
 (DIR) parent 129cc809350df4c3dc68952dfeb67caee785d966
 (HTM) Author: Anders Damsgaard <adc@geo.au.dk>
       Date:   Tue, 30 Oct 2012 13:35:35 +0100
       
       All IO routines are now working correctly
       
       Diffstat:
         A python/tests.py                     |      39 +++++++++++++++++++++++++++++++
         M src/Makefile                        |      15 +++++++++------
         M src/datatypes.h                     |       7 +++----
         M src/device.cu                       |      67 ++++++++++++++++++-------------
         M src/file_io.cpp                     |     538 ++++++++++++++-----------------
         M src/integration.cuh                 |       9 ++++++++-
         M src/main.cpp                        |      11 ++++++-----
         M src/sphere.cpp                      |      39 +++++++++++++++----------------
         M src/sphere.h                        |       7 +++++--
         M src/typedefs.h                      |       6 ++++--
         M src/vector_arithmetic.h             |       5 ++++-
       
       11 files changed, 374 insertions(+), 369 deletions(-)
       ---
 (DIR) diff --git a/python/tests.py b/python/tests.py
       t@@ -0,0 +1,39 @@
       +#!/usr/bin/env python
       +from sphere import *
       +
       +def compare(first, second, string):
       +  if (first == second):
       +    print(string + ":\tPassed")
       +  else:
       +    print(string + ":\tFailed")
       +
       +
       +#### Input/output tests ####
       +print("### Input/output tests ###")
       +
       +# Generate data in python
       +orig = Spherebin(100)
       +orig.generateRadii()
       +orig.defaultParams()
       +orig.initRandomGridPos()
       +orig.initTemporal(total = 0.0, current = 0.0)
       +orig.xysum = numpy.ones(orig.np*2, dtype=numpy.float64).reshape(orig.np, 2) * 1
       +orig.vel = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np, orig.nd) * 2
       +orig.force = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np, orig.nd) * 3
       +orig.angpos = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np, orig.nd) * 4
       +orig.angvel = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np, orig.nd) * 5
       +orig.torque = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np, orig.nd) * 6
       +orig.writebin("orig.bin", verbose=False)
       +
       +# Test Python IO routines
       +py = Spherebin()
       +py.readbin("orig.bin", verbose=False)
       +compare(orig, py, "Python IO")
       +
       +# Test C++ IO routines
       +run("python/orig.bin")
       +cpp = Spherebin()
       +cpp.readbin("../output/orig.output0.bin", verbose=False)
       +compare(orig, cpp, "C++ IO   ")
       +
       +
 (DIR) diff --git a/src/Makefile b/src/Makefile
       t@@ -27,8 +27,8 @@ NVCCFLAGS=--use_fast_math -O3 -m64 -gencode=arch=compute_20,code=\"sm_20,compute
        
        # Debugable code? Beware that enabling this option will 
        # considerably slow down the execution.
       -#CCFLAGS+=-g
       -#NVCCFLAGS+=-g -G
       +CCFLAGS=-g -O0 -Wall
       +NVCCFLAGS=-g -G -O0 -m64 -gencode=arch=compute_20,code=\"sm_20,compute_20\" -Xcompiler "-O0 -g"
        
        DATE=`date +'%Y.%m.%d-%H:%M:%S'`
                BACKUPNAME=sphere.$(DATE).tar.gz
       t@@ -39,6 +39,9 @@ CCOBJECTS=$(CCFILES:.cpp=.o)
        CUOBJECTS=$(CUFILES:.cu=.o)
        OBJECTS=$(CCOBJECTS) $(CUOBJECTS)
        
       +# If a header-file changes, update all objects
       +DEPS=*.h *.cuh
       +
        # Detect OS
        OSUPPER=$(shell uname -s 2>/dev/null | tr [:lower:] [:upper:])
                DARWIN=$(strip $(findstring DARWIN, $(OSUPPER)))
       t@@ -75,16 +78,16 @@ $(EXECUTABLE): $(OBJECTS)
        utility.o: utility.cu
                $(NVCC) $(NVCCFLAGS) $(INCLUDES) -c $< -o $@
        
       -file_io.o: file_io.cpp datatypes.h
       +file_io.o: file_io.cpp $(DEPS)
                $(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@
        
       -device.o: device.cu datatypes.h *.cuh
       +device.o: device.cu $(DEPS)
                $(NVCC) $(NVCCFLAGS) $(INCLUDES) -c $< -o $@
        
       -main.o: main.cpp datatypes.h
       +main.o: main.cpp $(DEPS)
                $(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@
        
       -sphere.o: sphere.cpp sphere.h
       +sphere.o: sphere.cpp $(DEPS)
                $(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@
        
        ../sphere_status: sphere_status.cpp
 (DIR) diff --git a/src/datatypes.h b/src/datatypes.h
       t@@ -36,7 +36,6 @@ struct Energies {
          Float *ev_dot;        // Viscous dissipation rates
          Float *ev;                // Viscous dissipations
          Float *p;                // Pressures
       -  //uint4 *bonds;                // Cohesive bonds
        };
        
        // Structure containing grid parameters
       t@@ -90,12 +89,12 @@ struct Params {
        struct Walls {
          unsigned int nw;        // Number of walls (<= MAXWALLS)
          int wmode[MAXWALLS];        // Wall modes
       -  Float4* nx;                // Wall normal and position
       -  Float4* mvfd;                // Wall mass, velocity, force and dev. stress
       -  Float* force;                // Resulting forces on walls per particle
          Float gamma_wn;        // Wall normal viscosity
          Float gamma_wt;        // Wall tangential viscosity
          Float gamma_wr;        // Wall rolling viscosity
       +  Float4* nx;                // Wall normal and position
       +  Float4* mvfd;                // Wall mass, velocity, force and dev. stress
       +  Float* force;                // Resulting forces on walls per particle
        };
        
        #endif
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -1,5 +1,6 @@
        // device.cu -- GPU specific operations utilizing the CUDA API.
        #include <iostream>
       +#include <string>
        #include <cstdio>
        #include <cuda.h>
        
       t@@ -311,15 +312,15 @@ __host__ void DEM::transferToGlobalDeviceMemory()
          if (verbose == 1)
            std::cout << "  Transfering data to the device:                 ";
        
       -  // Copy structure data from host to global device memory
       -  /*cudaMemcpy(dev_k, k, sizeof(Kinematics), cudaMemcpyHostToDevice);
       -  cudaMemcpy(dev_e, e, sizeof(Energies), cudaMemcpyHostToDevice);
       -  cudaMemcpy(dev_time, time, sizeof(Time), cudaMemcpyHostToDevice);
       -  cudaMemcpy(dev_walls, walls, sizeof(Walls), cudaMemcpyHostToDevice);*/
       -  cudaMemcpy(dev_k, &k, sizeof(k), cudaMemcpyHostToDevice);
       +  // Copy structure data from host to global device memoryj
       +  cudaMemcpy(dev_k, &k, sizeof(Kinematics), cudaMemcpyHostToDevice);
       +  cudaMemcpy(dev_e, &e, sizeof(Energies), cudaMemcpyHostToDevice);
       +  cudaMemcpy(dev_time, &time, sizeof(Time), cudaMemcpyHostToDevice);
       +  cudaMemcpy(dev_walls, &walls, sizeof(Walls), cudaMemcpyHostToDevice);
       +  /*cudaMemcpy(dev_k, &k, sizeof(k), cudaMemcpyHostToDevice);
          cudaMemcpy(dev_e, &e, sizeof(e), cudaMemcpyHostToDevice);
          cudaMemcpy(dev_time, &time, sizeof(time), cudaMemcpyHostToDevice);
       -  cudaMemcpy(dev_walls, &walls, sizeof(walls), cudaMemcpyHostToDevice);
       +  cudaMemcpy(dev_walls, &walls, sizeof(walls), cudaMemcpyHostToDevice);*/
        
          checkForCudaErrors("End of transferToGlobalDeviceMemory");
          if (verbose == 1)
       t@@ -331,10 +332,14 @@ __host__ void DEM::transferFromGlobalDeviceMemory()
          std::cout << "  Transfering data to the device:                 ";
        
          // Copy structure data from host to global device memory
       -  cudaMemcpy(&k, dev_k, sizeof(k), cudaMemcpyDeviceToHost);
       +  /*cudaMemcpy(&k, dev_k, sizeof(k), cudaMemcpyDeviceToHost);
          cudaMemcpy(&e, dev_e, sizeof(e), cudaMemcpyDeviceToHost);
          cudaMemcpy(&time, dev_time, sizeof(time), cudaMemcpyDeviceToHost);
       -  cudaMemcpy(&walls, dev_walls, sizeof(walls), cudaMemcpyDeviceToHost);
       +  cudaMemcpy(&walls, dev_walls, sizeof(walls), cudaMemcpyDeviceToHost);*/
       +  cudaMemcpy(&k, dev_k, sizeof(Kinematics), cudaMemcpyDeviceToHost);
       +  cudaMemcpy(&e, dev_e, sizeof(Energies), cudaMemcpyDeviceToHost);
       +  cudaMemcpy(&time, dev_time, sizeof(Time), cudaMemcpyDeviceToHost);
       +  cudaMemcpy(&walls, dev_walls, sizeof(Walls), cudaMemcpyDeviceToHost);
        
          checkForCudaErrors("End of transferFromGlobalDeviceMemory");
          if (verbose == 1)
       t@@ -347,7 +352,8 @@ __host__ void DEM::startTime()
        {
        
          using std::cout; // Namespace directive
       -  char file[200];  // Output filename
       +  std::string outfile;
       +  char file[200];
          FILE *fp;
        
          // Copy data to constant global device memory
       t@@ -402,8 +408,9 @@ __host__ void DEM::startTime()
          long iter = 0;
        
          // Create first status.dat
       -  sprintf(file,"output/%s.status.dat", sid);
       -  fp = fopen(file, "w");
       +  //sprintf(file,"output/%s.status.dat", sid);
       +  outfile = "output/" + sid + ".status.dat";
       +  fp = fopen(outfile.c_str(), "w");
          fprintf(fp,"%2.4e %2.4e %d\n", 
                        time.current, 
                  100.0*time.current/time.total, 
       t@@ -411,8 +418,9 @@ __host__ void DEM::startTime()
          fclose(fp);
        
          // Write first output data file: output0.bin, thus testing writing of bin files
       -  sprintf(file,"output/%s.output0.bin", sid);
       -  writebin(file);
       +  outfile = "output/" + sid + ".output0.bin";
       +  //sprintf(file,"output/%s.output0.bin", sid);
       +  writebin(outfile.c_str());
        
          if (verbose == 1) {
            cout << "\n  Entering the main calculation time loop...\n\n"
       t@@ -587,6 +595,7 @@ __host__ void DEM::startTime()
                                             dev_k->force,
                                             dev_k->torque, 
                                             dev_k->angpos,
       +                                     dev_k->xysum,
                                             dev_sort->gridParticleIndex);
        
            cudaThreadSynchronize();
       t@@ -645,43 +654,43 @@ __host__ void DEM::startTime()
        
              // Write binary output file
              time.step_count += 1;
       -      sprintf(file,"output/%s.output%d.bin", sid, time.step_count);
       +      sprintf(file,"output/%s.output%d.bin", sid.c_str(), time.step_count);
              writebin(file);
        
        
              if (CONTACTINFO == 1) {
                // Write contact information to stdout
       -        /*cout << "\n\n---------------------------\n"
       +        cout << "\n\n---------------------------\n"
                     << "t = " << time.current << " s.\n"
                     << "---------------------------\n";
        
       -        for (int n = 0; n < p.np; ++n) {
       +        for (int n = 0; n < np; ++n) {
                  cout << "\n## Particle " << n << " ##\n";
        
                  cout  << "- contacts:\n";
                  for (int nc = 0; nc < NC; ++nc) 
       -            cout << "[" << nc << "]=" << host_contacts[nc+NC*n] << '\n';
       +            cout << "[" << nc << "]=" << k.contacts[nc+NC*n] << '\n';
        
                  cout << "\n- delta_t:\n";
                  for (int nc = 0; nc < NC; ++nc) 
       -            cout << host_delta_t[nc+NC*n].x << '\t'
       -                 << host_delta_t[nc+NC*n].y << '\t'
       -                 << host_delta_t[nc+NC*n].z << '\t'
       -                 << host_delta_t[nc+NC*n].w << '\n';
       +            cout << k.delta_t[nc+NC*n].x << '\t'
       +                 << k.delta_t[nc+NC*n].y << '\t'
       +                 << k.delta_t[nc+NC*n].z << '\t'
       +                 << k.delta_t[nc+NC*n].w << '\n';
        
                  cout << "\n- distmod:\n";
                  for (int nc = 0; nc < NC; ++nc) 
       -            cout << host_distmod[nc+NC*n].x << '\t'
       -                 << host_distmod[nc+NC*n].y << '\t'
       -                 << host_distmod[nc+NC*n].z << '\t'
       -                 << host_distmod[nc+NC*n].w << '\n';
       +            cout << k.distmod[nc+NC*n].x << '\t'
       +                 << k.distmod[nc+NC*n].y << '\t'
       +                 << k.distmod[nc+NC*n].z << '\t'
       +                 << k.distmod[nc+NC*n].w << '\n';
                }
       -        cout << '\n';*/
       +        cout << '\n';
              }
        
              // Update status.dat at the interval of filetime 
       -      sprintf(file,"output/%s.status.dat", sid);
       -      fp = fopen(file, "w");
       +      outfile = "output/" + sid + ".status.dat";
       +      fp = fopen(outfile.c_str(), "w");
              fprintf(fp,"%2.4e %2.4e %d\n", 
                      time.current, 
                      100.0*time.current/time.total,
 (DIR) diff --git a/src/file_io.cpp b/src/file_io.cpp
       t@@ -1,4 +1,5 @@
        #include <iostream>
       +#include <fstream>
        #include <cstdio>
        #include <cstdlib>
        
       t@@ -7,34 +8,42 @@
        #include "constants.h"
        #include "sphere.h"
        
       +// Get the address of the first byte of an object's representation
       +// See Stroustrup (2008) p. 388
       +template<class T>
       +char* as_bytes(T& i)        // treat a T as a sequence of bytes
       +{
       +  // get the address of the first byte of memory used
       +  // to store the object
       +  void* addr = &i;
       +
       +  // treat the object as bytes
       +  return static_cast<char*>(addr);
       +}
       +
        // Read DEM data from binary file
       +// Note: Static-size arrays can be bulk-read with e.g.
       +//   ifs.read(as_bytes(grid.L), sizeof(grid.L))
       +// while dynamic, and vector arrays (e.g. Float4) must
       +// be read one value at a time.
        void DEM::readbin(const char *target)
        {
          using std::cout;  // stdout
          using std::cerr;  // stderr
          using std::endl;  // endline. Implicitly flushes buffer
       -
       -  if (verbose == 1)
       -    std::cout << "reading binary: " << target << '\n';
       -
       -  int err = 0;
       +  unsigned int i;
        
          // Open input file
       -  FILE *fp;
       -  ++err;
       -  if ((fp = fopen(target, "rb")) == NULL) {
       +  // if target is string: std::ifstream ifs(target.c_str(), std::ios_base::binary);
       +  std::ifstream ifs(target, std::ios_base::binary);
       +  if (!ifs) {
            cerr << "Could not read input binary file '"
              << target << endl;
       -    exit(err);
       +    exit(1);
          }
        
       -  // Read data
       -  ++err;
       -  if(fread(&nd, sizeof(nd), 1, fp) != 1) {
       -    cerr << "nd" << endl; exit(err); } // Return unsuccessful exit status
       -  ++err;
       -  if (fread(&np, sizeof(np), 1, fp) != 1) {
       -    cerr << "np" << endl; exit(err); } // Return unsuccessful exit status
       +  ifs.read(as_bytes(nd), sizeof(nd));
       +  ifs.read(as_bytes(np), sizeof(np));
          if (verbose == 1) {
            cout << "  - Number of dimensions: nd = " << nd << "\n"
              << "  - Number of particles:  np = " << np << "\n";
       t@@ -59,27 +68,17 @@ void DEM::readbin(const char *target)
              cout << "double";
          } else {
            cerr << "Error! Chosen precision not available. Check datatypes.h\n";
       -    exit(err);
       +    exit(1);
          }
          if (verbose == 1)
            cout << " precision\n";
        
          // Read time parameters
       -  ++err;
       -  if (fread(&time.dt, sizeof(time.dt), 1, fp) != 1) {
       -    cerr << "time.dt" << endl; exit(err); }
       -  ++err;
       -  if (fread(&time.current, sizeof(time.current), 1, fp) != 1) {
       -    cerr << "time.current" << endl; exit(err); }
       -  ++err;
       -  if (fread(&time.total, sizeof(time.total), 1, fp) != 1) {
       -    cerr << "time.total" << endl; exit(err); }
       -  ++err;
       -  if (fread(&time.file_dt, sizeof(time.file_dt), 1, fp) != 1) {
       -    cerr << "time.file_dt" << endl; exit(err); }
       -  ++err;
       -  if (fread(&time.step_count, sizeof(time.step_count), 1, fp) != 1) {
       -    cerr << "time.step_count" << endl; exit(err); }
       +  ifs.read(as_bytes(time.dt), sizeof(time.dt));
       +  ifs.read(as_bytes(time.current), sizeof(time.current));
       +  ifs.read(as_bytes(time.total), sizeof(time.total));
       +  ifs.read(as_bytes(time.file_dt), sizeof(time.file_dt));
       +  ifs.read(as_bytes(time.step_count), sizeof(time.step_count));
        
          // Output display parameters to screen
          if (verbose == 1) {
       t@@ -124,143 +123,118 @@ void DEM::readbin(const char *target)
            cout << "  Reading remaining data from input binary:       ";
        
          // Read grid parameters
       -  ++err;
       -  if (fread(&grid.origo, sizeof(grid.origo[0]), nd, fp) != nd) {
       -    cerr << "grid.origo" << endl; exit(err); }
       -  ++err;
       -  if (fread(&grid.L, sizeof(grid.L[0]), nd, fp) != nd) {
       -    cerr << "grid.L" << endl; exit(err); }
       -  ++err;
       -  if (fread(&grid.num, sizeof(grid.num[0]), nd, fp) != nd) {
       -    cerr << "grid.num" << endl; exit(err); }
       -  ++err;
       -  if (fread(&grid.periodic, sizeof(grid.periodic), 1, fp) != 1) {
       -    cerr << "grid.periodic" << endl; exit(err); }
       +  ifs.read(as_bytes(grid.origo), sizeof(grid.origo));
       +  ifs.read(as_bytes(grid.L), sizeof(grid.L));
       +  ifs.read(as_bytes(grid.num), sizeof(grid.num));
       +  ifs.read(as_bytes(grid.periodic), sizeof(grid.periodic));
        
          // Read kinematic values
       -  ++err;
       -  if (fread(&k.x, sizeof(Float4), np, fp) != np) {
       -    cerr << "k.x" << endl; exit(err); }
       -  ++err;
       -  if (fread(&k.xysum, sizeof(Float2), np, fp) != np) {
       -    cerr << "k.xysum" << endl; exit(err); }
       -  ++err;
       -  if (fread(&k.vel, sizeof(Float4), np, fp) != np) {
       -    cerr << "k.vel" << endl; exit(err); }
       -  ++err;
       -  if (fread(&k.force, sizeof(Float4), np, fp) != np) {
       -    cerr << "k.force" << endl; exit(err); }
       -  ++err;
       -  if (fread(&k.angpos, sizeof(Float4), np, fp) != np) {
       -    cerr << "k.angpos" << endl; exit(err); }
       -  ++err;
       -  if (fread(&k.angvel, sizeof(Float4), np, fp) != np) {
       -    cerr << "k.angvel" << endl; exit(err); }
       -  ++err;
       -  if (fread(&k.torque, sizeof(Float4), np, fp) != np) {
       -    cerr << "k.torque" << endl; exit(err); }
       -  // mass (m) and inertia (I) are calculated on device
       +  for (i = 0; i<np; ++i) {
       +    ifs.read(as_bytes(k.x[i].x), sizeof(Float));
       +    ifs.read(as_bytes(k.x[i].y), sizeof(Float));
       +    ifs.read(as_bytes(k.x[i].z), sizeof(Float));
       +    ifs.read(as_bytes(k.x[i].w), sizeof(Float));
       +  }
       +  for (i = 0; i<np; ++i) {
       +    ifs.read(as_bytes(k.xysum[i].x), sizeof(Float));
       +    ifs.read(as_bytes(k.xysum[i].y), sizeof(Float));
       +  }
       +  for (i = 0; i<np; ++i) {
       +    ifs.read(as_bytes(k.vel[i].x), sizeof(Float));
       +    ifs.read(as_bytes(k.vel[i].y), sizeof(Float));
       +    ifs.read(as_bytes(k.vel[i].z), sizeof(Float));
       +    ifs.read(as_bytes(k.vel[i].w), sizeof(Float));
       +  }
       +  for (i = 0; i<np; ++i) {
       +    ifs.read(as_bytes(k.force[i].x), sizeof(Float));
       +    ifs.read(as_bytes(k.force[i].y), sizeof(Float));
       +    ifs.read(as_bytes(k.force[i].z), sizeof(Float));
       +    //ifs.read(as_bytes(k.force[i].w), sizeof(Float));
       +  }
       +  for (i = 0; i<np; ++i) {
       +    ifs.read(as_bytes(k.angpos[i].x), sizeof(Float));
       +    ifs.read(as_bytes(k.angpos[i].y), sizeof(Float));
       +    ifs.read(as_bytes(k.angpos[i].z), sizeof(Float));
       +    //ifs.read(as_bytes(k.angpos[i].w), sizeof(Float));
       +  }
       +  for (i = 0; i<np; ++i) {
       +    ifs.read(as_bytes(k.angvel[i].x), sizeof(Float));
       +    ifs.read(as_bytes(k.angvel[i].y), sizeof(Float));
       +    ifs.read(as_bytes(k.angvel[i].z), sizeof(Float));
       +    //ifs.read(as_bytes(k.angvel[i].w), sizeof(Float));
       +  }
       +  for (i = 0; i<np; ++i) {
       +    ifs.read(as_bytes(k.torque[i].x), sizeof(Float));
       +    ifs.read(as_bytes(k.torque[i].y), sizeof(Float));
       +    ifs.read(as_bytes(k.torque[i].z), sizeof(Float));
       +    //ifs.read(as_bytes(k.torque[i].w), sizeof(Float));
       +  }
        
          // Read energies
       -  ++err;
       -  if (fread(&e.es_dot, sizeof(e.es_dot[0]), np, fp) != np) {
       -    cerr << "e.es_dot" << endl; exit(err); }
       -  ++err;
       -  if (fread(&e.es, sizeof(e.es[0]), np, fp) != np) {
       -    cerr << "e.es" << endl; exit(err); }
       -  ++err;
       -  if (fread(&e.ev_dot, sizeof(e.ev_dot[0]), np, fp) != np) {
       -    cerr << "e.ev_dot" << endl; exit(err); }
       -  ++err;
       -  if (fread(&e.ev, sizeof(e.ev[0]), np, fp) != np) {
       -    cerr << "e.ev" << endl; exit(err); }
       -  ++err;
       -  if (fread(&e.p, sizeof(e.p[0]), np, fp) != np) {
       -    cerr << "e.p" << endl; exit(err); }
       -
       -  // Read constant, global physical parameters
       -  ++err;
       -  if (fread(&params.g, sizeof(params.g[0]), nd, fp) != nd) {
       -    cerr << "params.g" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.k_n, sizeof(params.k_n), 1, fp) != 1) {
       -    cerr << "params.k_n" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.k_t, sizeof(params.k_t), 1, fp) != 1) {
       -    cerr << "params.k_t" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.k_r, sizeof(params.k_r), 1, fp) != 1) {
       -    cerr << "params.k_r" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.gamma_n, sizeof(params.gamma_n), 1, fp) != 1) {
       -    cerr << "params.gamma_n" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.gamma_t, sizeof(params.gamma_t), 1, fp) != 1) {
       -    cerr << "params.gamma_t" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.gamma_r, sizeof(params.gamma_r), 1, fp) != 1) {
       -    cerr << "params.gamma_r" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.mu_s, sizeof(params.mu_s), 1, fp) != 1) {
       -    cerr << "params.mu_s" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.mu_d, sizeof(params.mu_d), 1, fp) != 1) {
       -    cerr << "params.mu_d" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.mu_r, sizeof(params.mu_r), 1, fp) != 1) {
       -    cerr << "params.mu_r" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.rho, sizeof(params.rho), 1, fp) != 1) {
       -    cerr << "params.rho" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.contactmodel, sizeof(params.contactmodel), 1, fp) != 1) {
       -    cerr << "params.contactmodel" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.kappa, sizeof(params.kappa), 1, fp) != 1) {
       -    cerr << "params.kappa" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.db, sizeof(params.db), 1, fp) != 1) {
       -    cerr << "params.db" << endl; exit(err); }
       -  ++err;
       -  if (fread(&params.V_b, sizeof(params.V_b), 1, fp) != 1) {
       -  cerr << "params.V_b" << endl; exit(err); }
       +  for (i = 0; i<np; ++i)
       +    ifs.read(as_bytes(e.es_dot[i]), sizeof(Float));
       +  for (i = 0; i<np; ++i)
       +    ifs.read(as_bytes(e.es[i]), sizeof(Float));
       +  for (i = 0; i<np; ++i)
       +    ifs.read(as_bytes(e.ev_dot[i]), sizeof(Float));
       +  for (i = 0; i<np; ++i)
       +    ifs.read(as_bytes(e.ev[i]), sizeof(Float));
       +  for (i = 0; i<np; ++i)
       +    ifs.read(as_bytes(e.p[i]), sizeof(Float));
       +
       +  // Read constant parameters
       +  ifs.read(as_bytes(params.g), sizeof(params.g));
       +  ifs.read(as_bytes(params.k_n), sizeof(params.k_n));
       +  ifs.read(as_bytes(params.k_t), sizeof(params.k_t));
       +  ifs.read(as_bytes(params.k_r), sizeof(params.k_r));
       +  ifs.read(as_bytes(params.gamma_n), sizeof(params.gamma_n));
       +  ifs.read(as_bytes(params.gamma_t), sizeof(params.gamma_t));
       +  ifs.read(as_bytes(params.gamma_r), sizeof(params.gamma_r));
       +  ifs.read(as_bytes(params.mu_s), sizeof(params.mu_s));
       +  ifs.read(as_bytes(params.mu_d), sizeof(params.mu_d));
       +  ifs.read(as_bytes(params.mu_r), sizeof(params.mu_r));
       +  ifs.read(as_bytes(params.rho), sizeof(params.rho));
       +  ifs.read(as_bytes(params.contactmodel), sizeof(params.contactmodel));
       +  ifs.read(as_bytes(params.kappa), sizeof(params.kappa));
       +  ifs.read(as_bytes(params.db), sizeof(params.db));
       +  ifs.read(as_bytes(params.V_b), sizeof(params.V_b));
        
          // Read wall parameters
       -  ++err;
       -  if (fread(&walls.nw, sizeof(walls.nw), 1, fp) != 1) {
       -    cerr << "walls.nw" << endl; exit(err); }
       -  // Allocate host memory for walls
       -  // Wall normal (x,y,z), w: wall position on axis parallel to wall normal
       -  // Wall mass (x), velocity (y), force (z), and deviatoric stress (w)
       -  walls.nx   = new Float4[walls.nw];
       -  walls.mvfd = new Float4[walls.nw]; 
       -
       -  ++err;
       -  if (fread(&walls.wmode, sizeof(walls.wmode[0]), walls.nw, fp) != walls.nw) {
       -    cerr << "walls.wmode" << endl; exit(err); }
       -  ++err;
       -  if (fread(&walls.nx, sizeof(Float4), walls.nw, fp) != 1) {
       -    cerr << "walls.nx" << endl; exit(err); }
       -  ++err;
       -  if (fread(&walls.mvfd, sizeof(Float4), walls.nw, fp) != 1) {
       -    cerr << "walls.mvfd" << endl; exit(err); }
       -  ++err;
       -  if (fread(&walls.gamma_wn, sizeof(walls.gamma_wn), 1, fp) != 1) {
       -    cerr << "walls.gamma_wn" << endl; exit(err); }
       -  ++err;
       -  if (fread(&walls.gamma_wt, sizeof(walls.gamma_wt), 1, fp) != 1) {
       -    cerr << "walls.gamma_wt" << endl; exit(err); }
       -  ++err;
       -  if (fread(&walls.gamma_wr, sizeof(walls.gamma_wr), 1, fp) != 1) {
       -    cerr << "walls.gamma_wr" << endl; exit(err); }
       -
       +  ifs.read(as_bytes(walls.nw), sizeof(walls.nw));
          if (walls.nw > MAXWALLS) {
            cerr << "Error; MAXWALLS (" << MAXWALLS << ") in datatypes.h "
              << "is smaller than the number of walls specified in the "
              << "input file (" << walls.nw << ").\n";
       +    exit(1);
       +  }
       +
       +  // Allocate host memory for walls
       +  // Wall normal (x,y,z), w: wall position on axis parallel to wall normal
       +  // Wall mass (x), velocity (y), force (z), and deviatoric stress (w)
       +  walls.nx    = new Float4[walls.nw];
       +  walls.mvfd  = new Float4[walls.nw];
       +  walls.force = new Float[walls.nw*np];
       +
       +  ifs.read(as_bytes(walls.wmode), sizeof(walls.wmode));
       +  for (i = 0; i<walls.nw; ++i) {
       +    ifs.read(as_bytes(walls.nx[i].x), sizeof(Float));
       +    ifs.read(as_bytes(walls.nx[i].y), sizeof(Float));
       +    ifs.read(as_bytes(walls.nx[i].z), sizeof(Float));
       +    ifs.read(as_bytes(walls.nx[i].w), sizeof(Float));
       +  }
       +  for (i = 0; i<walls.nw; ++i) {
       +    ifs.read(as_bytes(walls.mvfd[i].x), sizeof(Float));
       +    ifs.read(as_bytes(walls.mvfd[i].y), sizeof(Float));
       +    ifs.read(as_bytes(walls.mvfd[i].z), sizeof(Float));
       +    ifs.read(as_bytes(walls.mvfd[i].w), sizeof(Float));
          }
       +  ifs.read(as_bytes(walls.gamma_wn), sizeof(walls.gamma_wn));
       +  ifs.read(as_bytes(walls.gamma_wt), sizeof(walls.gamma_wt));
       +  ifs.read(as_bytes(walls.gamma_wr), sizeof(walls.gamma_wr));
        
       -  fclose(fp);
       +  // Close file if it is still open
       +  if (ifs.is_open())
       +    ifs.close();
        
          if (verbose == 1)
            cout << "Done\n";
       t@@ -270,162 +244,128 @@ void DEM::readbin(const char *target)
        // Write DEM data to binary file
        void DEM::writebin(const char *target)
        {
       -  int err = 0;
       +  unsigned int i;
        
          // Open output file
       -  FILE *fp;
       -  if ((fp = fopen(target, "wb")) == NULL) {
       +  std::ofstream ofs(target, std::ios_base::binary);
       +  if (!ofs) {
            std::cerr << "could create output binary file '"
       -      << target << "'.\n";
       -    exit(err); // Return unsuccessful exit status
       +      << target << std::endl;
       +    exit(1); // Return unsuccessful exit status
          }
        
          // If double precision: Values can be written directly
          if (sizeof(Float) == sizeof(double)) {
        
       -    fwrite(&nd, sizeof(nd), 1, fp);
       -    fwrite(&np, sizeof(np), 1, fp);
       -
       -    // Write temporal parameters
       -    ++err;
       -    if (fwrite(&time.dt, sizeof(time.dt), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&time.current, sizeof(time.current), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&time.total, sizeof(time.total), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&time.file_dt, sizeof(time.file_dt), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&time.step_count, sizeof(time.step_count), 1, fp) != 1)
       -      exit(err);
       +    ofs.write(as_bytes(nd), sizeof(nd));
       +    ofs.write(as_bytes(np), sizeof(np));
       +
       +    // Write time parameters
       +    ofs.write(as_bytes(time.dt), sizeof(time.dt));
       +    ofs.write(as_bytes(time.current), sizeof(time.current));
       +    ofs.write(as_bytes(time.total), sizeof(time.total));
       +    ofs.write(as_bytes(time.file_dt), sizeof(time.file_dt));
       +    ofs.write(as_bytes(time.step_count), sizeof(time.step_count));
        
            // Write grid parameters
       -    ++err;
       -    if (fwrite(&grid.origo, sizeof(grid.origo[0]), nd, fp) != nd)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&grid.L, sizeof(grid.L[0]), nd, fp) != nd)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&grid.num, sizeof(grid.num[0]), nd, fp) != nd)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&grid.periodic, sizeof(grid.periodic), 1, fp) != 1)
       -      exit(err);
       +    ofs.write(as_bytes(grid.origo), sizeof(grid.origo));
       +    ofs.write(as_bytes(grid.L), sizeof(grid.L));
       +    ofs.write(as_bytes(grid.num), sizeof(grid.num));
       +    ofs.write(as_bytes(grid.periodic), sizeof(grid.periodic));
        
            // Write kinematic values
       -    ++err;
       -    if (fwrite(&k.x, sizeof(Float4), np, fp) != np)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&k.xysum, sizeof(Float2), np, fp) != np)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&k.vel, sizeof(Float4), np, fp) != np)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&k.force, sizeof(Float4), np, fp) != np)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&k.angpos, sizeof(Float4), np, fp) != np)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&k.angvel, sizeof(Float4), np, fp) != np)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&k.torque, sizeof(Float4), np, fp) != np)
       -      exit(err);
       +    for (i = 0; i<np; ++i) {
       +      ofs.write(as_bytes(k.x[i].x), sizeof(Float));
       +      ofs.write(as_bytes(k.x[i].y), sizeof(Float));
       +      ofs.write(as_bytes(k.x[i].z), sizeof(Float));
       +      ofs.write(as_bytes(k.x[i].w), sizeof(Float));
       +    }
       +    for (i = 0; i<np; ++i) {
       +      ofs.write(as_bytes(k.xysum[i].x), sizeof(Float));
       +      ofs.write(as_bytes(k.xysum[i].y), sizeof(Float));
       +    }
       +    for (i = 0; i<np; ++i) {
       +      ofs.write(as_bytes(k.vel[i].x), sizeof(Float));
       +      ofs.write(as_bytes(k.vel[i].y), sizeof(Float));
       +      ofs.write(as_bytes(k.vel[i].z), sizeof(Float));
       +      ofs.write(as_bytes(k.vel[i].w), sizeof(Float));
       +    }
       +    for (i = 0; i<np; ++i) {
       +      ofs.write(as_bytes(k.force[i].x), sizeof(Float));
       +      ofs.write(as_bytes(k.force[i].y), sizeof(Float));
       +      ofs.write(as_bytes(k.force[i].z), sizeof(Float));
       +      //ofs.write(as_bytes(k.force[i].w), sizeof(Float));
       +    }
       +    for (i = 0; i<np; ++i) {
       +      ofs.write(as_bytes(k.angpos[i].x), sizeof(Float));
       +      ofs.write(as_bytes(k.angpos[i].y), sizeof(Float));
       +      ofs.write(as_bytes(k.angpos[i].z), sizeof(Float));
       +      //ofs.write(as_bytes(k.angpos[i].w), sizeof(Float));
       +    }
       +    for (i = 0; i<np; ++i) {
       +      ofs.write(as_bytes(k.angvel[i].x), sizeof(Float));
       +      ofs.write(as_bytes(k.angvel[i].y), sizeof(Float));
       +      ofs.write(as_bytes(k.angvel[i].z), sizeof(Float));
       +      //ofs.write(as_bytes(k.angvel[i].w), sizeof(Float));
       +    }
       +    for (i = 0; i<np; ++i) {
       +      ofs.write(as_bytes(k.torque[i].x), sizeof(Float));
       +      ofs.write(as_bytes(k.torque[i].y), sizeof(Float));
       +      ofs.write(as_bytes(k.torque[i].z), sizeof(Float));
       +      //ofs.write(as_bytes(k.torque[i].w), sizeof(Float));
       +    }
        
            // Write energies
       -    ++err;
       -    if (fwrite(&e.es_dot, sizeof(e.es_dot[0]), np, fp) != np)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&e.es, sizeof(e.es[0]), np, fp) != np)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&e.ev_dot, sizeof(e.ev_dot[0]), np, fp) != np)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&e.ev, sizeof(e.ev[0]), np, fp) != np)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&e.p, sizeof(e.p[0]), np, fp) != np)
       -      exit(err);
       -
       -    // Write constant, global physical parameters
       -    ++err;
       -    if (fwrite(&params.g, sizeof(params.g[0]), nd, fp) != nd)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.k_n, sizeof(params.k_n), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.k_t, sizeof(params.k_t), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.k_r, sizeof(params.k_r), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.gamma_n, sizeof(params.gamma_n), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.gamma_t, sizeof(params.gamma_t), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.gamma_r, sizeof(params.gamma_r), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.mu_s, sizeof(params.mu_s), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.mu_d, sizeof(params.mu_d), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.mu_r, sizeof(params.mu_r), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.rho, sizeof(params.rho), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.contactmodel, sizeof(params.contactmodel), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.kappa, sizeof(params.kappa), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.db, sizeof(params.db), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&params.V_b, sizeof(params.V_b), 1, fp) != 1)
       -      exit(err);
       -
       -    // Write walls parameters
       -    ++err;
       -    if (fwrite(&walls.nw, sizeof(walls.nw), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&walls.wmode, sizeof(walls.wmode[0]), walls.nw, fp) != walls.nw)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&walls.nx, sizeof(Float4), walls.nw, fp) != walls.nw)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&walls.mvfd, sizeof(Float4), walls.nw, fp) != walls.nw)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&walls.gamma_wn, sizeof(walls.gamma_wn), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&walls.gamma_wt, sizeof(walls.gamma_wt), 1, fp) != 1)
       -      exit(err);
       -    ++err;
       -    if (fwrite(&walls.gamma_wr, sizeof(walls.gamma_wr), 1, fp) != 1)
       -      exit(err);
       +    for (i = 0; i<np; ++i)
       +      ofs.write(as_bytes(e.es_dot[i]), sizeof(Float));
       +    for (i = 0; i<np; ++i)
       +      ofs.write(as_bytes(e.es[i]), sizeof(Float));
       +    for (i = 0; i<np; ++i)
       +      ofs.write(as_bytes(e.ev_dot[i]), sizeof(Float));
       +    for (i = 0; i<np; ++i)
       +      ofs.write(as_bytes(e.ev[i]), sizeof(Float));
       +    for (i = 0; i<np; ++i)
       +      ofs.write(as_bytes(e.p[i]), sizeof(Float));
       +
       +    // Write constant parameters
       +    ofs.write(as_bytes(params.g), sizeof(params.g));
       +    ofs.write(as_bytes(params.k_n), sizeof(params.k_n));
       +    ofs.write(as_bytes(params.k_t), sizeof(params.k_t));
       +    ofs.write(as_bytes(params.k_r), sizeof(params.k_r));
       +    ofs.write(as_bytes(params.gamma_n), sizeof(params.gamma_n));
       +    ofs.write(as_bytes(params.gamma_t), sizeof(params.gamma_t));
       +    ofs.write(as_bytes(params.gamma_r), sizeof(params.gamma_r));
       +    ofs.write(as_bytes(params.mu_s), sizeof(params.mu_s));
       +    ofs.write(as_bytes(params.mu_d), sizeof(params.mu_d));
       +    ofs.write(as_bytes(params.mu_r), sizeof(params.mu_r));
       +    ofs.write(as_bytes(params.rho), sizeof(params.rho));
       +    ofs.write(as_bytes(params.contactmodel), sizeof(params.contactmodel));
       +    ofs.write(as_bytes(params.kappa), sizeof(params.kappa));
       +    ofs.write(as_bytes(params.db), sizeof(params.db));
       +    ofs.write(as_bytes(params.V_b), sizeof(params.V_b));
       +
       +    // Write wall parameters
       +    ofs.write(as_bytes(walls.nw), sizeof(walls.nw));
       +    ofs.write(as_bytes(walls.wmode), sizeof(walls.wmode));
       +    for (i = 0; i<walls.nw; ++i) {
       +      ofs.write(as_bytes(walls.nx[i].x), sizeof(Float));
       +      ofs.write(as_bytes(walls.nx[i].y), sizeof(Float));
       +      ofs.write(as_bytes(walls.nx[i].z), sizeof(Float));
       +      ofs.write(as_bytes(walls.nx[i].w), sizeof(Float));
       +    }
       +    for (i = 0; i<walls.nw; ++i) {
       +      ofs.write(as_bytes(walls.mvfd[i].x), sizeof(Float));
       +      ofs.write(as_bytes(walls.mvfd[i].y), sizeof(Float));
       +      ofs.write(as_bytes(walls.mvfd[i].z), sizeof(Float));
       +      ofs.write(as_bytes(walls.mvfd[i].w), sizeof(Float));
       +    }
       +    ofs.write(as_bytes(walls.gamma_wn), sizeof(walls.gamma_wn));
       +    ofs.write(as_bytes(walls.gamma_wt), sizeof(walls.gamma_wt));
       +    ofs.write(as_bytes(walls.gamma_wr), sizeof(walls.gamma_wr));
       +
       +    // Close file if it is still open
       +    if (ofs.is_open())
       +      ofs.close();
        
          } else {
            std::cerr << "Can't write output when in single precision mode.\n";
 (DIR) diff --git a/src/integration.cuh b/src/integration.cuh
       t@@ -11,6 +11,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
                                  Float4* dev_angvel_sorted,
                                  Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, // Output
                                  Float4* dev_force, Float4* dev_torque, Float4* dev_angpos, // Input
       +                          Float2* dev_xysum,
                                  unsigned int* dev_gridParticleIndex) // Input: Sorted-Unsorted key
        {
          unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
       t@@ -24,6 +25,8 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
            Float4 torque = dev_torque[orig_idx];
            Float4 angpos = dev_angpos[orig_idx];
        
       +    Float2 xysum  = MAKE_FLOAT2(0.0f, 0.0f);
       +
            // Initialize acceleration vectors to zero
            Float4 acc    = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
            Float4 angacc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
       t@@ -103,7 +106,10 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
        
            // Add x-displacement for this time step to 
            // sum of x-displacements
       -    x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
       +    //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
       +    xysum.x += vel.x * dt + (acc.x * dt*dt)/2.0f;
       +    xysum.y += vel.y * dt + (acc.y * dt*dt)/2.0f;
       +
        
            // Move particle across boundary if it is periodic
            if (devC_grid.periodic == 1) {
       t@@ -126,6 +132,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
            __syncthreads();
        
            // Store data in global memory at original, pre-sort positions
       +    dev_xysum[orig_idx] += xysum;
            dev_angvel[orig_idx] = angvel;
            dev_vel[orig_idx]    = vel;
            dev_angpos[orig_idx] = angpos;
 (DIR) diff --git a/src/main.cpp b/src/main.cpp
       t@@ -11,6 +11,7 @@
        
        // Including library files
        #include <iostream>
       +#include <string>
        
        // Including user files
        #include "constants.h"
       t@@ -23,16 +24,17 @@
        //////////////////
        // The main loop returns the value 0 to the shell, if the program terminated
        // successfully, and 1 if an error occured which caused the program to crash.
       -int main(int argc, char *argv[]) 
       +int main(const int argc, const char *argv[]) 
        {
        
          // LOCAL VARIABLE DECLARATIONS
          if(!argv[1] || argc != 2) {
            std::cerr << "Error: Specify input binary file, e.g. "
       -      << argv[0] << " input/test.bin\n";
       +      << argv[0] << " input/test.bin" << std::endl;
            return 1; // Return unsuccessful exit status
          }
       -  char *inputbin = argv[1]; // Input binary file read from command line argument
       +  //char *inputbin = argv[1]; // Input binary file read from command line argument
       +  std::string inputbin = argv[1];
        
          int verbose = 1;
          int checkVals = 1;
       t@@ -51,13 +53,12 @@ int main(int argc, char *argv[])
              << "`-------------------------------------ยด\n";
          }
        
       -  std::cout << "Input file: " << inputbin << "\n";
       +  std::cout << "Input file: " << inputbin << std::endl;
          
          // Create DEM class, read data from input binary, check values
          DEM dem(inputbin, verbose, checkVals);
        
          // Start iterating through time
       -  std::cout << "\nStarting time loop...\n";
          dem.startTime();
        
          // Terminate execution
 (DIR) diff --git a/src/sphere.cpp b/src/sphere.cpp
       t@@ -1,7 +1,7 @@
        #include <iostream>
       +#include <string>
        #include <cstdio>
        #include <cstdlib>
       -#include <cstring>
        
        #include "typedefs.h"
        #include "datatypes.h"
       t@@ -10,7 +10,7 @@
        
        // Constructor: Reads an input binary, and optionally checks
        // and reports the values
       -DEM::DEM(char *inputbin, 
       +DEM::DEM(const std::string inputbin, 
            const int verbosity,
            const int checkVals)
        : verbose(verbosity)
       t@@ -18,24 +18,17 @@ DEM::DEM(char *inputbin,
          using std::cout;
          using std::cerr;
        
       -  // Get basename from input file
       -  char *name = inputbin;
       -  char *sid = name;
       -  while (*name) {
       -    if (*name++ == '/') {
       -      sid = name;
       -    }
       -  } // sid is now everything after the last '/' char
       -  char *lastdot = strrchr(sid, '.');
       -  if (lastdot != NULL)
       -    *lastdot = '\0'; // Put in string-end char. at last dot
       -
       -
       -  // Initialize CUDA
       -  initializeGPU();
       +  // Extract sid from input binary filename 
       +  size_t dotpos = inputbin.rfind('.');
       +  size_t slashpos = inputbin.rfind('/');
       +  if (slashpos - dotpos < 1) {
       +    std::cerr << "Error! Unable to extract simulation id "
       +      << "from input file name.\n";
       +  }
       +  sid = inputbin.substr(slashpos+1, dotpos-slashpos-1);
        
          // Read target input binary
       -  readbin(inputbin);
       +  readbin(inputbin.c_str());
        
          // Check numeric values of chosen parameters
          if (checkVals == 1)
       t@@ -45,9 +38,9 @@ DEM::DEM(char *inputbin,
          if (verbose == 1) {
            if (params.contactmodel == 1)
              cout << "  - Contact model: Linear-elastic-viscous (n), visco-frictional (t)\n";
       -    if (params.contactmodel == 2)
       +    else if (params.contactmodel == 2)
              cout << "  - Contact model: Linear-elastic-visco-frictional\n";
       -    if (params.contactmodel == 3)
       +    else if (params.contactmodel == 3)
              cout << "  - Contact model: Nonlinear-elastic-visco-frictional\n";
            else {
              cerr << "Error: Contact model value not understood.\n";
       t@@ -86,6 +79,12 @@ DEM::DEM(char *inputbin,
                << grid.num[2];
            cout << " cells\n";
          }
       +
       +  writebin(("output/" + sid + ".output0.bin").c_str());
       +
       +  // Initialize CUDA
       +  initializeGPU();
       +
        }
        
        // Destructor: Liberates dynamically allocated host memory
 (DIR) diff --git a/src/sphere.h b/src/sphere.h
       t@@ -10,8 +10,11 @@ class DEM {
          // Values and functions only accessible from the class internally
          private:
        
       +    // Input filename (full path)
       +    std::string inputbin;
       +
            // Simulation ID
       -    char *sid;
       +    std::string sid;
        
            // Output level
            int verbose;
       t@@ -73,7 +76,7 @@ class DEM {
          public:
        
            // Constructor, some parameters with default values
       -    DEM(char *inputbin, 
       +    DEM(std::string inputbin, 
                const int verbosity = 1,
                const int checkVals = 1);
        
 (DIR) diff --git a/src/typedefs.h b/src/typedefs.h
       t@@ -13,8 +13,9 @@
        typedef Float Float;
        typedef Float3 Float3;
        typedef Float4 Float4;
       -#define MAKE_FLOAT3(x, y, z) make_Float3(x, y, z)
       -#define MAKE_FLOAT4(x, y, z, w) make_Float4(x, y, z, w)
       +#define MAKE_FLOAT2(x, y) make_float2(x, y)
       +#define MAKE_FLOAT3(x, y, z) make_float3(x, y, z)
       +#define MAKE_FLOAT4(x, y, z, w) make_float4(x, y, z, w)
        */
        
        
       t@@ -27,6 +28,7 @@ typedef double Float;
        typedef double2 Float2;
        typedef double3 Float3;
        typedef double4 Float4;
       +#define MAKE_FLOAT2(x, y) make_double2(x, y)
        #define MAKE_FLOAT3(x, y, z) make_double3(x, y, z)
        #define MAKE_FLOAT4(x, y, z, w) make_double4(x, y, z, w)
        //*/
 (DIR) diff --git a/src/vector_arithmetic.h b/src/vector_arithmetic.h
       t@@ -140,7 +140,10 @@ inline __host__ __device__ void operator+=(uint2 &a, uint b)
            a.x += b; a.y += b;
        }
        
       -
       +inline __host__ __device__ void operator+=(Float2 &a, Float2 b)
       +{
       +    a.x += b.x; a.y += b.y;
       +}
        inline __host__ __device__ Float3 operator+(Float3 a, Float3 b)
        {
            return MAKE_FLOAT3(a.x + b.x, a.y + b.y, a.z + b.z);