tRaytracer incorporated into sphere program, and is working - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit ede3f29203f93318355cd254be5204f7fe888bb6
 (DIR) parent 490068513c59308c71cd7a008d70b9f532280e60
 (HTM) Author: Anders Damsgaard <adc@geo.au.dk>
       Date:   Fri,  9 Nov 2012 21:23:30 +0100
       
       Raytracer incorporated into sphere program, and is working
       
       Diffstat:
         M src/Makefile                        |       1 +
         A src/colorbar.h                      |      22 ++++++++++++++++++++++
         M src/device.cu                       |       5 ++++-
         M src/main.cpp                        |      36 ++++++++++++++++++++++++-------
         A src/raytracer.cuh                   |     475 +++++++++++++++++++++++++++++++
         M src/sphere.cpp                      |      19 +++----------------
         M src/sphere.h                        |       8 ++++----
       
       7 files changed, 537 insertions(+), 29 deletions(-)
       ---
 (DIR) diff --git a/src/Makefile b/src/Makefile
       t@@ -66,6 +66,7 @@ INCLUDES+=-I. -I$(SDKPATH)/shared/inc -I$(SDKPATH)/inc -I$(CUDA_INSTALL_PATH)/in
        LDFLAGS+=-L$(CUDA_INSTALL_PATH)/lib
        LDFLAGS+=-L$(SDKPATH)/../../shared/lib -L$(SDKPATH)/../lib 
        LDFLAGS+=-lcutil_x86_64 -lcuda -lcudart
       +LDFLAGS+=-fopenmp
        
        #all: $(CCFILES) $(CUFILES) $(EXECUTABLE) raytracer
        all: $(CCFILES) $(CUFILES) $(EXECUTABLE)
 (DIR) diff --git a/src/colorbar.h b/src/colorbar.h
       t@@ -0,0 +1,22 @@
       +#ifndef COLORBAR_H_
       +#define COLORBAR_H_
       +
       +// Functions that determine red-, green- and blue color components
       +// in a blue-white-red colormap. Ratio should be between 0.0-1.0.
       +
       +__inline__ __host__ __device__ float red(float ratio)
       +{
       +  return fmin(1.0f, 0.209f*ratio*ratio*ratio - 2.49f*ratio*ratio + 3.0f*ratio + 0.0109f);
       +};
       +
       +__inline__ __host__ __device__ float green(float ratio)
       +{
       +  return fmin(1.0f, -2.44f*ratio*ratio + 2.15f*ratio + 0.369f);
       +};
       +
       +__inline__ __host__ __device__ float blue(float ratio)
       +{
       +  return fmin(1.0f, -2.21f*ratio*ratio + 1.61f*ratio + 0.573f);
       +};
       +
       +#endif
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -195,7 +195,7 @@ __host__ void DEM::transferToConstantDeviceMemory()
          using std::cout;
        
          if (verbose == 1)
       -    cout << "\n  Transfering data to constant device memory:     ";
       +    cout << "  Transfering data to constant device memory:     ";
        
          cudaMemcpyToSymbol("devC_nd", &nd, sizeof(nd));
          cudaMemcpyToSymbol("devC_np", &np, sizeof(np));
       t@@ -463,6 +463,9 @@ __host__ void DEM::startTime()
          cudaThreadSynchronize();
          checkForCudaErrors("Start of startTime()");
        
       +  // Write initial data to output/<sid>.output00000.bin
       +  writebin(("output/" + sid + ".output00000.bin").c_str());
       +
          // Model world variables
          float tic, toc, filetimeclock, time_spent, dev_time_spent;
        
 (DIR) diff --git a/src/main.cpp b/src/main.cpp
       t@@ -12,6 +12,7 @@
        // Including library files
        #include <iostream>
        #include <string>
       +#include <cstdlib>
        
        // Including user files
        #include "constants.h"
       t@@ -28,9 +29,11 @@ int main(const int argc, const char *argv[])
        {
          // Default values
          int verbose = 1;
       -  int checkConstantVals = 0;
       -  int render = 0;
       +  int checkVals = 1;
       +  int render = 0; // whether to render an image
       +  int method = 0; // visualization method
          int nfiles = 0; // number of input files
       +  float max_val = 0.0f;
        
          // Process input parameters
          int i;
       t@@ -46,7 +49,11 @@ int main(const int argc, const char *argv[])
                << "-V, --version\t\tprint version information and exit\n"
                << "-q, --quiet\t\tsuppress status messages to stdout\n"
                << "-r, --render\t\trender input files instead of simulating temporal evolution\n"
       -        << "-cc, --check-constants\t\tcheck values in constant GPU memory" << std::endl;
       +        << "-dcv, --dpnt-check-values\t\tdon't check values before running\n" 
       +        << "Raytracer (-r) specific options:\n"
       +        << "-m <method> <maxval>, --method <method> <maxval>\tcolor visualization method, possible values:\n"
       +        << "\t\t\t\tpres, vel\n"
       +        << std::endl;
              return 0; // Exit with success
            }
        
       t@@ -71,8 +78,17 @@ int main(const int argc, const char *argv[])
            else if (argvi == "-r" || argvi == "--render")
              render = 1;
        
       -    else if (argvi == "-cc" || argvi == "--check-constants")
       -      checkConstantVals = 1;
       +    else if (argvi == "-dcv" || argvi == "--dont-check-values")
       +      checkVals = 0;
       +
       +    else if (argvi == "-m" || argvi == "--method") {
       +      if (std::string(argv[i+1]) == "pres")
       +        method = 1;
       +      //max_val = std::strtof(argv[i+2]);
       +      max_val = atof(argv[i+2]);
       +      i += 2; // skip ahead
       +    }
       +
        
            // The rest of the values must be input binary files
            else {
       t@@ -81,10 +97,14 @@ int main(const int argc, const char *argv[])
              std::cout << argv[0] << ": processing input file: " << argvi << std::endl;
        
              // Create DEM class, read data from input binary, check values
       -      DEM dem(argvi, verbose, checkConstantVals, render);
       +      DEM dem(argvi, verbose, checkVals);
       +
       +      // Render image if requested
       +      if (render == 1)
       +        dem.render(method, max_val);
        
       -      // Start iterating through time, unless user chose to render image
       -      if (render == 0)
       +      // Otherwise, start iterating through time
       +      else
                dem.startTime();
            }
          }
 (DIR) diff --git a/src/raytracer.cuh b/src/raytracer.cuh
       t@@ -0,0 +1,475 @@
       +#ifndef RAYTRACER_CUH_
       +#define RAYTRACER_CUH_
       +
       +#include "colorbar.h"
       +
       +//#include "cuPrintf.cu"
       +
       +// Template for discarding the last term in four-component vector structs
       +__device__ __inline__ float3 f4_to_f3(float4 in) {
       +  return make_float3(in.x, in.y, in.z);
       +}
       +
       +// Kernel for initializing image data
       +__global__ void imageInit(unsigned char* dev_img, unsigned int pixels)
       +{
       +  // Compute pixel position from threadIdx/blockIdx
       +  unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
       +  if (mempos > pixels)
       +    return;
       +
       +  dev_img[mempos*4]     = 255;        // Red channel
       +  dev_img[mempos*4 + 1] = 255;        // Green channel
       +  dev_img[mempos*4 + 2] = 255;        // Blue channel
       +}
       +
       +// Calculate ray origins and directions
       +__global__ void rayInitPerspective(float4* dev_ray_origo, 
       +                                       float4* dev_ray_direction, 
       +                                   float4 eye, 
       +                                   unsigned int width,
       +                                   unsigned int height)
       +{
       +  // Compute pixel position from threadIdx/blockIdx
       +  unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
       +  if (mempos > width*height) 
       +    return;
       +
       +  // Calculate 2D position from linear index
       +  unsigned int i = mempos % width;
       +  unsigned int j = (int)floor((float)mempos/width) % width;
       + 
       +  // Calculate pixel coordinates in image plane
       +  float p_u = devC_imgplane.x + (devC_imgplane.y - devC_imgplane.x)
       +              * (i + 0.5f) / width;
       +  float p_v = devC_imgplane.z + (devC_imgplane.w - devC_imgplane.z)
       +              * (j + 0.5f) / height;
       +
       +  // Write ray origo and direction to global memory
       +  dev_ray_origo[mempos]     = make_float4(devC_eye, 0.0f);
       +  dev_ray_direction[mempos] = make_float4(-devC_d*devC_w + p_u*devC_u + p_v*devC_v, 0.0f);
       +}
       +
       +// Check wether the pixel's viewing ray intersects with the spheres,
       +// and shade the pixel correspondingly
       +__global__ void rayIntersectSpheres(float4* dev_ray_origo, 
       +                                    float4* dev_ray_direction,
       +                                    Float4* dev_x, 
       +                                    unsigned char* dev_img)
       +{
       +  // Compute pixel position from threadIdx/blockIdx
       +  unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
       +  if (mempos > devC_pixels)
       +    return;
       +
       +  // Read ray data from global memory
       +  float3 e = f4_to_f3(dev_ray_origo[mempos]);
       +  float3 d = f4_to_f3(dev_ray_direction[mempos]);
       +  //float  step = length(d);
       +
       +  // Distance, in ray steps, between object and eye initialized with a large value
       +  float tdist = 1e10f;
       +
       +  // Surface normal at closest sphere intersection
       +  float3 n;
       +
       +  // Intersection point coordinates
       +  float3 p;
       +
       +  //cuPrintf("mepos %d\n", mempos);
       +
       +  // Iterate through all particles
       +  for (unsigned int i=0; i<devC_np; ++i) {
       +
       +    // Read sphere coordinate and radius
       +    Float4 x = dev_x[i];
       +    float3 c = make_float3(x.x, x.y, x.z);
       +    float  R = x.w;
       +
       +    //cuPrintf("particle %d at: %f, %f, %f, radius: %f\n", i, c.x, c.y, c.z, R);
       +
       +    // Calculate the discriminant: d = B^2 - 4AC
       +    float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c)))  // B^2
       +                  - 4.0f*dot(d,d)        // -4*A
       +                  * (dot((e-c),(e-c)) - R*R);  // C
       +
       +    // If the determinant is positive, there are two solutions
       +    // One where the line enters the sphere, and one where it exits
       +    if (Delta > 0.0f) { 
       +      
       +      // Calculate roots, Shirley 2009 p. 77
       +      float t_minus = ((dot(-d,(e-c)) - sqrt( dot(d,(e-c))*dot(d,(e-c)) - dot(d,d)
       +                      * (dot((e-c),(e-c)) - R*R) ) ) / dot(d,d));
       +
       +      // Check wether intersection is closer than previous values
       +      if (fabs(t_minus) < tdist) {
       +        p = e + t_minus*d;
       +        tdist = fabs(t_minus);
       +        n = normalize(2.0f * (p - c));   // Surface normal
       +      }
       +
       +    } // End of solution branch
       +
       +  } // End of particle loop
       +
       +  // Write pixel color
       +  if (tdist < 1e10f) {
       +
       +    // Lambertian shading parameters
       +    float dotprod = fmax(0.0f,dot(n, devC_light));
       +    float I_d = 40.0f;  // Light intensity
       +    float k_d = 5.0f;  // Diffuse coefficient
       +    
       +    // Ambient shading
       +    float k_a = 10.0f;
       +    float I_a = 5.0f; // 5.0 for black background
       +
       +    // Write shading model values to pixel color channels
       +    dev_img[mempos*4]     = 
       +      (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.48f);
       +    dev_img[mempos*4 + 1] = 
       +      (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.41f);
       +    dev_img[mempos*4 + 2] = 
       +      (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.27f);
       +
       +  }
       +}
       +
       +// Check wether the pixel's viewing ray intersects with the spheres,
       +// and shade the pixel correspondingly using a colormap
       +__global__ void rayIntersectSpheresColormap(float4* dev_ray_origo, 
       +                                            float4* dev_ray_direction,
       +                                            Float4* dev_x, 
       +                                            Float4* dev_vel,
       +                                            Float*  dev_linarr,
       +                                            float max_val,
       +                                            unsigned char* dev_img)
       +{
       +  // Compute pixel position from threadIdx/blockIdx
       +  unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
       +  if (mempos > devC_pixels)
       +    return;
       +
       +  // Read ray data from global memory
       +  float3 e = f4_to_f3(dev_ray_origo[mempos]);
       +  float3 d = f4_to_f3(dev_ray_direction[mempos]);
       +
       +  // Distance, in ray steps, between object and eye initialized with a large value
       +  float tdist = 1e10f;
       +
       +  // Surface normal at closest sphere intersection
       +  float3 n;
       +
       +  // Intersection point coordinates
       +  float3 p;
       +
       +  unsigned int hitidx;
       +
       +  // Iterate through all particles
       +  for (unsigned int i=0; i<devC_np; ++i) {
       +
       +    // Read sphere coordinate and radius
       +    Float4 x = dev_x[i];
       +    float3 c = make_float3(x.x, x.y, x.z);
       +    float  R = x.w;
       +
       +    // Calculate the discriminant: d = B^2 - 4AC
       +    float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c)))  // B^2
       +                  - 4.0f*dot(d,d)        // -4*A
       +                  * (dot((e-c),(e-c)) - R*R);  // C
       +
       +    // If the determinant is positive, there are two solutions
       +    // One where the line enters the sphere, and one where it exits
       +    if (Delta > 0.0f) {
       +      
       +      // Calculate roots, Shirley 2009 p. 77
       +      float t_minus = ((dot(-d,(e-c)) - sqrt( dot(d,(e-c))*dot(d,(e-c)) - dot(d,d)
       +                      * (dot((e-c),(e-c)) - R*R) ) ) / dot(d,d));
       +
       +      // Check wether intersection is closer than previous values
       +      if (fabs(t_minus) < tdist) {
       +        p = e + t_minus*d;
       +        tdist = fabs(t_minus);
       +        n = normalize(2.0f * (p - c));   // Surface normal
       +        hitidx = i;
       +      }
       +
       +    } // End of solution branch
       +
       +  } // End of particle loop
       +
       +  // Write pixel color
       +  if (tdist < 1e10) {
       +
       +    // Fetch particle data used for color
       +    float ratio = dev_linarr[hitidx] / max_val;
       +    float fixvel = dev_vel[hitidx].w;
       +
       +    // Make sure the ratio doesn't exceed the 0.0-1.0 interval
       +    if (ratio < 0.01f)
       +      ratio = 0.01f;
       +    if (ratio > 0.99f)
       +      ratio = 0.99f;
       +
       +    // Lambertian shading parameters
       +    float dotprod = fmax(0.0f,dot(n, devC_light));
       +    float I_d = 40.0f;  // Light intensity
       +    float k_d = 5.0f;  // Diffuse coefficient
       +    
       +    // Ambient shading
       +    float k_a = 10.0f;
       +    float I_a = 5.0f;
       +
       +    float redv   = red(ratio);
       +    float greenv = green(ratio);
       +    float bluev  = blue(ratio);
       +
       +    // Make particle dark grey if the horizontal velocity is fixed
       +    if (fixvel > 0.f) {
       +      redv = 0.5;
       +      greenv = 0.5;
       +      bluev = 0.5;
       +    }
       +
       +    // Write shading model values to pixel color channels
       +    dev_img[mempos*4]     = (unsigned char) ((k_d * I_d * dotprod 
       +                            + k_a * I_a)*redv);
       +    dev_img[mempos*4 + 1] = (unsigned char) ((k_d * I_d * dotprod
       +                            + k_a * I_a)*greenv);
       +    dev_img[mempos*4 + 2] = (unsigned char) ((k_d * I_d * dotprod
       +                            + k_a * I_a)*bluev);
       +  }
       +}
       +
       +
       +__host__ void DEM::cameraInit(
       +    const float3 eye,
       +    const float3 lookat, 
       +    const float imgw,
       +    const float focalLength)
       +{
       +  float hw_ratio = height/width;
       +
       +  // Image dimensions in world space (l, r, b, t)
       +  float4 imgplane = make_float4(-0.5f*imgw, 0.5f*imgw, -0.5f*imgw*hw_ratio, 0.5f*imgw*hw_ratio);
       +
       +  // The view vector
       +  float3 view = eye - lookat;
       +
       +  // Construct the camera view orthonormal base
       +  float3 up = make_float3(0.0f, 0.0f, 1.0f);  // Pointing upward along +z
       +  float3 w = -view/length(view);                   // w: Pointing backwards
       +  float3 u = cross(up, w) / length(cross(up, w));
       +  float3 v = cross(w, u);
       +
       +  unsigned int pixels = width*height;
       +
       +  // Focal length 20% of eye vector length
       +  float d = length(view)*0.8f;
       +
       +  // Light direction (points towards light source)
       +  float3 light = normalize(-1.0f*eye*make_float3(1.0f, 0.2f, 0.6f));
       +
       +  if (verbose == 1)
       +    std::cout << "  Transfering camera values to constant memory: ";
       +
       +  cudaMemcpyToSymbol("devC_u", &u, sizeof(u));
       +  cudaMemcpyToSymbol("devC_v", &v, sizeof(v));
       +  cudaMemcpyToSymbol("devC_w", &w, sizeof(w));
       +  cudaMemcpyToSymbol("devC_eye", &eye, sizeof(eye));
       +  cudaMemcpyToSymbol("devC_imgplane", &imgplane, sizeof(imgplane));
       +  cudaMemcpyToSymbol("devC_d", &d, sizeof(d));
       +  cudaMemcpyToSymbol("devC_light", &light, sizeof(light));
       +  cudaMemcpyToSymbol("devC_pixels", &pixels, sizeof(pixels));
       +
       +  if (verbose == 1)
       +    std::cout << "Done" << std::endl;
       +  checkForCudaErrors("During cameraInit");
       +}
       +
       +// Allocate global device memory
       +__host__ void DEM::rt_allocateGlobalDeviceMemory(void)
       +{
       +  if (verbose == 1)
       +    std::cout << "  Allocating device memory: ";
       +  cudaMalloc((void**)&dev_img, width*height*4*sizeof(unsigned char));
       +  cudaMalloc((void**)&dev_ray_origo, width*height*sizeof(float4));
       +  cudaMalloc((void**)&dev_ray_direction, width*height*sizeof(float4));
       +  if (verbose == 1)
       +    std::cout << "Done" << std::endl;
       +  checkForCudaErrors("During rt_allocateGlobalDeviceMemory()");
       +}
       +
       +
       +  // Free dynamically allocated device memory
       +__host__ void DEM::rt_freeGlobalDeviceMemory(void)
       +{
       +  if (verbose == 1)
       +    std::cout << "  Freeing device memory: ";
       +  cudaFree(dev_img);
       +  cudaFree(dev_ray_origo);
       +  cudaFree(dev_ray_direction);
       +  if (verbose == 1)
       +    std::cout << "Done" << std::endl;
       +  checkForCudaErrors("During rt_freeGlobalDeviceMemory()");
       +}
       +
       +// Transfer image data from device to host
       +__host__ void DEM::rt_transferFromGlobalDeviceMemory(void)
       +{
       +  if (verbose == 1)
       +    std::cout << "  Transfering image data: device -> host: ";
       +  cudaMemcpy(img, dev_img, width*height*4*sizeof(unsigned char), cudaMemcpyDeviceToHost);
       +  if (verbose == 1)
       +    std::cout << "Done" << std::endl;
       +  checkForCudaErrors("During rt_transferFromGlobalDeviceMemory()");
       +}
       +
       +// Find the max. spatial positions of the particles, and return these as a vector
       +__host__ float3 DEM::maxPos()
       +{
       +  int i;
       +  float3 shared_max = make_float3(0.0f, 0.0f, 0.0f);
       +
       +#pragma omp parallel if(np > 100)
       +  {
       +    // Max. val per thread
       +    float3 max = make_float3(0.0f, 0.0f, 0.0f);
       +
       +#pragma omp for nowait
       +    // Find max val. per thread
       +    for (i = 0; i<np; ++i) {
       +      max.x = std::max(max.x, (float)k.x[i].x);
       +      max.y = std::max(max.y, (float)k.x[i].y);
       +      max.z = std::max(max.z, (float)k.x[i].z);
       +    }
       +
       +    // Find total max, by comparing one thread with the
       +    // shared result, one at a time
       +#pragma omp critical
       +    {
       +      shared_max.x = std::max(shared_max.x, max.x);
       +      shared_max.y = std::max(shared_max.y, max.y);
       +      shared_max.z = std::max(shared_max.z, max.z);
       +    }
       +  }
       +
       +  // Return final result
       +  return shared_max;
       +}
       +
       +// Wrapper for the rt kernel
       +__host__ void DEM::render(
       +    const int method,
       +    const float maxval,
       +    const float focalLength,
       +    const unsigned int img_width,
       +    const unsigned int img_height)
       +    /*float4* p, unsigned int np,
       +    rgba* img, unsigned int width, unsigned int height,
       +    f3 origo, f3 L, f3 eye, f3 lookat, float imgw,
       +    int visualize, float max_val,
       +    float* fixvel,
       +    float* xsum,
       +    float* pres,
       +    float* es_dot,
       +    float* es,
       +    float* vel)*/
       +{
       +  // Namespace directives
       +  using std::cout;
       +  using std::cerr;
       +  using std::endl;
       +
       +  //cudaPrintfInit();
       +
       +  // Save image dimensions in class object
       +  width = img_width;
       +  height = img_height;
       +
       +  // Allocate memory for output image
       +  img = new rgba[height*width];
       +  rt_allocateGlobalDeviceMemory();
       +
       +  // Arrange thread/block structure
       +  unsigned int pixels = width*height;
       +  const unsigned int threadsPerBlock = 256;
       +  const unsigned int blocksPerGrid = iDivUp(pixels, threadsPerBlock);
       +
       +  // Initialize image to background color
       +  imageInit<<< blocksPerGrid, threadsPerBlock >>>(dev_img, pixels);
       +
       +  // Initialize camera values and transfer to constant memory
       +  float imgw = grid.L[0]*1.35f; // Screen width in world coordinates
       +  float3 lookat = maxPos() / 2.0f; // Look at the centre of the mean positions
       +  float3 eye = make_float3(grid.L[0] * 0.5f,
       +                                 grid.L[1] * -5.0f,
       +                           grid.L[2] * 0.5f);
       +  cameraInit(eye, lookat, imgw, focalLength);
       +
       +  // Construct rays for perspective projection
       +  rayInitPerspective<<< blocksPerGrid, threadsPerBlock >>>(
       +      dev_ray_origo, dev_ray_direction, 
       +      make_float4(eye.x, eye.y, eye.z, 0.0f), width, height);
       +  cudaThreadSynchronize();
       +
       +  // Find closest intersection between rays and spheres
       +  if (method == 1) { // Visualize pressure
       +    if (verbose == 1)
       +    cout << "  Pressure color map range: [0, " << maxval << "] Pa\n"; 
       +    rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>(
       +        dev_ray_origo, dev_ray_direction,
       +        dev_x, dev_vel,
       +        dev_p, maxval,
       +        dev_img);
       +
       +  /*} else if (visualize == 2) { // es_dot visualization
       +    if (verbose == 1)
       +      cout << "  Shear heat production rate color map range: [0, " << maxval << "] W\n";
       +    rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>(
       +        _ray_origo, _ray_direction,
       +        _p, _fixvel, _linarr, maxval, _img);
       +  } else if (visualize == 3) { // es visualization
       +    if (verbose == 1)
       +      cout << "  Total shear heat color map range: [0, " << maxval << "] J\n";
       +    rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>(
       +        _ray_origo, _ray_direction,
       +        _p, _fixvel, _linarr, maxval, _img);
       +  } else if (visualize == 4) { // velocity visualization
       +    if (verbose == 1)
       +      cout << "  Velocity color map range: [0, " << maxval << "] m/s\n";
       +    rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>(
       +        _ray_origo, _ray_direction,
       +        _p, _fixvel, _linarr, maxval, _img);
       +  } else if (visualize == 5) { // xsum visualization
       +    if (verbose == 1)
       +      cout << "  XSum color map range: [0, " << maxval << "] m\n";
       +    rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>(
       +        _ray_origo, _ray_direction,
       +        _p, _fixvel, _linarr, maxval, _img); */
       +  } else { // Normal visualization
       +    rayIntersectSpheres<<< blocksPerGrid, threadsPerBlock >>>(
       +        dev_ray_origo, dev_ray_direction,
       +        dev_x, dev_img);
       +  }
       +
       +  // Make sure all threads are done before continuing CPU control sequence
       +  cudaThreadSynchronize();
       +  
       +  // Check for errors
       +  checkForCudaErrors("CUDA error after kernel execution");
       +
       +  // Copy image data from global device memory to host memory
       +  rt_transferFromGlobalDeviceMemory();
       +
       +  // Free dynamically allocated global device memory
       +  rt_freeGlobalDeviceMemory();
       +
       +  //cudaPrintfDisplay(stdout, true);
       +
       +  // Write image to PPM file
       +  writePPM(("img_out/" + sid + ".ppm").c_str());
       +
       +}
       +
       +#endif
 (DIR) diff --git a/src/sphere.cpp b/src/sphere.cpp
       t@@ -2,6 +2,7 @@
        #include <string>
        #include <cstdio>
        #include <cstdlib>
       +#include <cmath>
        
        #include "typedefs.h"
        #include "datatypes.h"
       t@@ -12,8 +13,7 @@
        // and reports the values
        DEM::DEM(const std::string inputbin, 
            const int verbosity,
       -    const int checkVals,
       -    const int render_img)
       +    const int checkVals)
        : verbose(verbosity)
        {
          using std::cout;
       t@@ -39,9 +39,6 @@ DEM::DEM(const std::string inputbin,
          if (verbose == 1)
            reportValues();
            
       -  // Write initial data to output/<sid>.output00000.bin
       -  writebin(("output/" + sid + ".output00000.bin").c_str());
       -
          // Initialize CUDA
          initializeGPU();
        
       t@@ -55,16 +52,6 @@ DEM::DEM(const std::string inputbin,
          // Transfer data from host to gpu device memory
          transferToGlobalDeviceMemory();
        
       -  // Render image using raytracer if requested
       -  if (render_img == 1) {
       -    float3 eye = make_float3( 2.5f * grid.L[0],
       -                             -5.0f * grid.L[1],
       -                              0.5f * grid.L[2]);
       -    //float focalLength = 0.8f*grid.L[0];
       -    //float3 eye = make_float3(0.0f, 0.0f, 0.0f);
       -    render(eye);
       -  }
       -
        }
        
        // Destructor: Liberates dynamically allocated host memory
       t@@ -116,7 +103,7 @@ void DEM::checkValues(void)
          }
        
          // Check that the current time
       -  if (time.current < time.total || time.current < 0.0) {
       +  if (time.current > time.total || time.current < 0.0) {
            cerr << "Error: time.current = " << time.current
              << " s, time.total = " << time.total << " s\n";
            exit(1);
 (DIR) diff --git a/src/sphere.h b/src/sphere.h
       t@@ -116,6 +116,8 @@ class DEM {
            void transferFromGlobalDeviceMemory(void);
            void rt_transferFromGlobalDeviceMemory(void);
        
       +    // Find and return the max. position of any particle in each dimension
       +    float3 maxPos(void);
            
        
          // Values and functions accessible from the outside
       t@@ -124,8 +126,7 @@ class DEM {
            // Constructor, some parameters with default values
            DEM(std::string inputbin, 
                const int verbosity = 1,
       -        const int checkVals = 1,
       -        const int render = 0);
       +        const int checkVals = 1);
        
            // Destructor
            ~DEM(void);
       t@@ -148,10 +149,9 @@ class DEM {
        
            // Render particles using raytracing
            void render(//const char *target,
       -        const float3 eye,
       -        const float focalLength = 1.0,
                const int method = 1,
                const float maxval = 1.0e3,
       +        const float focalLength = 1.0,
                const unsigned int img_width = 800,
                const unsigned int img_height = 800);