tAdded angular position information in datafiles and on device - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 223197848f6e1f56d5003dc84ac7d89623de97d0
 (DIR) parent b742ff213fba24e811e63093723c2b8858c86fae
 (HTM) Author: Anders Damsgaard <adc@geo.au.dk>
       Date:   Thu, 11 Oct 2012 09:28:24 +0200
       
       Added angular position information in datafiles and on device
       
       Diffstat:
         M python/sphere.py                    |      21 ++++++++++++++++++---
         M src/datatypes.h                     |       4 +++-
         M src/device.cu                       |      13 ++++++++++---
         M src/file_io.cpp                     |      10 ++++++++++
         M src/integration.cuh                 |      10 +++++++++-
         M src/main.cpp                        |      10 ++++++++++
       
       6 files changed, 60 insertions(+), 8 deletions(-)
       ---
 (DIR) diff --git a/python/sphere.py b/python/sphere.py
       t@@ -39,6 +39,7 @@ class Spherebin:
            self.angvel  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
            self.force   = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
            self.torque  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
       +    self.angpos  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
            self.fixvel  = numpy.zeros(self.np, dtype=numpy.float64)
            self.xsum    = numpy.zeros(self.np, dtype=numpy.float64)
            self.radius  = numpy.zeros(self.np, dtype=numpy.float64)
       t@@ -112,6 +113,7 @@ class Spherebin:
              self.angvel  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
              self.force   = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
              self.torque  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
       +      self.angpos  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
              self.fixvel  = numpy.zeros(self.np, dtype=numpy.float64)
              self.xsum    = numpy.zeros(self.np, dtype=numpy.float64)
              self.radius  = numpy.zeros(self.np, dtype=numpy.float64)
       t@@ -145,6 +147,7 @@ class Spherebin:
                  self.angvel[j,i] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
                  self.force[j,i]  = numpy.fromfile(fh, dtype=numpy.float64, count=1)
                  self.torque[j,i] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
       +          self.angpos[j,i] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
         
              # Per-particle single-value parameters
              for j in range(self.np):
       t@@ -247,6 +250,7 @@ class Spherebin:
                  fh.write(self.angvel[j,i].astype(numpy.float64))
                  fh.write(self.force[j,i].astype(numpy.float64))
                  fh.write(self.torque[j,i].astype(numpy.float64))
       +          fh.write(self.angpos[j,i].astype(numpy.float64))
         
              # Per-particle single-value parameters
              for j in range(self.np):
       t@@ -549,6 +553,11 @@ class Spherebin:
            z_adjust = 1.1        # Overheightening of grid. 1.0 = no overheightening
            self.num[2] = numpy.ceil((z_max-z_min)*z_adjust/cellsize)
            self.L[2] = (z_max-z_min)*z_adjust
       +    
       +    # zero kinematics
       +    self.vel     = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
       +    self.angvel  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
       +    self.angpos  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
        
            # Initialize upper wall
            self.wmode[0] = 0
       t@@ -575,6 +584,11 @@ class Spherebin:
            self.num[2] = numpy.ceil((z_max-z_min)*z_adjust/cellsize)
            self.L[2] = (z_max-z_min)*z_adjust
        
       +    # zero kinematics
       +    self.vel     = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
       +    self.angvel  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
       +    self.angpos  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
       +
            # Initialize upper wall
            self.wmode[0] = 1
            self.w_n[0,2] = -1.0
       t@@ -606,9 +620,10 @@ class Spherebin:
            # Initialize upper wall
            self.w_devs[0] = deviatoric_stress
        
       -    # Zero kinematics
       -    self.vel     = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
       -    self.angvel  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
       +    # zero kinematics
       +    self.vel     = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
       +    self.angvel  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
       +    self.angpos  = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
        
            fixheight = 2*cellsize
            #fixheight = cellsize
 (DIR) diff --git a/src/datatypes.h b/src/datatypes.h
       t@@ -144,7 +144,8 @@ struct Params {
        int fwritebin(char *target, Particles *p, 
                          Float4 *host_x, Float4 *host_vel, 
                      Float4 *host_angvel, Float4 *host_force, 
       -              Float4 *host_torque, uint4 *host_bonds,
       +              Float4 *host_torque, Float4 *host_angpos, 
       +              uint4 *host_bonds,
                      Grid *grid, Time *time, Params *params,
                      Float4 *host_w_nx, Float4 *host_w_mvfd);
        
       t@@ -160,6 +161,7 @@ void gpuMain(Float4* host_x,
                     Float4* host_angacc,
                     Float4* host_force,
                     Float4* host_torque,
       +             Float4* host_angpos,
                     uint4*  host_bonds,
                     Particles* p, Grid* grid,
                     Time* time, Params* params,
 (DIR) diff --git a/src/device.cu b/src/device.cu
       t@@ -161,6 +161,7 @@ __host__ void gpuMain(Float4* host_x,
                              Float4* host_angacc,
                              Float4* host_force,
                              Float4* host_torque,
       +                      Float4* host_angpos,
                              uint4*  host_bonds,
                              Particles* p, 
                              Grid* grid, 
       t@@ -185,6 +186,7 @@ __host__ void gpuMain(Float4* host_x,
          Float4* dev_angacc;        // Particle angular acceleration
          Float4* dev_force;        // Sum of forces
          Float4* dev_torque;        // Sum of torques
       +  Float4* dev_angpos;        // Particle angular position
          Float*  dev_radius;        // Particle radius
          Float*  dev_es_dot;        // Current shear energy producion rate
          Float*  dev_ev_dot;        // Current viscous energy producion rate
       t@@ -240,6 +242,7 @@ __host__ void gpuMain(Float4* host_x,
          cudaMalloc((void**)&dev_angacc, memSizeF4);
          cudaMalloc((void**)&dev_force, memSizeF4);
          cudaMalloc((void**)&dev_torque, memSizeF4);
       +  cudaMalloc((void**)&dev_angpos, memSizeF4);
          cudaMalloc((void**)&dev_radius, memSizeF);
          cudaMalloc((void**)&dev_radius_sorted, memSizeF);
          cudaMalloc((void**)&dev_es_dot, memSizeF);
       t@@ -280,6 +283,7 @@ __host__ void gpuMain(Float4* host_x,
          cudaMemcpy(dev_angacc, host_angacc, memSizeF4, cudaMemcpyHostToDevice);
          cudaMemcpy(dev_force, host_force, memSizeF4, cudaMemcpyHostToDevice);
          cudaMemcpy(dev_torque, host_torque, memSizeF4, cudaMemcpyHostToDevice);
       +  cudaMemcpy(dev_angpos, host_angpos, memSizeF4, cudaMemcpyHostToDevice);
          //cudaMemcpy(dev_bonds, host_bonds, sizeof(uint4) * p->np, cudaMemcpyHostToDevice);
          cudaMemcpy(dev_radius, p->radius, memSizeF, cudaMemcpyHostToDevice);
          cudaMemcpy(dev_es_dot, p->es_dot, memSizeF, cudaMemcpyHostToDevice);
       t@@ -361,7 +365,7 @@ __host__ void gpuMain(Float4* host_x,
          sprintf(file,"%s/output/%s.output0.bin", cwd, inputbin);
          if (fwritebin(file, p, host_x, host_vel, 
                        host_angvel, host_force, 
       -                host_torque,
       +                host_torque, host_angpos,
                        host_bonds,
                        grid, time, params,
                        host_w_nx, host_w_mvfd) != 0)  {
       t@@ -539,7 +543,7 @@ __host__ void gpuMain(Float4* host_x,
            integrate<<<dimGrid, dimBlock>>>(dev_x_sorted, dev_vel_sorted, 
                                             dev_angvel_sorted, dev_radius_sorted,
                                             dev_x, dev_vel, dev_angvel,
       -                                     dev_force, dev_torque, 
       +                                     dev_force, dev_torque, dev_angpos,
                                             dev_gridParticleIndex);
        
            cudaThreadSynchronize();
       t@@ -598,6 +602,7 @@ __host__ void gpuMain(Float4* host_x,
              cudaMemcpy(host_angvel, dev_angvel, memSizeF4, cudaMemcpyDeviceToHost);
              cudaMemcpy(host_force, dev_force, memSizeF4, cudaMemcpyDeviceToHost);
              cudaMemcpy(host_torque, dev_torque, memSizeF4, cudaMemcpyDeviceToHost);
       +      cudaMemcpy(host_angpos, dev_angpos, memSizeF4, cudaMemcpyDeviceToHost);
              //cudaMemcpy(host_bonds, dev_bonds, sizeof(uint4) * p->np, cudaMemcpyDeviceToHost);
              cudaMemcpy(p->es_dot, dev_es_dot, memSizeF, cudaMemcpyDeviceToHost);
              cudaMemcpy(p->ev_dot, dev_ev_dot, memSizeF, cudaMemcpyDeviceToHost);
       t@@ -620,7 +625,8 @@ __host__ void gpuMain(Float4* host_x,
        
              if (fwritebin(file, p, host_x, host_vel, 
                                host_angvel, host_force, 
       -                    host_torque, host_bonds,
       +                    host_torque, host_angpos,
       +                    host_bonds,
                            grid, time, params,
                                host_w_nx, host_w_mvfd) != 0) {
                cout << "\n Error during fwritebin() in main loop\n";
       t@@ -695,6 +701,7 @@ __host__ void gpuMain(Float4* host_x,
          cudaFree(dev_angacc);
          cudaFree(dev_force);
          cudaFree(dev_torque);
       +  cudaFree(dev_angpos);
          cudaFree(dev_radius);
          cudaFree(dev_radius_sorted);
          cudaFree(dev_es_dot);
 (DIR) diff --git a/src/file_io.cpp b/src/file_io.cpp
       t@@ -17,6 +17,7 @@ int fwritebin(char *target,
            Float4 *host_angvel, 
            Float4 *host_force, 
            Float4 *host_torque, 
       +    Float4 *host_angpos, 
            uint4 *host_bonds,
            Grid *grid, 
            Time *time, 
       t@@ -73,6 +74,7 @@ int fwritebin(char *target,
              fwrite(&host_angvel[j].x, sizeof(Float), 1, fp);
              fwrite(&host_force[j].x, sizeof(Float), 1, fp);
              fwrite(&host_torque[j].x, sizeof(Float), 1, fp);
       +      fwrite(&host_angpos[j].x, sizeof(Float), 1, fp);
        
              // y-axis
              fwrite(&host_x[j].y, sizeof(Float), 1, fp);
       t@@ -80,6 +82,7 @@ int fwritebin(char *target,
              fwrite(&host_angvel[j].y, sizeof(Float), 1, fp);
              fwrite(&host_force[j].y, sizeof(Float), 1, fp);
              fwrite(&host_torque[j].y, sizeof(Float), 1, fp);
       +      fwrite(&host_angpos[j].y, sizeof(Float), 1, fp);
        
              // z-axis
              fwrite(&host_x[j].z, sizeof(Float), 1, fp);
       t@@ -87,6 +90,7 @@ int fwritebin(char *target,
              fwrite(&host_angvel[j].z, sizeof(Float), 1, fp);
              fwrite(&host_force[j].z, sizeof(Float), 1, fp);
              fwrite(&host_torque[j].z, sizeof(Float), 1, fp);
       +      fwrite(&host_angpos[j].z, sizeof(Float), 1, fp);
            } 
        
            // Individual particle values
       t@@ -200,6 +204,8 @@ int fwritebin(char *target,
              fwrite(&d, sizeof(d), 1, fp);
              d = (double)host_torque[j].x;
              fwrite(&d, sizeof(d), 1, fp);
       +      d = (double)host_angpos[j].x;
       +      fwrite(&d, sizeof(d), 1, fp);
        
              // y-axis
              d = (double)host_x[j].y;
       t@@ -212,6 +218,8 @@ int fwritebin(char *target,
              fwrite(&d, sizeof(d), 1, fp);
              d = (double)host_torque[j].y;
              fwrite(&d, sizeof(d), 1, fp);
       +      d = (double)host_angpos[j].y;
       +      fwrite(&d, sizeof(d), 1, fp);
        
              // z-axis
              d = (double)host_x[j].z;
       t@@ -224,6 +232,8 @@ int fwritebin(char *target,
              fwrite(&d, sizeof(d), 1, fp);
              d = (double)host_torque[j].z;
              fwrite(&d, sizeof(d), 1, fp);
       +      d = (double)host_angpos[j].z;
       +      fwrite(&d, sizeof(d), 1, fp);
            } 
        
            // Individual particle values
 (DIR) diff --git a/src/integration.cuh b/src/integration.cuh
       t@@ -10,7 +10,7 @@
        __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
                                  Float4* dev_angvel_sorted, Float* dev_radius_sorted, // Input
                                  Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, // Output
       -                          Float4* dev_force, Float4* dev_torque, // Input
       +                          Float4* dev_force, Float4* dev_torque, Float4* dev_angpos, // Input
                                  unsigned int* dev_gridParticleIndex) // Input: Sorted-Unsorted key
        {
          unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
       t@@ -22,6 +22,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
            unsigned int orig_idx = dev_gridParticleIndex[idx];
            Float4 force  = dev_force[orig_idx];
            Float4 torque = dev_torque[orig_idx];
       +    Float4 angpos = dev_angpos[orig_idx];
        
            // Initialize acceleration vectors to zero
            Float4 acc    = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
       t@@ -88,6 +89,12 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
            //x.y += vel.y * dt;
            //x.z += vel.z * dt;
        
       +    // Update angular position. Second-order scheme based on Taylor expansion
       +    // (greater accuracy than the first-order Euler's scheme)
       +    angpos.x += angvel.x * dt + (angacc.x * dt*dt)/2.0f;
       +    angpos.y += angvel.y * dt + (angacc.y * dt*dt)/2.0f;
       +    angpos.z += angvel.z * dt + (angacc.z * dt*dt)/2.0f;
       +
            // Update position. Second-order scheme based on Taylor expansion 
            // (greater accuracy than the first-order Euler's scheme)
            x.x += vel.x * dt + (acc.x * dt*dt)/2.0f;
       t@@ -121,6 +128,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
            // Store data in global memory at original, pre-sort positions
            dev_angvel[orig_idx] = angvel;
            dev_vel[orig_idx]    = vel;
       +    dev_angpos[orig_idx] = angpos;
            dev_x[orig_idx]      = x;
          } 
        } // End of integrate(...)
 (DIR) diff --git a/src/main.cpp b/src/main.cpp
       t@@ -190,6 +190,7 @@ int main(int argc, char *argv[])
          Float4 *host_angacc = new Float4[p.np];  // Particle angular acceleration vector (dotomega)
          Float4 *host_force  = new Float4[p.np];  // Particle summed force
          Float4 *host_torque = new Float4[p.np];  // Particle summed torque
       +  Float4 *host_angpos = new Float4[p.np];  // Particle angular position
        
        
          uint4  *host_bonds  = new uint4[p.np];   // Bonds from particle [i] to two particles
       t@@ -223,6 +224,9 @@ int main(int argc, char *argv[])
              exit(1);
            if (fread(&host_torque[j].x, sizeof(Float), 1, fp) != 1)
              exit(1);
       +    if (fread(&host_angpos[j].x, sizeof(Float), 1, fp) != 1)
       +      exit(1);
       +
        
            if (fread(&host_x[j].y, sizeof(Float), 1, fp) != 1)
              exit(1); // Return unsuccessful exit status
       t@@ -234,6 +238,8 @@ int main(int argc, char *argv[])
              exit(1);
            if (fread(&host_torque[j].y, sizeof(Float), 1, fp) != 1)
              exit(1);
       +    if (fread(&host_angpos[j].y, sizeof(Float), 1, fp) != 1)
       +      exit(1);
        
            if (fread(&host_x[j].z, sizeof(Float), 1, fp) != 1)
              exit(1); // Return unsuccessful exit status
       t@@ -245,6 +251,8 @@ int main(int argc, char *argv[])
              exit(1);
            if (fread(&host_torque[j].z, sizeof(Float), 1, fp) != 1)
              exit(1);
       +    if (fread(&host_angpos[j].z, sizeof(Float), 1, fp) != 1)
       +      exit(1);
          }
        
          for (j=0; j<p.np; ++j) {
       t@@ -438,6 +446,7 @@ int main(int argc, char *argv[])
                  host_angacc,
                  host_force,
                  host_torque,
       +          host_angpos,
                  host_bonds,
                  &p, &grid, 
                  &time, &params,
       t@@ -457,6 +466,7 @@ int main(int argc, char *argv[])
          delete[] host_angacc;
          delete[] host_force;
          delete[] host_torque;
       +  delete[] host_angpos;
        
          // Particle bonds
          delete[] host_bonds;