mx1.adamsgaard.dk

       tReplaced __umul24 multiplication functions with standard multiplication operators - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 82d698d30f1988547ebf70a50e0fb9a7b295d99f
 (DIR) parent 819f1dd8f7de89c262525884dcd25f86bd9eb292
 (HTM) Author: Anders Damsgaard <adc@geo.au.dk>
       Date:   Tue,  4 Sep 2012 09:07:27 +0200
       
       Replaced __umul24 multiplication functions with standard multiplication operators
       
       Diffstat:
         M src/contactsearch.cuh               |      18 ++++++------------
         M src/sorting.cuh                     |       7 ++-----
       
       2 files changed, 8 insertions(+), 17 deletions(-)
       ---
 (DIR) diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh
       t@@ -106,11 +106,8 @@ __device__ void findAndProcessContactsInCell(int3 targetCell,
          //// Check and process particle-particle collisions
        
          // Calculate linear cell ID
       -  unsigned int cellID = targetCell.x  
       -                            + __umul24(targetCell.y, devC_num[0]) 
       -                        + __umul24(__umul24(devC_num[0], 
       -                                                  devC_num[1]), 
       -                                    targetCell.z); 
       +  unsigned int cellID = targetCell.x + targetCell.y * devC_num[0]
       +                        + (devC_num[0] * devC_num[1]) * targetCell.z; 
        
          // Lowest particle index in cell
          unsigned int startIdx = dev_cellStart[cellID];
       t@@ -203,11 +200,8 @@ __device__ void findContactsInCell(int3 targetCell,
          //// Check and process particle-particle collisions
        
          // Calculate linear cell ID
       -  unsigned int cellID = targetCell.x  
       -                            + __umul24(targetCell.y, devC_num[0]) 
       -                        + __umul24(__umul24(devC_num[0], 
       -                                                  devC_num[1]), 
       -                                    targetCell.z); 
       +  unsigned int cellID = targetCell.x + targetCell.y * devC_num[0]
       +                        + (devC_num[0] * devC_num[1]) * targetCell.z; 
        
          // Lowest particle index in cell
          unsigned int startIdx = dev_cellStart[cellID];
       t@@ -312,7 +306,7 @@ __global__ void topology(unsigned int* dev_cellStart,
                                 Float4* dev_distmod)
        {
          // Thread index equals index of particle A
       -  unsigned int idx_a = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
       +  unsigned int idx_a = blockIdx.x * blockDim.x + threadIdx.x;
          if (idx_a < devC_np) {
            // Fetch particle data in global read
            Float4 x_a      = dev_x_sorted[idx_a];
       t@@ -375,7 +369,7 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
                                 Float4* dev_delta_t)
        {
          // Thread index equals index of particle A
       -  unsigned int idx_a = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
       +  unsigned int idx_a = blockIdx.x * blockDim.x + threadIdx.x;
        
          if (idx_a < devC_np) {
        
 (DIR) diff --git a/src/sorting.cuh b/src/sorting.cuh
       t@@ -11,9 +11,7 @@ __device__ int calcCellID(Float3 x)
          i_z = floor((x.z - devC_origo[2]) / (devC_L[2]/devC_num[2]));
        
          // Integral coordinates are converted to 1D coordinate:
       -  return __umul24(__umul24(i_z, devC_num[1]),
       -                        devC_num[0]) 
       -             + __umul24(i_y, devC_num[0]) + i_x;
       +  return (i_z * devC_num[1]) * devC_num[0] + i_y * devC_num[0] + i_x;
        
        } // End of calcCellID(...)
        
       t@@ -25,7 +23,7 @@ __global__ void calcParticleCellID(unsigned int* dev_gridParticleCellID,
                                           Float4* dev_x) 
        {
          //unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
       -  unsigned int idx = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
       +  unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
        
          if (idx < devC_np) { // Condition prevents block size error
        
       t@@ -65,7 +63,6 @@ __global__ void reorderArrays(unsigned int* dev_cellStart, unsigned int* dev_cel
        
          // Thread index in grid
          unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; 
       -  //unsigned int idx = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
        
          // CellID hash value of particle idx
          unsigned int cellID;