Fixed threadsPerBlock value - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit 5c10dece10e2b7eb6aebc93258c423f586a83dd6
(DIR) parent 18da38f2435da18e108b208f2054eb4c11107207
(HTM) Author: Anders Damsgaard <adc@geo.au.dk>
Date: Thu, 2 May 2013 09:58:15 +0200
Fixed threadsPerBlock value
Diffstat:
M CMakeLists.txt | 10 +++++-----
M python/sphere.py | 37 ++++++++++++++++++++++---------
M src/device.cu | 4 ++--
M src/latticeboltzmann.cuh | 1 -
4 files changed, 33 insertions(+), 19 deletions(-)
---
(DIR) diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -24,13 +24,13 @@ find_package(OpenMP)
enable_testing()
# Set build type = Debug
-#set(CMAKE_BUILD_TYPE Debug)
-#if (CUDA_FOUND)
-# set(CUDA_NVCC_FLAGS -g;-G)
-#endif()
+set(CMAKE_BUILD_TYPE Debug)
+if (CUDA_FOUND)
+ set(CUDA_NVCC_FLAGS -g;-G)
+endif()
# Set build type = Release
-set(CMAKE_BUILD_TYPE Release)
+#set(CMAKE_BUILD_TYPE Release)
# Add source directory to project.
add_subdirectory(src)
(DIR) diff --git a/python/sphere.py b/python/sphere.py
@@ -116,9 +116,9 @@ class Spherebin:
self.nu = numpy.zeros(1, dtype=numpy.float64)
self.f_v = numpy.zeros(
- (self.num[0] * self.num[1] * self.num[2], self.nd),
+ (self.num[0], self.num[1], self.num[2], self.nd),
dtype=numpy.float64)
- self.f_rho = numpy.zeros(self.num[0] * self.num[1] * self.num[2],
+ self.f_rho = numpy.zeros((self.num[0], self.num[1], self.num[2]),
dtype=numpy.float64)
def __cmp__(self, other):
@@ -308,22 +308,34 @@ class Spherebin:
self.tau_b = numpy.fromfile(fh, dtype=numpy.float64, count=1)
self.bonds = numpy.empty((self.nb0, 2), dtype=numpy.uint32)
for i in range(self.nb0):
- self.bonds[i,0] = numpy.fromfile(fh, dtype=numpy.uint32, count=1)
- self.bonds[i,1] = numpy.fromfile(fh, dtype=numpy.uint32, count=1)
- self.bonds_delta_n = numpy.fromfile(fh, dtype=numpy.float64, count=self.nb0)
- self.bonds_delta_t = numpy.fromfile(fh, dtype=numpy.float64, count=self.nb0*self.nd).reshape(self.nb0, self.nd)
- self.bonds_omega_n = numpy.fromfile(fh, dtype=numpy.float64, count=self.nb0)
- self.bonds_omega_t = numpy.fromfile(fh, dtype=numpy.float64, count=self.nb0*self.nd).reshape(self.nb0, self.nd)
+ self.bonds[i,0] = numpy.fromfile(fh, dtype=numpy.uint32,
+ count=1)
+ self.bonds[i,1] = numpy.fromfile(fh, dtype=numpy.uint32,
+ count=1)
+ self.bonds_delta_n = numpy.fromfile(fh, dtype=numpy.float64,
+ count=self.nb0)
+ self.bonds_delta_t = numpy.fromfile(fh, dtype=numpy.float64,
+ count=self.nb0*self.nd).reshape(self.nb0, self.nd)
+ self.bonds_omega_n = numpy.fromfile(fh, dtype=numpy.float64,
+ count=self.nb0)
+ self.bonds_omega_t = numpy.fromfile(fh, dtype=numpy.float64,
+ count=self.nb0*self.nd).reshape(self.nb0, self.nd)
else:
self.nb0 = numpy.zeros(1, dtype=numpy.uint32)
if (fluid == True):
ncells = self.num[0]*self.num[1]*self.num[2]
self.nu = numpy.fromfile(fh, dtype=numpy.float64, count=1)
- self.f_v = numpy.empty(ncells*self.nd, dtype=numpy.float64)
+ self.f_v = numpy.empty(
+ (self.num[0], self.num[1], self.num[2], self.nd),
+ dtype=numpy.float64)
self.f_rho = numpy.empty(ncells, dtype=numpy.float64)
- self.f_v = numpy.fromfile(fh, dtype=numpy.float64, count=ncells*self.nd)
- self.f_rho = numpy.fromfile(fh, dtype=numpy.float64, count=ncells)
+ self.f_v = numpy.fromfile(fh, dtype=numpy.float64,
+ count=ncells*self.nd).reshape(
+ self.num[0], self.num[1], self.num[2], self.nd)
+ self.f_rho = numpy.fromfile(fh, dtype=numpy.float64,
+ count=ncells).reshape(
+ self.num[0], self.num[1], self.num[2])
finally:
if fh is not None:
@@ -1771,6 +1783,9 @@ class Spherebin:
fig.savefig('../img_out/' + self.sid + '-ts-x1x3-slipangles.png')
fig.clf()
+ def plotRho(self):
+ x=2
+
def convert(graphicsformat = "png",
folder = "../img_out"):
(DIR) diff --git a/src/device.cu b/src/device.cu
@@ -562,8 +562,8 @@ __host__ void DEM::startTime()
tic = clock();
//// GPU workload configuration
- //unsigned int threadsPerBlock = 256;
- unsigned int threadsPerBlock = 512;
+ unsigned int threadsPerBlock = 256;
+ //unsigned int threadsPerBlock = 512;
// Create enough blocks to accomodate the particles
unsigned int blocksPerGrid = iDivUp(np, threadsPerBlock);
(DIR) diff --git a/src/latticeboltzmann.cuh b/src/latticeboltzmann.cuh
@@ -124,7 +124,6 @@ __global__ void latticeBoltzmannD3Q19(
//printf("(x,y,x) = (%d,%d,%d), tidx = %d\n", x, y, z, tidx);
-
// Load the fluid distribution into local registers
__syncthreads();
Float f_0 = dev_f[grid2index(x,y,z,0)];