tAdded angular position information in datafiles and on device - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit 223197848f6e1f56d5003dc84ac7d89623de97d0
(DIR) parent b742ff213fba24e811e63093723c2b8858c86fae
(HTM) Author: Anders Damsgaard <adc@geo.au.dk>
Date: Thu, 11 Oct 2012 09:28:24 +0200
Added angular position information in datafiles and on device
Diffstat:
M python/sphere.py | 21 ++++++++++++++++++---
M src/datatypes.h | 4 +++-
M src/device.cu | 13 ++++++++++---
M src/file_io.cpp | 10 ++++++++++
M src/integration.cuh | 10 +++++++++-
M src/main.cpp | 10 ++++++++++
6 files changed, 60 insertions(+), 8 deletions(-)
---
(DIR) diff --git a/python/sphere.py b/python/sphere.py
t@@ -39,6 +39,7 @@ class Spherebin:
self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
self.force = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
self.torque = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
+ self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
self.fixvel = numpy.zeros(self.np, dtype=numpy.float64)
self.xsum = numpy.zeros(self.np, dtype=numpy.float64)
self.radius = numpy.zeros(self.np, dtype=numpy.float64)
t@@ -112,6 +113,7 @@ class Spherebin:
self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
self.force = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
self.torque = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
+ self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
self.fixvel = numpy.zeros(self.np, dtype=numpy.float64)
self.xsum = numpy.zeros(self.np, dtype=numpy.float64)
self.radius = numpy.zeros(self.np, dtype=numpy.float64)
t@@ -145,6 +147,7 @@ class Spherebin:
self.angvel[j,i] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
self.force[j,i] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
self.torque[j,i] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
+ self.angpos[j,i] = numpy.fromfile(fh, dtype=numpy.float64, count=1)
# Per-particle single-value parameters
for j in range(self.np):
t@@ -247,6 +250,7 @@ class Spherebin:
fh.write(self.angvel[j,i].astype(numpy.float64))
fh.write(self.force[j,i].astype(numpy.float64))
fh.write(self.torque[j,i].astype(numpy.float64))
+ fh.write(self.angpos[j,i].astype(numpy.float64))
# Per-particle single-value parameters
for j in range(self.np):
t@@ -549,6 +553,11 @@ class Spherebin:
z_adjust = 1.1 # Overheightening of grid. 1.0 = no overheightening
self.num[2] = numpy.ceil((z_max-z_min)*z_adjust/cellsize)
self.L[2] = (z_max-z_min)*z_adjust
+
+ # zero kinematics
+ self.vel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
+ self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
+ self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
# Initialize upper wall
self.wmode[0] = 0
t@@ -575,6 +584,11 @@ class Spherebin:
self.num[2] = numpy.ceil((z_max-z_min)*z_adjust/cellsize)
self.L[2] = (z_max-z_min)*z_adjust
+ # zero kinematics
+ self.vel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
+ self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
+ self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
+
# Initialize upper wall
self.wmode[0] = 1
self.w_n[0,2] = -1.0
t@@ -606,9 +620,10 @@ class Spherebin:
# Initialize upper wall
self.w_devs[0] = deviatoric_stress
- # Zero kinematics
- self.vel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
- self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np,self.nd)
+ # zero kinematics
+ self.vel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
+ self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
+ self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(self.np, self.nd)
fixheight = 2*cellsize
#fixheight = cellsize
(DIR) diff --git a/src/datatypes.h b/src/datatypes.h
t@@ -144,7 +144,8 @@ struct Params {
int fwritebin(char *target, Particles *p,
Float4 *host_x, Float4 *host_vel,
Float4 *host_angvel, Float4 *host_force,
- Float4 *host_torque, uint4 *host_bonds,
+ Float4 *host_torque, Float4 *host_angpos,
+ uint4 *host_bonds,
Grid *grid, Time *time, Params *params,
Float4 *host_w_nx, Float4 *host_w_mvfd);
t@@ -160,6 +161,7 @@ void gpuMain(Float4* host_x,
Float4* host_angacc,
Float4* host_force,
Float4* host_torque,
+ Float4* host_angpos,
uint4* host_bonds,
Particles* p, Grid* grid,
Time* time, Params* params,
(DIR) diff --git a/src/device.cu b/src/device.cu
t@@ -161,6 +161,7 @@ __host__ void gpuMain(Float4* host_x,
Float4* host_angacc,
Float4* host_force,
Float4* host_torque,
+ Float4* host_angpos,
uint4* host_bonds,
Particles* p,
Grid* grid,
t@@ -185,6 +186,7 @@ __host__ void gpuMain(Float4* host_x,
Float4* dev_angacc; // Particle angular acceleration
Float4* dev_force; // Sum of forces
Float4* dev_torque; // Sum of torques
+ Float4* dev_angpos; // Particle angular position
Float* dev_radius; // Particle radius
Float* dev_es_dot; // Current shear energy producion rate
Float* dev_ev_dot; // Current viscous energy producion rate
t@@ -240,6 +242,7 @@ __host__ void gpuMain(Float4* host_x,
cudaMalloc((void**)&dev_angacc, memSizeF4);
cudaMalloc((void**)&dev_force, memSizeF4);
cudaMalloc((void**)&dev_torque, memSizeF4);
+ cudaMalloc((void**)&dev_angpos, memSizeF4);
cudaMalloc((void**)&dev_radius, memSizeF);
cudaMalloc((void**)&dev_radius_sorted, memSizeF);
cudaMalloc((void**)&dev_es_dot, memSizeF);
t@@ -280,6 +283,7 @@ __host__ void gpuMain(Float4* host_x,
cudaMemcpy(dev_angacc, host_angacc, memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy(dev_force, host_force, memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy(dev_torque, host_torque, memSizeF4, cudaMemcpyHostToDevice);
+ cudaMemcpy(dev_angpos, host_angpos, memSizeF4, cudaMemcpyHostToDevice);
//cudaMemcpy(dev_bonds, host_bonds, sizeof(uint4) * p->np, cudaMemcpyHostToDevice);
cudaMemcpy(dev_radius, p->radius, memSizeF, cudaMemcpyHostToDevice);
cudaMemcpy(dev_es_dot, p->es_dot, memSizeF, cudaMemcpyHostToDevice);
t@@ -361,7 +365,7 @@ __host__ void gpuMain(Float4* host_x,
sprintf(file,"%s/output/%s.output0.bin", cwd, inputbin);
if (fwritebin(file, p, host_x, host_vel,
host_angvel, host_force,
- host_torque,
+ host_torque, host_angpos,
host_bonds,
grid, time, params,
host_w_nx, host_w_mvfd) != 0) {
t@@ -539,7 +543,7 @@ __host__ void gpuMain(Float4* host_x,
integrate<<<dimGrid, dimBlock>>>(dev_x_sorted, dev_vel_sorted,
dev_angvel_sorted, dev_radius_sorted,
dev_x, dev_vel, dev_angvel,
- dev_force, dev_torque,
+ dev_force, dev_torque, dev_angpos,
dev_gridParticleIndex);
cudaThreadSynchronize();
t@@ -598,6 +602,7 @@ __host__ void gpuMain(Float4* host_x,
cudaMemcpy(host_angvel, dev_angvel, memSizeF4, cudaMemcpyDeviceToHost);
cudaMemcpy(host_force, dev_force, memSizeF4, cudaMemcpyDeviceToHost);
cudaMemcpy(host_torque, dev_torque, memSizeF4, cudaMemcpyDeviceToHost);
+ cudaMemcpy(host_angpos, dev_angpos, memSizeF4, cudaMemcpyDeviceToHost);
//cudaMemcpy(host_bonds, dev_bonds, sizeof(uint4) * p->np, cudaMemcpyDeviceToHost);
cudaMemcpy(p->es_dot, dev_es_dot, memSizeF, cudaMemcpyDeviceToHost);
cudaMemcpy(p->ev_dot, dev_ev_dot, memSizeF, cudaMemcpyDeviceToHost);
t@@ -620,7 +625,8 @@ __host__ void gpuMain(Float4* host_x,
if (fwritebin(file, p, host_x, host_vel,
host_angvel, host_force,
- host_torque, host_bonds,
+ host_torque, host_angpos,
+ host_bonds,
grid, time, params,
host_w_nx, host_w_mvfd) != 0) {
cout << "\n Error during fwritebin() in main loop\n";
t@@ -695,6 +701,7 @@ __host__ void gpuMain(Float4* host_x,
cudaFree(dev_angacc);
cudaFree(dev_force);
cudaFree(dev_torque);
+ cudaFree(dev_angpos);
cudaFree(dev_radius);
cudaFree(dev_radius_sorted);
cudaFree(dev_es_dot);
(DIR) diff --git a/src/file_io.cpp b/src/file_io.cpp
t@@ -17,6 +17,7 @@ int fwritebin(char *target,
Float4 *host_angvel,
Float4 *host_force,
Float4 *host_torque,
+ Float4 *host_angpos,
uint4 *host_bonds,
Grid *grid,
Time *time,
t@@ -73,6 +74,7 @@ int fwritebin(char *target,
fwrite(&host_angvel[j].x, sizeof(Float), 1, fp);
fwrite(&host_force[j].x, sizeof(Float), 1, fp);
fwrite(&host_torque[j].x, sizeof(Float), 1, fp);
+ fwrite(&host_angpos[j].x, sizeof(Float), 1, fp);
// y-axis
fwrite(&host_x[j].y, sizeof(Float), 1, fp);
t@@ -80,6 +82,7 @@ int fwritebin(char *target,
fwrite(&host_angvel[j].y, sizeof(Float), 1, fp);
fwrite(&host_force[j].y, sizeof(Float), 1, fp);
fwrite(&host_torque[j].y, sizeof(Float), 1, fp);
+ fwrite(&host_angpos[j].y, sizeof(Float), 1, fp);
// z-axis
fwrite(&host_x[j].z, sizeof(Float), 1, fp);
t@@ -87,6 +90,7 @@ int fwritebin(char *target,
fwrite(&host_angvel[j].z, sizeof(Float), 1, fp);
fwrite(&host_force[j].z, sizeof(Float), 1, fp);
fwrite(&host_torque[j].z, sizeof(Float), 1, fp);
+ fwrite(&host_angpos[j].z, sizeof(Float), 1, fp);
}
// Individual particle values
t@@ -200,6 +204,8 @@ int fwritebin(char *target,
fwrite(&d, sizeof(d), 1, fp);
d = (double)host_torque[j].x;
fwrite(&d, sizeof(d), 1, fp);
+ d = (double)host_angpos[j].x;
+ fwrite(&d, sizeof(d), 1, fp);
// y-axis
d = (double)host_x[j].y;
t@@ -212,6 +218,8 @@ int fwritebin(char *target,
fwrite(&d, sizeof(d), 1, fp);
d = (double)host_torque[j].y;
fwrite(&d, sizeof(d), 1, fp);
+ d = (double)host_angpos[j].y;
+ fwrite(&d, sizeof(d), 1, fp);
// z-axis
d = (double)host_x[j].z;
t@@ -224,6 +232,8 @@ int fwritebin(char *target,
fwrite(&d, sizeof(d), 1, fp);
d = (double)host_torque[j].z;
fwrite(&d, sizeof(d), 1, fp);
+ d = (double)host_angpos[j].z;
+ fwrite(&d, sizeof(d), 1, fp);
}
// Individual particle values
(DIR) diff --git a/src/integration.cuh b/src/integration.cuh
t@@ -10,7 +10,7 @@
__global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
Float4* dev_angvel_sorted, Float* dev_radius_sorted, // Input
Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, // Output
- Float4* dev_force, Float4* dev_torque, // Input
+ Float4* dev_force, Float4* dev_torque, Float4* dev_angpos, // Input
unsigned int* dev_gridParticleIndex) // Input: Sorted-Unsorted key
{
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
t@@ -22,6 +22,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
unsigned int orig_idx = dev_gridParticleIndex[idx];
Float4 force = dev_force[orig_idx];
Float4 torque = dev_torque[orig_idx];
+ Float4 angpos = dev_angpos[orig_idx];
// Initialize acceleration vectors to zero
Float4 acc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
t@@ -88,6 +89,12 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
//x.y += vel.y * dt;
//x.z += vel.z * dt;
+ // Update angular position. Second-order scheme based on Taylor expansion
+ // (greater accuracy than the first-order Euler's scheme)
+ angpos.x += angvel.x * dt + (angacc.x * dt*dt)/2.0f;
+ angpos.y += angvel.y * dt + (angacc.y * dt*dt)/2.0f;
+ angpos.z += angvel.z * dt + (angacc.z * dt*dt)/2.0f;
+
// Update position. Second-order scheme based on Taylor expansion
// (greater accuracy than the first-order Euler's scheme)
x.x += vel.x * dt + (acc.x * dt*dt)/2.0f;
t@@ -121,6 +128,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
// Store data in global memory at original, pre-sort positions
dev_angvel[orig_idx] = angvel;
dev_vel[orig_idx] = vel;
+ dev_angpos[orig_idx] = angpos;
dev_x[orig_idx] = x;
}
} // End of integrate(...)
(DIR) diff --git a/src/main.cpp b/src/main.cpp
t@@ -190,6 +190,7 @@ int main(int argc, char *argv[])
Float4 *host_angacc = new Float4[p.np]; // Particle angular acceleration vector (dotomega)
Float4 *host_force = new Float4[p.np]; // Particle summed force
Float4 *host_torque = new Float4[p.np]; // Particle summed torque
+ Float4 *host_angpos = new Float4[p.np]; // Particle angular position
uint4 *host_bonds = new uint4[p.np]; // Bonds from particle [i] to two particles
t@@ -223,6 +224,9 @@ int main(int argc, char *argv[])
exit(1);
if (fread(&host_torque[j].x, sizeof(Float), 1, fp) != 1)
exit(1);
+ if (fread(&host_angpos[j].x, sizeof(Float), 1, fp) != 1)
+ exit(1);
+
if (fread(&host_x[j].y, sizeof(Float), 1, fp) != 1)
exit(1); // Return unsuccessful exit status
t@@ -234,6 +238,8 @@ int main(int argc, char *argv[])
exit(1);
if (fread(&host_torque[j].y, sizeof(Float), 1, fp) != 1)
exit(1);
+ if (fread(&host_angpos[j].y, sizeof(Float), 1, fp) != 1)
+ exit(1);
if (fread(&host_x[j].z, sizeof(Float), 1, fp) != 1)
exit(1); // Return unsuccessful exit status
t@@ -245,6 +251,8 @@ int main(int argc, char *argv[])
exit(1);
if (fread(&host_torque[j].z, sizeof(Float), 1, fp) != 1)
exit(1);
+ if (fread(&host_angpos[j].z, sizeof(Float), 1, fp) != 1)
+ exit(1);
}
for (j=0; j<p.np; ++j) {
t@@ -438,6 +446,7 @@ int main(int argc, char *argv[])
host_angacc,
host_force,
host_torque,
+ host_angpos,
host_bonds,
&p, &grid,
&time, ¶ms,
t@@ -457,6 +466,7 @@ int main(int argc, char *argv[])
delete[] host_angacc;
delete[] host_force;
delete[] host_torque;
+ delete[] host_angpos;
// Particle bonds
delete[] host_bonds;