Code is now compiled for either FERMI or KEPLER archs - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
(HTM) git clone git://src.adamsgaard.dk/sphere
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit ca4c45e37ce173a2030edbd7f20dcbc0663d5316
(DIR) parent a0d18146c21e0e14765c3ec12f79ef48a28ea880
(HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
Date: Thu, 3 Oct 2013 14:02:20 +0200
Code is now compiled for either FERMI or KEPLER archs
Diffstat:
M CMakeLists.txt | 5 +++++
M README.rst | 7 ++++++-
M src/CMakeLists.txt | 9 +++++++--
3 files changed, 18 insertions(+), 3 deletions(-)
---
(DIR) diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -14,6 +14,11 @@ cmake_minimum_required(VERSION 2.8)
# Find CUDA
find_package(CUDA REQUIRED)
+# Set CUDA GPU generation
+# 0: Fermi
+# 1: Kepler
+set(GPU_GENERATION 1)
+
# Find OpenMP
find_package(OpenMP)
(DIR) diff --git a/README.rst b/README.rst
@@ -50,10 +50,15 @@ copy, execute::
Build instructions
------------------
Sphere is built using `cmake`, the platform-specific c/c++ compilers,
-and `nvcc` from the cuda toolkit. Execute the following commands from
+and `nvcc` from the cuda toolkit.
+
+If you plan to run sphere on a Kepler GPU, execute the following commands from
the root directory::
cmake . && make
+If you instead plan to execute it on a Fermi GPU, change ``set(GPU_GENERATION
+1)`` to ``set(GPU_GENERATION 0)`` in `CMakeLists.txt`.
+
In some cases the CMake FindCUDA module will have troubles locating the
CUDA samples directory, and will complain about `helper_math.h` not being
found.
(DIR) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -12,8 +12,13 @@ INCLUDE(FindCUDA)
# Additional NVCC command line arguments
# NOTE: Multiple arguments must be semi-colon selimited
-SET(CUDA_NVCC_FLAGS
- "--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20\" -ccbin gcc-4.6")
+IF (GPU_GENERATION EQUAL 1) # Kepler: generate code for compute capability 3.5
+ SET(CUDA_NVCC_FLAGS
+ "--use_fast_math;-O3;-gencode=arch=compute_35,code=\"sm_35,compute_35\";--fmad=false -ccbin gcc-4.6")
+ELSE() # Fermi: generate code for compute capability 2.0
+ SET(CUDA_NVCC_FLAGS
+ "--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20\";--fmad=false -ccbin gcc-4.6")
+ENDIF (GPU_GENERATION EQUAL 1)
# Rule to build executable program
CUDA_ADD_EXECUTABLE(../sphere