remove exclusive flag, choose device on command line instead - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
 (HTM) git clone git://src.adamsgaard.dk/sphere
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit a34e13cce992c03f6a112b758c3550f37f6a2e52
 (DIR) parent a99dcd0fce063ba0a59faf9ae09b290180f866e9
 (HTM) Author: Anders Damsgaard <anders.damsgaard@geo.au.dk>
       Date:   Mon, 28 Jul 2014 10:56:27 +0200
       
       remove exclusive flag, choose device on command line instead
       
       Diffstat:
         M python/sphere.py                    |      16 +++++++++-------
         M src/device.cu                       |      62 +++++++++++++++++++------------
         M src/main.cpp                        |      13 +++++++------
         M src/sphere.cpp                      |       4 ++--
         M src/sphere.h                        |       1 -
       
       5 files changed, 56 insertions(+), 40 deletions(-)
       ---
 (DIR) diff --git a/python/sphere.py b/python/sphere.py
       @@ -3063,7 +3063,7 @@ class sim:
                return numpy.array(porosity), numpy.array(depth)
        
            def run(self, verbose=True, hideinputfile=False, dry=False, valgrind=False,
       -            cudamemcheck=False, exclusive_mode=False):
       +            cudamemcheck=False, device=-1):
                '''
                Start ``sphere`` calculations on the ``sim`` object
        
       @@ -3082,8 +3082,10 @@ class sim:
                    check for device memory leaks and errors. This causes a significant
                    increase in computational time.
                :type cudamemcheck: bool
       -        :param exclusive_mode: The system GPUs are running in exclusive mode.
       -        :type exclusive_mode: bool
       +        :param device: Specify the GPU device to execute the program on.
       +            If not specified, sphere will use the device with the most CUDA cores.
       +            To see a list of devices, run ``nvidia-smi`` in the system shell.
       +        :type device: int
                '''
        
                self.writebin(verbose=False)
       @@ -3092,7 +3094,7 @@ class sim:
                stdout = ""
                dryarg = ""
                fluidarg = ""
       -        exclusivearg = ""
       +        devicearg = ""
                valgrindbin = ""
                cudamemchk = ""
                binary = "sphere"
       @@ -3108,11 +3110,11 @@ class sim:
                    cudamemchk = "cuda-memcheck --leak-check full "
                if (self.fluid == True):
                    fluidarg = "--fluid "
       -        if (exclusive_mode == True):
       -            exclusivearg = "--exclusive "
       +        if (device != -1):
       +            devicearg = "-d " + str(device) + " "
        
                cmd = "cd ..; " + valgrindbin + cudamemchk + "./" + binary + " " \
       -                + quiet + dryarg + fluidarg + exclusivearg + \
       +                + quiet + dryarg + fluidarg + devicearg + \
                        "input/" + self.sid + ".bin " + stdout
                #print(cmd)
                status = subprocess.call(cmd, shell=True)
 (DIR) diff --git a/src/device.cu b/src/device.cu
       @@ -83,41 +83,55 @@ __host__ void DEM::initializeGPU(void)
            }
        
            // Loop through GPU's and choose the one with the most CUDA cores
       -    int ncudacores;
       -    int max_ncudacores = 0;
       -    for (int d=0; d<ndevices; d++) {
       -        cudaGetDeviceProperties(&prop, d);
       +    if (device == -1) {
       +        int ncudacores;
       +        int max_ncudacores = 0;
       +        for (int d=0; d<ndevices; d++) {
       +            cudaGetDeviceProperties(&prop, d);
       +            cudaDriverGetVersion(&cudaDriverVersion);
       +            cudaRuntimeGetVersion(&cudaRuntimeVersion);
       +
       +            ncudacores = prop.multiProcessorCount
       +                *cudaCoresPerSM(prop.major, prop.minor);
       +            if (ncudacores > max_ncudacores) {
       +                max_ncudacores = ncudacores;
       +                cudadevice = d;
       +            }
       +
       +            if (verbose == 1) {
       +                cout << "  CUDA device ID: " << d << "\n";
       +                cout << "  - Name: " <<  prop.name << ", compute capability: " 
       +                     << prop.major << "." << prop.minor << ".\n";
       +                cout << "  - CUDA Driver version: " << cudaDriverVersion/1000 
       +                     << "." <<  cudaDriverVersion%100 
       +                     << ", runtime version " << cudaRuntimeVersion/1000 << "." 
       +                     << cudaRuntimeVersion%100 << std::endl;
       +            }
       +        }
       +
       +        device = cudadevice; // store in DEM class
       +    } else {
       +
       +        cudaGetDeviceProperties(&prop, device);
                cudaDriverGetVersion(&cudaDriverVersion);
                cudaRuntimeGetVersion(&cudaRuntimeVersion);
        
       -        ncudacores = prop.multiProcessorCount
       +        int ncudacores = prop.multiProcessorCount
                    *cudaCoresPerSM(prop.major, prop.minor);
       -        if (ncudacores > max_ncudacores) {
       -            max_ncudacores = ncudacores;
       -            cudadevice = d;
       -        }
        
                if (verbose == 1) {
       -            cout << "  CUDA device ID: " << d << "\n";
       +            cout << "  CUDA device ID: " << device << "\n";
                    cout << "  - Name: " <<  prop.name << ", compute capability: " 
       -                << prop.major << "." << prop.minor << ".\n";
       +                 << prop.major << "." << prop.minor << ".\n";
                    cout << "  - CUDA Driver version: " << cudaDriverVersion/1000 
       -                << "." <<  cudaDriverVersion%100 
       -                << ", runtime version " << cudaRuntimeVersion/1000 << "." 
       -                << cudaRuntimeVersion%100 << std::endl;
       +                 << "." <<  cudaDriverVersion%100 
       +                 << ", runtime version " << cudaRuntimeVersion/1000 << "." 
       +                 << cudaRuntimeVersion%100
       +                 << "\n  - " << ncudacores << " CUDA cores" << std::endl;
                }
            }
        
       -    device = cudadevice; // store in DEM class
       -
       -    // Only call cudaChooseDevice if the exlusive mode flag isn't set
       -    if (exclusive_mode != 1) {
       -        if (verbose == 1) {
       -            cout << " Using CUDA device ID " << cudadevice << " with "
       -                 << max_ncudacores << " cores." << std::endl;
       -        }
       -        cudaChooseDevice(&cudadevice, &prop);
       -    }
       +    cudaChooseDevice(&device, &prop);
        
            checkForCudaErrors("While initializing CUDA device");
        }
 (DIR) diff --git a/src/main.cpp b/src/main.cpp
       @@ -37,7 +37,7 @@ int main(const int argc, const char *argv[])
            float max_val = 0.0f;     // max value of colorbar
            float lower_cutoff = 0.0f;// lower cutoff, particles below won't be rendered
            int fluid = 0;
       -    int exclusive_mode = 0;   // system GPUs are running on exclusive mode
       +    int device = -1; // -1 run on device with most cores, 0+ run on specified device
        
            // Process input parameters
            int i;
       @@ -53,8 +53,7 @@ int main(const int argc, const char *argv[])
                        "-h, --help\t\tprint help\n"
                        "-V, --version\t\tprint version information and exit\n"
                        "-q, --quiet\t\tsuppress status messages to stdout\n"
       -                "-e, --exclusive\t\tset this flag for systems containing\n"
       -                "               \t\tonly exclusive-mode GPUs\n"
       +                "-d <device>\t\texecute on device with specified id\n"
                        "-n, --dry\t\tshow key experiment parameters and quit\n"
                        "-f, --fluid\t\tsimulate fluid between particles\n"
                        "-r, --render\t\trender input files to images instead of\n"
       @@ -107,8 +106,10 @@ int main(const int argc, const char *argv[])
                else if (argvi == "-f" || argvi == "--fluid")
                    fluid = 1;
        
       -        else if (argvi == "-e" || argvi == "--exclusive")
       -            exclusive_mode = 1;
       +        else if (argvi == "-d") {
       +            device = atoi(argv[i+1]);
       +            i++; // skip ahead
       +        }
        
                else if (argvi == "-m" || argvi == "--method") {
        
       @@ -145,7 +146,7 @@ int main(const int argc, const char *argv[])
                            i += 2; // skip ahead
                        }
                    } else {
       -                i += 1;
       +                i++;
                    }
                }
        
 (DIR) diff --git a/src/sphere.cpp b/src/sphere.cpp
       @@ -20,8 +20,8 @@ DEM::DEM(const std::string inputbin,
                 const int initCuda,
                 const int transferConstMem,
                 const int fluidFlow,
       -         const int exclusive)
       -: verbose(verbosity), navierstokes(fluidFlow), exclusive_mode(exclusive)
       +         const int device)
       +: verbose(verbosity), navierstokes(fluidFlow), device(device)
        {
            using std::cout;
            using std::cerr;
 (DIR) diff --git a/src/sphere.h b/src/sphere.h
       @@ -57,7 +57,6 @@ class DEM {
                int ndevices;     // number of CUDA GPUs
                int device;       // primary GPU
                int* domain_size; // elements per GPU
       -        int exclusive_mode; // devices are running in exclusive mode (1)
        
        
                // DEVICE ARRAYS