tscrub.cu - cuda-memscrub - scrubs the global device memory of CUDA GPUs
 (HTM) git clone git://src.adamsgaard.dk/cuda-memscrub
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       tscrub.cu (4333B)
       ---
            1 #include <stdio.h>
            2 #include <string.h>
            3 //#include <cuda.h>
            4 //#include <cutil.h>
            5 #include "utility.cuh"
            6 
            7 #define VERSION "0.1"
            8 #define VALUETOWRITE 1234
            9 #define MEMCHUNKS 19
           10 
           11 __global__ void write_value(int* d_mem, long unsigned int n_ints,
           12         unsigned int nx, unsigned int ny)
           13 {
           14     // 3d thread index
           15     unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
           16     unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
           17     unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
           18 
           19     // 1d thread index
           20     long unsigned int idx = x + nx*y + nx*ny*z;
           21 
           22     if (idx < n_ints) {
           23         __syncthreads();
           24         d_mem[idx] = VALUETOWRITE;
           25     }
           26 }
           27 
           28 int main(int argc, char** argv)
           29 {
           30     int target_device = 0;
           31 
           32     if (argc == 2 &&
           33             (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0)) {
           34         printf("CUDA memory scrubber, version %s\n", VERSION);
           35         printf("License GPLv3+: GNU GPL version 3 or later "
           36                 "http://gnu.org/licenses/gpl.html\n"
           37                 "There is NO WARRANTY, to the extent permitted by law.\n"
           38                 "Written by Anders Damsgaard <andersd@riseup.net>\n"
           39                 "Maintained at "
           40                 "https://github.com/anders-dc/cuda-memscrub\n");
           41         exit(0);
           42     } else if (argc == 3 &&
           43             (strcmp(argv[1], "-d") == 0 || strcmp(argv[1], "--device") == 0)) {
           44         target_device = atoi(argv[2]);
           45     } else if (argc == 2 &&
           46             (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)) {
           47         printf("CUDA memory scrubber. Usage:\n"
           48                 " %s [OPTIONS]\n", argv[0]);
           49         printf("Options:\n"
           50                 "\t-h, --help\t\tshow this information\n"
           51                 "\t-v, --version\t\tshow version information\n"
           52                 "\t-d <n>, --device <n>\tscrub device with index n\n");
           53         exit(0);
           54     } else if (argc > 1) {
           55         fprintf(stderr, "argument not understood. See %s for usage "
           56                 "information\n", argv[0]);
           57         exit(EXIT_FAILURE);
           58     }
           59 
           60     checkForCudaErrors("Before initializing CUDA device");
           61 
           62     int device_count;
           63     cudaGetDeviceCount(&device_count);
           64     cudaDeviceProp prop;
           65 
           66     if (device_count == 0) {
           67         fprintf(stderr, "Error: No CUDA-enabled devices available. Bye.\n");
           68         exit(EXIT_FAILURE);
           69     } else if (target_device >= device_count) {
           70         fprintf(stderr, "Error: No CUDA-enabled device by id %d is detected.\n",
           71                 target_device);
           72         exit(EXIT_FAILURE);
           73     } else {
           74         cudaGetDeviceProperties(&prop, target_device);
           75         printf("target: device %d, %s\n", target_device, prop.name);
           76         cudaChooseDevice(&target_device, &prop);
           77     }
           78 
           79     size_t mem_size = prop.totalGlobalMem;
           80     printf("global memory size: %lu bytes\n", mem_size);
           81 
           82     long unsigned int n_ints = mem_size/sizeof(int)/(MEMCHUNKS+1);
           83     printf("overwriting the first %ld bytes, corresponding to %ld int values "
           84             "or the first %.1f%% of the global device memory.\n",
           85             n_ints*sizeof(int)*MEMCHUNKS, n_ints*MEMCHUNKS,
           86             (float)100*n_ints*sizeof(int)*MEMCHUNKS/prop.totalGlobalMem);
           87 
           88     int* d[MEMCHUNKS];  // array of device pointers
           89     int i;
           90     for (i=0; i<MEMCHUNKS; i++) {
           91         if (cudaMalloc((void**)&d[i], n_ints*sizeof(int))
           92                     == cudaErrorMemoryAllocation) {
           93             fprintf(stderr, "Error: Could not allocate the requested amount of "
           94                     "global memory on the device.\n");
           95             cudaDeviceReset();
           96             exit(EXIT_FAILURE);
           97         }
           98     }
           99     checkForCudaErrors("After memory allocation");
          100 
          101     dim3 dimBlock(prop.maxThreadsPerBlock, 1, 1);
          102     unsigned int grid_size = iDivUp(n_ints, prop.maxThreadsPerBlock);
          103     if (grid_size > prop.maxGridSize[0]) {
          104         fprintf(stderr, "Error: The device cannot handle a grid large enough to"
          105                 " handle the array size\n");
          106         cudaDeviceReset();
          107         exit(EXIT_FAILURE);
          108     }
          109     dim3 dimGrid(grid_size, 1, 1);
          110 
          111     for (i=0; i<MEMCHUNKS; i++) {
          112         write_value<<<dimGrid, dimBlock>>>(d[i], n_ints, 1, 1);
          113         cudaThreadSynchronize();
          114         checkForCudaErrors("After write_value", i);
          115     }
          116 
          117 
          118     for (i=0; i<MEMCHUNKS; i++) {
          119         cudaFree(d[i]);
          120         checkForCudaErrors("After cudaFree(d[i])", i);
          121     }
          122 
          123     cudaDeviceReset();
          124 
          125     return 0;
          126 }