#ifndef __GPUFETCH_HELPER_CUDA__ #define __GPUFETCH_HELPER_CUDA__ // gpufetch self contained helper_cuda.h // // Avoids relying on helper_cuda.h, which is // often very hard to include properly, causing // compilation issues. // // URL: https://github.com/NVIDIA/cuda-samples // Commit: 8199209 inline int _ConvertSMVer2Cores(int major, int minor) { // Defines for GPU Architecture types (using the SM version to determine // the # of cores per SM typedef struct { int SM; // 0xMm (hexidecimal notation), M = SM Major version, // and m = SM minor version int Cores; } sSMtoCores; sSMtoCores nGpuArchCoresPerSM[] = { {0x30, 192}, {0x32, 192}, {0x35, 192}, {0x37, 192}, {0x50, 128}, {0x52, 128}, {0x53, 128}, {0x60, 64}, {0x61, 128}, {0x62, 128}, {0x70, 64}, {0x72, 64}, {0x75, 64}, {0x80, 64}, {0x86, 128}, {0x87, 128}, // I added this one because it was missing in original cuda-samples... {0x89, 128}, {0x90, 128}, {-1, -1}}; int index = 0; while (nGpuArchCoresPerSM[index].SM != -1) { if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) { return nGpuArchCoresPerSM[index].Cores; } index++; } // If we don't find the values, we default use the previous one // to run properly printf( "MapSMtoCores for SM %d.%d is undefined." " Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index - 1].Cores); return nGpuArchCoresPerSM[index - 1].Cores; } #endif