Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
af52d2850c |
@@ -80,21 +80,13 @@ if(ENABLE_CUDA_BACKEND)
|
|||||||
set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62)
|
set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# https://docs.nvidia.com/cuda/cuda-samples/index.html#new-features-in-cuda-toolkit-11-6
|
|
||||||
# Not sure about this. Why the heck did they change this?
|
|
||||||
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.6")
|
|
||||||
set(CUDA_SAMPLES_PATH ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/Common)
|
|
||||||
else()
|
|
||||||
set(CUDA_SAMPLES_PATH ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp)
|
add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp)
|
||||||
|
|
||||||
if(NOT ${PCIUTILS_FOUND})
|
if(NOT ${PCIUTILS_FOUND})
|
||||||
add_dependencies(cuda_backend pciutils)
|
add_dependencies(cuda_backend pciutils)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
target_include_directories(cuda_backend PUBLIC ${CUDA_SAMPLES_PATH} ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include)
|
target_include_directories(cuda_backend PUBLIC ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include)
|
||||||
|
|
||||||
target_link_libraries(cuda_backend PRIVATE cudart)
|
target_link_libraries(cuda_backend PRIVATE cudart)
|
||||||
target_link_libraries(gpufetch cuda_backend)
|
target_link_libraries(gpufetch cuda_backend)
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
#include "../cuda/cuda.hpp"
|
#include "../cuda/cuda.hpp"
|
||||||
#include "../cuda/uarch.hpp"
|
#include "../cuda/uarch.hpp"
|
||||||
|
|
||||||
static const char* VERSION = "0.23";
|
static const char* VERSION = "0.24";
|
||||||
|
|
||||||
void print_help(char *argv[]) {
|
void print_help(char *argv[]) {
|
||||||
const char **t = args_str;
|
const char **t = args_str;
|
||||||
|
|||||||
@@ -1,8 +1,11 @@
|
|||||||
#include <helper_cuda.h>
|
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
#include "cuda.hpp"
|
#include "cuda.hpp"
|
||||||
#include "uarch.hpp"
|
#include "uarch.hpp"
|
||||||
|
#include "gpufetch_helper_cuda.hpp"
|
||||||
#include "../common/pci.hpp"
|
#include "../common/pci.hpp"
|
||||||
#include "../common/global.hpp"
|
#include "../common/global.hpp"
|
||||||
#include "../common/uarch.hpp"
|
#include "../common/uarch.hpp"
|
||||||
|
|||||||
60
src/cuda/gpufetch_helper_cuda.hpp
Normal file
60
src/cuda/gpufetch_helper_cuda.hpp
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
#ifndef __GPUFETCH_HELPER_CUDA__
|
||||||
|
#define __GPUFETCH_HELPER_CUDA__
|
||||||
|
|
||||||
|
// gpufetch self contained helper_cuda.h
|
||||||
|
//
|
||||||
|
// Avoids relying on helper_cuda.h, which is
|
||||||
|
// often very hard to include properly, causing
|
||||||
|
// compilation issues.
|
||||||
|
//
|
||||||
|
// URL: https://github.com/NVIDIA/cuda-samples
|
||||||
|
// Commit: 2e41896
|
||||||
|
|
||||||
|
inline int _ConvertSMVer2Cores(int major, int minor) {
|
||||||
|
// Defines for GPU Architecture types (using the SM version to determine
|
||||||
|
// the # of cores per SM)
|
||||||
|
typedef struct {
|
||||||
|
int SM; // 0xMm (hexadecimal notation), M = SM Major version,
|
||||||
|
// and m = SM minor version
|
||||||
|
int Cores;
|
||||||
|
} sSMtoCores;
|
||||||
|
|
||||||
|
sSMtoCores nGpuArchCoresPerSM[] = {
|
||||||
|
{0x30, 192},
|
||||||
|
{0x32, 192},
|
||||||
|
{0x35, 192},
|
||||||
|
{0x37, 192},
|
||||||
|
{0x50, 128},
|
||||||
|
{0x52, 128},
|
||||||
|
{0x53, 128},
|
||||||
|
{0x60, 64},
|
||||||
|
{0x61, 128},
|
||||||
|
{0x62, 128},
|
||||||
|
{0x70, 64},
|
||||||
|
{0x72, 64},
|
||||||
|
{0x75, 64},
|
||||||
|
{0x80, 64},
|
||||||
|
{0x86, 128},
|
||||||
|
{0x87, 128},
|
||||||
|
{-1, -1}};
|
||||||
|
|
||||||
|
int index = 0;
|
||||||
|
|
||||||
|
while (nGpuArchCoresPerSM[index].SM != -1) {
|
||||||
|
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
|
||||||
|
return nGpuArchCoresPerSM[index].Cores;
|
||||||
|
}
|
||||||
|
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we don't find the values, we default to using the previous one
|
||||||
|
// to run properly
|
||||||
|
printf(
|
||||||
|
"MapSMtoCores for SM %d.%d is undefined."
|
||||||
|
" Default to use %d Cores/SM\n",
|
||||||
|
major, minor, nGpuArchCoresPerSM[index - 1].Cores);
|
||||||
|
return nGpuArchCoresPerSM[index - 1].Cores;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -1,7 +1,9 @@
|
|||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#include <helper_cuda.h>
|
#include <cstdlib>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
#include "../common/uarch.hpp"
|
#include "../common/uarch.hpp"
|
||||||
#include "../common/global.hpp"
|
#include "../common/global.hpp"
|
||||||
@@ -329,6 +331,7 @@ char* get_str_chip(struct uarch* arch) {
|
|||||||
return arch->chip_str;
|
return arch->chip_str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: What about _ConvertSMVer2ArchName?
|
||||||
const char* get_str_uarch_cuda(struct uarch* arch) {
|
const char* get_str_uarch_cuda(struct uarch* arch) {
|
||||||
return uarch_str[arch->uarch];
|
return uarch_str[arch->uarch];
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user