diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp
index dd26dc0..c02bdaf 100644
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -27,7 +27,7 @@ struct cache {
 };
 
 struct topology {
-  int32_t shared_mp;
+  int32_t streaming_mp;
   int32_t cores_per_mp;
   int32_t cuda_cores;
 };
diff --git a/src/common/main.cpp b/src/common/main.cpp
index fd40e7f..5fe60de 100644
--- a/src/common/main.cpp
+++ b/src/common/main.cpp
@@ -54,9 +54,9 @@ int main(int argc, char* argv[]) {
     return EXIT_FAILURE;
 
   printf("Name: %s\n", get_str_gpu_name(gpu));
-  printf("Microarchitecture: %s\n", get_str_uarch(gpu));
-  printf("Compute Capability: %s\n", get_str_cc(gpu));
-  printf("Technology: %s\n", get_str_process(gpu));
+  printf("Microarchitecture: %s\n", get_str_uarch(gpu->arch));
+  printf("Compute Capability: %s\n", get_str_cc(gpu->arch));
+  printf("Technology: %s\n", get_str_process(gpu->arch));
   printf("Max Frequency: %s\n", get_str_freq(gpu));
   printf("SM: %s\n", get_str_sm(gpu));
   printf("Cores/MP: %s\n", get_str_cores_sm(gpu));
diff --git a/src/cuda/uarch.cpp b/src/cuda/uarch.cpp
index d5bb45a..97bc5ab 100644
--- a/src/cuda/uarch.cpp
+++ b/src/cuda/uarch.cpp
@@ -1,7 +1,12 @@
+#include
+#include
 #include
 #include
 
+#include "../common/global.hpp"
+
 typedef uint32_t MICROARCH;
+typedef uint32_t GPUCHIP;
 
 // Data not available
 #define NA -1
@@ -11,28 +16,120 @@ typedef uint32_t MICROARCH;
 
 enum {
   UARCH_UNKNOWN,
-  UARCH_TESLA
+  UARCH_TESLA,
+  UARCH_FERMI,
+  UARCH_KEPLER,
+  UARCH_MAXWELL,
+  UARCH_PASCAL,
+  UARCH_VOLTA,
+  UARCH_TURING,
+  UARCH_AMPERE,
+};
+
+// TODO
+enum {
+  CHIP_GA100,
+  CHIP_GA102,
+  CHIP_GA104,
+  CHIP_GA106,
+  CHIP_GA107
+};
+
+static const char *uarch_str[] = {
+  /*[UARCH_UNKNOWN] = */ STRING_UNKNOWN,
+  /*[UARCH_TESLA]   = */ "Tesla",
+  /*[UARCH_FERMI]   = */ "Fermi",
+  /*[UARCH_KEPLER]  = */ "Kepler",
+  /*[UARCH_MAXWELL] = */ "Maxwell",
+  /*[UARCH_PASCAL]  = */ "Pascal",
+  /*[UARCH_VOLTA]   = */ "Volta",
+  /*[UARCH_TURING]  = */ "Turing",
+  /*[UARCH_AMPERE]  = */ "Ampere",
 };
 
 struct uarch {
+  int32_t cc_major;
+  int32_t cc_minor;
+  int32_t compute_capability;
   MICROARCH uarch;
+  GPUCHIP chip;
   char* uarch_str;
   int32_t process;
 };
 
+void map_cc_to_uarch(struct uarch* arch) {
+  switch(arch->compute_capability) {
+    case 10:
+    case 11:
+    case 12:
+    case 13:
+      arch->uarch = UARCH_TESLA;
+      break;
+    case 20:
+    case 21:
+      arch->uarch = UARCH_FERMI;
+      break;
+    case 30:
+    case 32:
+    case 35:
+    case 37:
+      arch->uarch = UARCH_KEPLER;
+      break;
+    case 50:
+    case 52:
+    case 53:
+      arch->uarch = UARCH_MAXWELL;
+      break;
+    case 60:
+    case 61:
+    case 62:
+      arch->uarch = UARCH_PASCAL;
+      break;
+    case 70:
+    case 72:
+      arch->uarch = UARCH_VOLTA;
+      break;
+    case 75:
+      arch->uarch = UARCH_TURING;
+      break;
+    case 80:
+    case 86:
+      arch->uarch = UARCH_AMPERE;
+      break;
+    default:
+      arch->uarch = UARCH_UNKNOWN;
+      printErr("Invalid uarch: %d.%d\n", arch->cc_major, arch->cc_minor);
+  }
+}
+
 struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
-  return NULL;
+  struct uarch* arch = (struct uarch*) emalloc(sizeof(struct uarch)); // NOTE(review): chip, uarch_str and process are not assigned in this function
+
+  int dev = 0;
+  cudaDeviceProp deviceProp;
+  cudaGetDeviceProperties(&deviceProp, dev); // NOTE(review): the returned status is not checked
+
+  arch->cc_major = deviceProp.major;
+  arch->cc_minor = deviceProp.minor;
+  arch->compute_capability = deviceProp.major * 10 + deviceProp.minor; // e.g. 8.6 -> 86, the key map_cc_to_uarch switches on
+
+  map_cc_to_uarch(arch);
+
+  return arch;
 }
 
-char* get_str_uarch(struct gpu_info* gpu) {
-  return NULL;
+const char* get_str_uarch(struct uarch* arch) {
+  return uarch_str[arch->uarch];
 }
 
-char* get_str_cc(struct gpu_info* gpu) {
-  return NULL;
+char* get_str_cc(struct uarch* arch) {
+  uint32_t max_size = 24; // room for any "%d.%d" of two int32_t values plus the terminating NUL (4 truncated e.g. "10.0")
+  char* cc = (char *) ecalloc(max_size, sizeof(char));
+  snprintf(cc, max_size, "%d.%d", arch->cc_major, arch->cc_minor);
+  return cc;
 }
 
-char* get_str_process(struct gpu_info* gpu) {
+char* get_str_process(struct uarch* arch) {
   return NULL;
 }
 
diff --git a/src/cuda/uarch.hpp b/src/cuda/uarch.hpp
index a81753d..81d75fb 100644
--- a/src/cuda/uarch.hpp
+++ b/src/cuda/uarch.hpp
@@ -4,9 +4,9 @@
 struct uarch;
 
 struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
-char* get_str_uarch(struct gpu_info* gpu);
-char* get_str_cc(struct gpu_info* gpu);
-char* get_str_process(struct gpu_info* gpu);
+const char* get_str_uarch(struct uarch* arch);
+char* get_str_cc(struct uarch* arch);
+char* get_str_process(struct uarch* arch);
 void free_uarch_struct(struct uarch* arch);
 
 #endif