From 981daef728029e34f823832c3d45d3694fc8fcf2 Mon Sep 17 00:00:00 2001
From: Dr-Noob
Date: Fri, 13 Aug 2021 16:36:10 +0200
Subject: [PATCH] [v0.01] Retrieve more info; frequency, topology and peak performance

---
Review notes (placed after the '---' separator, i.e. outside the commit
message):

What the patch does
* src/common/gpu.cpp: get_str_freq() and get_str_peak_performance() now
  return heap-allocated strings (ecalloc/emalloc) instead of NULL.
* src/cuda/cuda.cpp: get_topology_info() fills the topology from
  cudaDeviceProp, gpu->freq is taken from deviceProp.clockRate, and
  get_peak_performance() computes freq * 1000000 * cuda_cores * 2.
* src/cuda/cuda.hpp, src/common/main.cpp: get_str_sm(), get_str_cores_sm()
  and get_str_cuda_cores() return int32_t and are printed with %d.
* src/cuda/nvmlb.cpp: the nvmlDeviceGetPciInfo() result is compared against
  NVML_SUCCESS instead of against itself.

Changes made during review (each one replaces a single '+' line, so the hunk
line counts and the diffstat below are unchanged)
* get_str_freq(): the buffer was 10 bytes, but "%.3f MHz" needs 12 bytes for
  a three-digit value ("705.000 MHz" plus the terminator), so snprintf()
  truncated the unit for every frequency from 10 to 999 MHz. The buffer is
  now 7+1+3+1 = 12 bytes.
* get_str_freq(): a space now separates the format literal from
  STRING_GIGAHERZ / STRING_MEGAHERZ. Without it a C++11 compiler lexes the
  macro name as a user-defined-literal suffix.
* get_str_freq(): the unknown branch formats STRING_UNKNOWN through "%s"
  with the full buffer size instead of using it as the format string.
* get_peak_performance(): the product is computed in int64_t, and a negative
  frequency yields -1, the value get_str_peak_performance() tests for.
  NOTE(review): the declared type of gpu->freq is not visible in this patch;
  the cast is a no-op if it is already 64-bit.
* UNKNOWN_FREQ expands to (-1) instead of -1.

To confirm before applying
* The header names inside '#include <...>' and the author e-mail were
  missing from the copy under review. The four standard headers below are
  inferred from the functions the code calls (snprintf, strlen/strncpy,
  assert) and must be checked against the tree.
* Blank context lines and runs of spaces inside context lines were
  reconstructed; context that does not match the tree will make the hunk
  fail to apply.

 src/common/gpu.cpp  | 51 ++++++++++++++++++++++++++++++++++++++++++---
 src/common/gpu.hpp  |  2 ++
 src/common/main.cpp |  6 +++---
 src/cuda/cuda.cpp   | 24 ++++++++++++---------
 src/cuda/cuda.hpp   |  6 +++---
 src/cuda/nvmlb.cpp  |  2 +-
 6 files changed, 71 insertions(+), 20 deletions(-)

diff --git a/src/common/gpu.cpp b/src/common/gpu.cpp
index e6cf603..4dbbee0 100644
--- a/src/common/gpu.cpp
+++ b/src/common/gpu.cpp
@@ -1,12 +1,37 @@
-#include "gpu.hpp"
 #include <cstddef>
+#include <cstdio>
+#include <cstring>
+#include <cassert>
+
+#include "../common/global.hpp"
+#include "gpu.hpp"
+
+#define STRING_YES        "Yes"
+#define STRING_NO         "No"
+#define STRING_NONE       "None"
+#define STRING_MEGAHERZ   "MHz"
+#define STRING_GIGAHERZ   "GHz"
+#define STRING_KILOBYTES  "KB"
+#define STRING_MEGABYTES  "MB"
 
 char* get_str_gpu_name(struct gpu_info* gpu) {
   return gpu->name;
 }
 
 char* get_str_freq(struct gpu_info* gpu) {
-  return NULL;
+  // Up to 7 chars for "%.3f" (e.g. 999.000), 1 for ' ', 3 for '(M/G)Hz', 1 for '\0'
+  uint32_t size = (7+1+3+1);
+  assert(strlen(STRING_UNKNOWN)+1 <= size);
+  char* string = (char *) ecalloc(size, sizeof(char));
+
+  if(gpu->freq == UNKNOWN_FREQ || gpu->freq < 0)
+    snprintf(string, size, "%s", STRING_UNKNOWN);
+  else if(gpu->freq >= 1000)
+    snprintf(string, size, "%.3f " STRING_GIGAHERZ, (float)(gpu->freq)/1000);
+  else
+    snprintf(string, size, "%.3f " STRING_MEGAHERZ, (float)gpu->freq);
+
+  return string;
 }
 
 char* get_str_memory_size(struct gpu_info* gpu) {
@@ -26,5 +51,25 @@ char* get_str_l2(struct gpu_info* gpu) {
 }
 
 char* get_str_peak_performance(struct gpu_info* gpu) {
-  return NULL;
+  char* str;
+
+  if(gpu->peak_performance == -1) {
+    str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
+    strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
+    return str;
+  }
+
+  // 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s
+  double flopsd = (double) gpu->peak_performance;
+  uint32_t max_size = 7+1+7+1;
+  str = (char *) ecalloc(max_size, sizeof(char));
+
+  if(flopsd >= (double)1000000000000.0)
+    snprintf(str, max_size, "%.2f TFLOP/s", flopsd/1000000000000);
+  else if(flopsd >= 1000000000.0)
+    snprintf(str, max_size, "%.2f GFLOP/s", flopsd/1000000000);
+  else
+    snprintf(str, max_size, "%.2f MFLOP/s", flopsd/1000000);
+
+  return str;
 }
diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp
index 9cf1e7a..89ea9ec 100644
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -7,6 +7,8 @@
 #include "../cuda/nvmlb.hpp"
 #include "../cuda/pci.hpp"
 
+#define UNKNOWN_FREQ (-1)
+
 enum {
   GPU_VENDOR_NVIDIA
 };
diff --git a/src/common/main.cpp b/src/common/main.cpp
index 5908285..6609162 100644
--- a/src/common/main.cpp
+++ b/src/common/main.cpp
@@ -59,9 +59,9 @@ int main(int argc, char* argv[]) {
   printf("Compute Capability: %s\n", get_str_cc(gpu->arch));
   printf("Technology: %s\n", get_str_process(gpu->arch));
   printf("Max Frequency: %s\n", get_str_freq(gpu));
-  printf("SM: %s\n", get_str_sm(gpu));
-  printf("Cores/MP: %s\n", get_str_cores_sm(gpu));
-  printf("CUDA cores: %s\n", get_str_cuda_cores(gpu));
+  printf("SM: %d\n", get_str_sm(gpu));
+  printf("Cores/MP: %d\n", get_str_cores_sm(gpu));
+  printf("CUDA cores: %d\n", get_str_cuda_cores(gpu));
   printf("Memory size: %s\n", get_str_memory_size(gpu));
   printf("Memory type: %s\n", get_str_memory_type(gpu));
   printf("L1 size: %s\n", get_str_l1(gpu));
diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp
index d194df2..5e59e5a 100644
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -12,9 +12,13 @@ struct cache* get_cache_info(struct gpu_info* gpu) {
   return cach;
 }
 
-struct topology* get_topology_info(struct gpu_info* gpu) {
+struct topology* get_topology_info(struct gpu_info* gpu, cudaDeviceProp prop) {
   struct topology* topo = (struct topology*) emalloc(sizeof(struct topology));
 
+  topo->streaming_mp = prop.multiProcessorCount;
+  topo->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor);
+  topo->cuda_cores = topo->streaming_mp * topo->cores_per_mp;
+
   return topo;
 }
 
@@ -25,7 +29,7 @@ struct memory* get_memory_info(struct gpu_info* gpu) {
 }
 
 int64_t get_peak_performance(struct gpu_info* gpu) {
-  return 1000;
+  return gpu->freq < 0 ? -1 : (int64_t) gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
 }
 
 struct gpu_info* get_gpu_info() {
@@ -38,10 +42,10 @@ struct gpu_info* get_gpu_info() {
   cudaDeviceProp deviceProp;
   cudaGetDeviceProperties(&deviceProp, dev);
 
+  gpu->freq = deviceProp.clockRate * 1e-3f;
   gpu->vendor = GPU_VENDOR_NVIDIA;
   gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1));
   strcpy(gpu->name, deviceProp.name);
-  gpu->freq = 10000;
 
   gpu->nvmld = nvml_init();
   if(nvml_get_pci_info(dev, gpu->nvmld)) {
@@ -50,21 +54,21 @@ struct gpu_info* get_gpu_info() {
 
   gpu->arch = get_uarch_from_cuda(gpu);
   gpu->cach = get_cache_info(gpu);
-  gpu->topo = get_topology_info(gpu);
+  gpu->topo = get_topology_info(gpu, deviceProp);
   gpu->peak_performance = get_peak_performance(gpu);
 
   return gpu;
 }
 
-char* get_str_sm(struct gpu_info* gpu) {
-  return NULL;
+int32_t get_str_sm(struct gpu_info* gpu) {
+  return gpu->topo->streaming_mp;
 }
 
-char* get_str_cores_sm(struct gpu_info* gpu) {
-  return NULL;
+int32_t get_str_cores_sm(struct gpu_info* gpu) {
+  return gpu->topo->cores_per_mp;
 }
 
-char* get_str_cuda_cores(struct gpu_info* gpu) {
-  return NULL;
+int32_t get_str_cuda_cores(struct gpu_info* gpu) {
+  return gpu->topo->cuda_cores;
 }
 
diff --git a/src/cuda/cuda.hpp b/src/cuda/cuda.hpp
index 72ea92a..f77329e 100644
--- a/src/cuda/cuda.hpp
+++ b/src/cuda/cuda.hpp
@@ -4,8 +4,8 @@
 #include "../common/gpu.hpp"
 
 struct gpu_info* get_gpu_info();
-char* get_str_sm(struct gpu_info* gpu);
-char* get_str_cores_sm(struct gpu_info* gpu);
-char* get_str_cuda_cores(struct gpu_info* gpu);
+int32_t get_str_sm(struct gpu_info* gpu);
+int32_t get_str_cores_sm(struct gpu_info* gpu);
+int32_t get_str_cuda_cores(struct gpu_info* gpu);
 
 #endif
diff --git a/src/cuda/nvmlb.cpp b/src/cuda/nvmlb.cpp
index 1f566ea..357914c 100644
--- a/src/cuda/nvmlb.cpp
+++ b/src/cuda/nvmlb.cpp
@@ -37,7 +37,7 @@ bool nvml_get_pci_info(int dev, struct nvml_data* data) {
     return false;
   }
 
-  if ((result = nvmlDeviceGetPciInfo(device, &data->pci)) != result) {
+  if ((result = nvmlDeviceGetPciInfo(device, &data->pci)) != NVML_SUCCESS) {
     printErr("nvmlDeviceGetPciInfo: %s\n", nvmlErrorString(result));
     return false;
   }