From 821b6e760ed87ba2aa9dbec4f89ce6ccdd607314 Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Tue, 23 Nov 2021 18:09:13 +0100 Subject: [PATCH] [v0.10] Add support for displaying the number of tensor cores --- src/common/gpu.hpp | 1 + src/common/printer.cpp | 6 ++++++ src/cuda/cuda.cpp | 14 ++++++++++++++ src/cuda/cuda.hpp | 1 + 4 files changed, 22 insertions(+) diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp index 329f1fd..9632ffb 100644 --- a/src/common/gpu.hpp +++ b/src/common/gpu.hpp @@ -40,6 +40,7 @@ struct topology { int32_t streaming_mp; int32_t cores_per_mp; int32_t cuda_cores; + int32_t tensor_cores; }; struct memory { diff --git a/src/common/printer.cpp b/src/common/printer.cpp index bee156f..a84ebb8 100644 --- a/src/common/printer.cpp +++ b/src/common/printer.cpp @@ -38,6 +38,7 @@ enum { ATTRIBUTE_STREAMINGMP, ATTRIBUTE_CORESPERMP, ATTRIBUTE_CUDA_CORES, + ATTRIBUTE_TENSOR_CORES, ATTRIBUTE_L2, ATTRIBUTE_MEMORY, ATTRIBUTE_MEMORY_FREQ, @@ -54,6 +55,7 @@ static const char* ATTRIBUTE_FIELDS [] = { "SMs:", "Cores/SM:", "CUDA cores:", + "Tensor cores:", "L2 Size:", "Memory:", "Memory frequency:", @@ -350,6 +352,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc char* sms = get_str_sm(gpu); char* corespersm = get_str_cores_sm(gpu); char* cores = get_str_cuda_cores(gpu); + char* tensorc = get_str_tensor_cores(gpu); char* max_frequency = get_str_freq(gpu); char* l2 = get_str_l2(gpu); char* mem_size = get_str_memory_size(gpu); @@ -372,6 +375,9 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc setAttribute(art, ATTRIBUTE_STREAMINGMP, sms); setAttribute(art, ATTRIBUTE_CORESPERMP, corespersm); setAttribute(art, ATTRIBUTE_CUDA_CORES, cores); + if(gpu->topo->tensor_cores >= 0) { + setAttribute(art, ATTRIBUTE_TENSOR_CORES, tensorc); + } setAttribute(art, ATTRIBUTE_MEMORY, mem); setAttribute(art, ATTRIBUTE_MEMORY_FREQ, mem_freq); setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width); diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp index 698e05a..b39b109 100644 --- a/src/cuda/cuda.cpp +++ b/src/cuda/cuda.cpp @@ -53,12 +53,19 @@ struct cache* get_cache_info(cudaDeviceProp prop) { return cach; } +int get_tensor_cores(int sm, int major) { + if(major == 7) return sm * 8; + else if(major == 8) return sm * 4; + else return 0; +} + struct topology* get_topology_info(cudaDeviceProp prop) { struct topology* topo = (struct topology*) emalloc(sizeof(struct topology)); topo->streaming_mp = prop.multiProcessorCount; topo->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor); topo->cuda_cores = topo->streaming_mp * topo->cores_per_mp; + topo->tensor_cores = get_tensor_cores(topo->streaming_mp, prop.major); return topo; } @@ -174,3 +181,10 @@ char* get_str_cuda_cores(struct gpu_info* gpu) { return dummy; } +char* get_str_tensor_cores(struct gpu_info* gpu) { + uint32_t max_size = 10; + char* dummy = (char *) ecalloc(max_size, sizeof(char)); + snprintf(dummy, max_size, "%d", gpu->topo->tensor_cores); + return dummy; +} + diff --git a/src/cuda/cuda.hpp b/src/cuda/cuda.hpp index 057552a..a132675 100644 --- a/src/cuda/cuda.hpp +++ b/src/cuda/cuda.hpp @@ -8,5 +8,6 @@ int print_gpus_list(); char* get_str_sm(struct gpu_info* gpu); char* get_str_cores_sm(struct gpu_info* gpu); char* get_str_cuda_cores(struct gpu_info* gpu); +char* get_str_tensor_cores(struct gpu_info* gpu); #endif