From 821b6e760ed87ba2aa9dbec4f89ce6ccdd607314 Mon Sep 17 00:00:00 2001
From: Dr-Noob <peibolms@gmail.com>
Date: Tue, 23 Nov 2021 18:09:13 +0100
Subject: [PATCH] [v0.10] Add support for displaying the number of tensor cores

---
 src/common/gpu.hpp     |  1 +
 src/common/printer.cpp |  6 ++++++
 src/cuda/cuda.cpp      | 14 ++++++++++++++
 src/cuda/cuda.hpp      |  1 +
 4 files changed, 22 insertions(+)

diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp
index 329f1fd..9632ffb 100644
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -40,6 +40,7 @@ struct topology {
   int32_t streaming_mp;
   int32_t cores_per_mp;
   int32_t cuda_cores;
+  int32_t tensor_cores;
 };
 
 struct memory {
diff --git a/src/common/printer.cpp b/src/common/printer.cpp
index bee156f..a84ebb8 100644
--- a/src/common/printer.cpp
+++ b/src/common/printer.cpp
@@ -38,6 +38,7 @@ enum {
   ATTRIBUTE_STREAMINGMP,
   ATTRIBUTE_CORESPERMP,
   ATTRIBUTE_CUDA_CORES,
+  ATTRIBUTE_TENSOR_CORES,
   ATTRIBUTE_L2,
   ATTRIBUTE_MEMORY,
   ATTRIBUTE_MEMORY_FREQ,
@@ -54,6 +55,7 @@ static const char* ATTRIBUTE_FIELDS [] = {
   "SMs:",
   "Cores/SM:",
   "CUDA cores:",
+  "Tensor cores:",
   "L2 Size:",
   "Memory:",
   "Memory frequency:",
@@ -350,6 +352,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
   char* sms = get_str_sm(gpu);
   char* corespersm = get_str_cores_sm(gpu);
   char* cores = get_str_cuda_cores(gpu);
+  char* tensorc = get_str_tensor_cores(gpu);
   char* max_frequency = get_str_freq(gpu);
   char* l2 = get_str_l2(gpu);
   char* mem_size = get_str_memory_size(gpu);
@@ -372,6 +375,9 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
   setAttribute(art, ATTRIBUTE_STREAMINGMP, sms);
   setAttribute(art, ATTRIBUTE_CORESPERMP, corespersm);
   setAttribute(art, ATTRIBUTE_CUDA_CORES, cores);
+  if(gpu->topo->tensor_cores >= 0) {
+    setAttribute(art, ATTRIBUTE_TENSOR_CORES, tensorc);
+  }
   setAttribute(art, ATTRIBUTE_MEMORY, mem);
   setAttribute(art, ATTRIBUTE_MEMORY_FREQ, mem_freq);
   setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp
index 698e05a..b39b109 100644
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -53,12 +53,19 @@ struct cache* get_cache_info(cudaDeviceProp prop) {
   return cach;
 }
 
+int get_tensor_cores(int sm, int major) { // sm: number of SMs; major: compute capability major version; returns the total tensor core count
+  if(major == 7) return sm * 8; // 8 per SM. NOTE(review): some CC 7.5 parts (GTX 16xx) reportedly have no tensor cores; major alone cannot tell them apart
+  else if(major == 8 || major == 9) return sm * 4; // 4 per SM; CC 9.x handled too (e.g. 132 SMs -> 528 tensor cores) instead of reporting 0
+  else return 0; // every other major version is reported as having no tensor cores
+}
+
 struct topology* get_topology_info(cudaDeviceProp prop) {
   struct topology* topo = (struct topology*) emalloc(sizeof(struct topology));
 
   topo->streaming_mp = prop.multiProcessorCount;
   topo->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor);
   topo->cuda_cores = topo->streaming_mp * topo->cores_per_mp;
+  topo->tensor_cores = get_tensor_cores(topo->streaming_mp, prop.major); // total tensor cores, computed from the SM count and prop.major
 
   return topo;
 }
@@ -174,3 +181,10 @@ char* get_str_cuda_cores(struct gpu_info* gpu) {
   return dummy;
 }
 
+char* get_str_tensor_cores(struct gpu_info* gpu) { // returns a newly allocated decimal string holding gpu->topo->tensor_cores
+  const uint32_t buf_len = 10; // capacity in bytes, terminator included; snprintf truncates anything longer
+  char* text = (char *) ecalloc(buf_len, sizeof(char)); // presumably a checked calloc wrapper - TODO confirm; buffer is handed to the caller
+  snprintf(text, buf_len, "%d", gpu->topo->tensor_cores);
+  return text;
+}
+
diff --git a/src/cuda/cuda.hpp b/src/cuda/cuda.hpp
index 057552a..a132675 100644
--- a/src/cuda/cuda.hpp
+++ b/src/cuda/cuda.hpp
@@ -8,5 +8,6 @@ int print_gpus_list();
 char* get_str_sm(struct gpu_info* gpu);
 char* get_str_cores_sm(struct gpu_info* gpu);
 char* get_str_cuda_cores(struct gpu_info* gpu);
+char* get_str_tensor_cores(struct gpu_info* gpu);
 
 #endif