From 844377f17afbba5c5fec22da59b265c80588259a Mon Sep 17 00:00:00 2001
From: Dr-Noob <peibolms@gmail.com>
Date: Wed, 8 Dec 2021 11:15:59 +0100
Subject: [PATCH] [v0.11] Add support for printing EUs (currently only in
 Gen9/Gen9.5)

---
 src/common/gpu.cpp     |  7 +++++++
 src/common/gpu.hpp     |  8 ++++++++
 src/common/printer.cpp |  6 ++++++
 src/cuda/cuda.cpp      |  8 --------
 src/intel/intel.cpp    |  5 +++++
 src/intel/intel.hpp    |  1 +
 src/intel/uarch.cpp    | 33 +++++++++++++++++++++++++++++++++
 src/intel/uarch.hpp    |  1 +
 8 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/src/common/gpu.cpp b/src/common/gpu.cpp
index ed00694..3d2feee 100644
--- a/src/common/gpu.cpp
+++ b/src/common/gpu.cpp
@@ -148,3 +148,10 @@ char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
   return get_str_peak_performance_generic(gpu->peak_performance_t);
 }
 
+char* get_str_generic(int32_t data) {
+  // An int32_t has at most 10 decimal digits, +1 for a possible minus sign, +1 for the terminating NUL
+  uint32_t max_size = 12;
+  char* dummy = (char *) ecalloc(max_size, sizeof(char));
+  snprintf(dummy, max_size, "%d", data);
+  return dummy;
+}
diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp
index dc08a3a..60fcbc8 100644
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -44,6 +44,12 @@ struct topology {
   int32_t tensor_cores;
 };
 
+struct topology_i { // Intel GPU topology, filled by fill_topo() in src/intel/uarch.cpp
+  int32_t slices;      // Slice count (-1 when the topology is unknown)
+  int32_t subslices;   // Subslice count; get_str_eu() multiplies it by eu_subslice
+  int32_t eu_subslice; // EUs per subslice (-1 when the topology is unknown)
+};
+
 struct memory {
   int64_t size_bytes;
   MEMTYPE type;
@@ -59,6 +65,7 @@ struct gpu_info {
   int64_t freq;
   struct pci* pci;
   struct topology* topo;
+  struct topology_i* topo_i;
   struct memory* mem;
   struct cache* cach;
   int64_t peak_performance;
@@ -76,5 +83,6 @@ char* get_str_memory_clock(struct gpu_info* gpu);
 char* get_str_l2(struct gpu_info* gpu);
 char* get_str_peak_performance(struct gpu_info* gpu);
 char* get_str_peak_performance_tensor(struct gpu_info* gpu);
+char* get_str_generic(int32_t data);
 
 #endif
diff --git a/src/common/printer.cpp b/src/common/printer.cpp
index cf21f10..a5650c0 100644
--- a/src/common/printer.cpp
+++ b/src/common/printer.cpp
@@ -10,6 +10,7 @@
 #include "../common/gpu.hpp"
 
 #include "../intel/uarch.hpp"
+#include "../intel/intel.hpp"
 #include "../cuda/cuda.hpp"
 #include "../cuda/uarch.hpp"
 
@@ -41,6 +42,7 @@ enum {
   ATTRIBUTE_CORESPERMP,
   ATTRIBUTE_CUDA_CORES,
   ATTRIBUTE_TENSOR_CORES,
+  ATTRIBUTE_EUS,
   ATTRIBUTE_L2,
   ATTRIBUTE_MEMORY,
   ATTRIBUTE_MEMORY_FREQ,
@@ -60,6 +62,7 @@ static const char* ATTRIBUTE_FIELDS [] = {
   "Cores/SM:",
   "CUDA Cores:",
   "Tensor Cores:",
+  "Execution Units:",
   "L2 Size:",
   "Memory:",
   "Memory frequency:",
@@ -79,6 +82,7 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
   "Cores/SM:",
   "CUDA Cores:",
   "Tensor Cores:",
+  "EUs:",
   "L2 Size:",
   "Memory:",
   "Memory freq.:",
@@ -366,11 +370,13 @@ bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, stru
   char* uarch = get_str_uarch_intel(gpu->arch);
   char* gt = get_str_gt(gpu->arch);
   char* manufacturing_process = get_str_process(gpu->arch);
+  char* eus = get_str_eu(gpu);
 
   setAttribute(art, ATTRIBUTE_NAME, gpu_name);
   setAttribute(art, ATTRIBUTE_UARCH, uarch);
   setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
   setAttribute(art, ATTRIBUTE_GT, gt);
+  setAttribute(art, ATTRIBUTE_EUS, eus);
 
   const char** attribute_fields = ATTRIBUTE_FIELDS;
   uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp
index 6554dfc..1a70c59 100644
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -144,14 +144,6 @@ struct gpu_info* get_gpu_info_cuda(int gpu_idx) {
   return gpu;
 }
 
-char* get_str_generic(int32_t data) {
-  // Largest int is 10, +1 for possible negative, +1 for EOL
-  uint32_t max_size = 12;
-  char* dummy = (char *) ecalloc(max_size, sizeof(char));
-  snprintf(dummy, max_size, "%d", data);
-  return dummy;
-}
-
 char* get_str_sm(struct gpu_info* gpu) {
   return get_str_generic(gpu->topo->streaming_mp);
 }
diff --git a/src/intel/intel.cpp b/src/intel/intel.cpp
index 93df3cf..1ea5bb3 100644
--- a/src/intel/intel.cpp
+++ b/src/intel/intel.cpp
@@ -15,6 +15,7 @@ struct gpu_info* get_gpu_info_intel() {
   gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL);
   gpu->arch = get_uarch_from_pci(gpu->pci);
   gpu->name = get_name_from_uarch(gpu->arch);
+  gpu->topo_i = get_topology_info(gpu->arch);
 
   return gpu;
 }
@@ -26,3 +27,7 @@ bool print_gpu_intel(struct gpu_info* gpu) {
 
   return true;
 }
+
+char* get_str_eu(struct gpu_info* gpu) { // Total EU count as a decimal string built by get_str_generic()
+  return get_str_generic((gpu->topo_i->subslices < 0 || gpu->topo_i->eu_subslice < 0) ? -1 : gpu->topo_i->subslices * gpu->topo_i->eu_subslice); // Unknown topologies hold -1 in every field; report -1 rather than (-1) * (-1) == 1
+}
diff --git a/src/intel/intel.hpp b/src/intel/intel.hpp
index 94ea86c..dc2ea72 100644
--- a/src/intel/intel.hpp
+++ b/src/intel/intel.hpp
@@ -5,5 +5,6 @@
 
 struct gpu_info* get_gpu_info_intel();
 bool print_gpu_intel(struct gpu_info* gpu);
+char* get_str_eu(struct gpu_info* gpu);
 
 #endif
diff --git a/src/intel/uarch.cpp b/src/intel/uarch.cpp
index 6e9bcdd..5300b5c 100644
--- a/src/intel/uarch.cpp
+++ b/src/intel/uarch.cpp
@@ -61,6 +61,17 @@ static const char *gt_str[] = {
    else if (arch->chip == chip_) fill_uarch(arch, str, uarch, gt, process);
 #define CHECK_UARCH_END else { printBug("map_chip_to_uarch: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, GT_UNKNOWN, 0); }
 
+#define CHECK_TOPO_START if (false) {} // Opens the if / else-if chain that CHECK_TOPO and CHECK_TOPO_END extend
+#define CHECK_TOPO(topo, arch, uarch_, gt_, eu_sub, sub, sli) \
+  else if(arch->uarch == uarch_ && arch->gt == gt_) fill_topo(topo, eu_sub, sub, sli);
+#define CHECK_TOPO_END else { printBug("get_topology_info: Unknown topology for chip id: %d", arch->chip); fill_topo(topo, -1, -1, -1); } // Relies on `topo` and `arch` being in scope at the expansion site
+
+void fill_topo(struct topology_i* topo_i, int32_t eu_sub, int32_t sub, int32_t sli) { // Note: argument order is the reverse of the struct field order
+  topo_i->slices = sli;          // Slices
+  topo_i->subslices = sub;       // Subslices
+  topo_i->eu_subslice = eu_sub;  // EUs per subslice
+}
+
 void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, int32_t gt, uint32_t process) {
   arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1));
   strcpy(arch->chip_str, str);
@@ -138,3 +149,25 @@ char* get_name_from_uarch(struct uarch* arch) {
   sprintf(name, "Intel %s", arch->chip_str);
   return name;
 }
+
+/* Allocates and fills the topology for arch's uarch/gt pair; every field is -1 when no entry matches.
+ * Data source: https://en.wikichip.org/wiki/intel/microarchitectures/gen9#Configuration
+ */
+struct topology_i* get_topology_info(struct uarch* arch) {
+  struct topology_i* topo = (struct topology_i*) emalloc(sizeof(struct topology_i));
+
+  // Syntax: CHECK_TOPO(topo, arch, uarch, gt, EUs per subslice, subslices, slices)
+  CHECK_TOPO_START
+  // Gen9
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT1,  6, 2, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT2,  8, 3, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT3,  8, 6, 2)
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT4e, 8, 9, 3)
+  // Gen9.5
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1,  6, 2, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT2,  8, 3, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3,  8, 6, 2)
+  CHECK_TOPO_END // No match: logs through printBug and fills every field with -1
+
+  return topo;
+}
diff --git a/src/intel/uarch.hpp b/src/intel/uarch.hpp
index 2c287e3..2947988 100644
--- a/src/intel/uarch.hpp
+++ b/src/intel/uarch.hpp
@@ -9,5 +9,6 @@ struct uarch* get_uarch_from_pci(struct pci* pci);
 char* get_name_from_uarch(struct uarch* arch);
 char* get_str_gt(struct uarch* arch);
 char* get_str_uarch_intel(struct uarch* arch);
+struct topology_i* get_topology_info(struct uarch* arch);
 
 #endif