From 844377f17afbba5c5fec22da59b265c80588259a Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Wed, 8 Dec 2021 11:15:59 +0100 Subject: [PATCH] [v0.11] Add support for printing EUs (currently only in Gen9/Gen9.5) --- src/common/gpu.cpp | 7 +++++++ src/common/gpu.hpp | 8 ++++++++ src/common/printer.cpp | 6 ++++++ src/cuda/cuda.cpp | 8 -------- src/intel/intel.cpp | 5 +++++ src/intel/intel.hpp | 1 + src/intel/uarch.cpp | 33 +++++++++++++++++++++++++++++++++ src/intel/uarch.hpp | 1 + 8 files changed, 61 insertions(+), 8 deletions(-) diff --git a/src/common/gpu.cpp b/src/common/gpu.cpp index ed00694..3d2feee 100644 --- a/src/common/gpu.cpp +++ b/src/common/gpu.cpp @@ -148,3 +148,10 @@ char* get_str_peak_performance_tensor(struct gpu_info* gpu) { return get_str_peak_performance_generic(gpu->peak_performance_t); } +char* get_str_generic(int32_t data) { + // An int32_t prints as at most 10 decimal digits, +1 for a possible minus sign, +1 for the terminating NUL + uint32_t max_size = 12; + char* dummy = (char *) ecalloc(max_size, sizeof(char)); + snprintf(dummy, max_size, "%d", data); + return dummy; +} diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp index dc08a3a..60fcbc8 100644 --- a/src/common/gpu.hpp +++ b/src/common/gpu.hpp @@ -44,6 +44,12 @@ struct topology { int32_t tensor_cores; }; +struct topology_i { + int32_t slices; + int32_t subslices; + int32_t eu_subslice; +}; + struct memory { int64_t size_bytes; MEMTYPE type; @@ -59,6 +65,7 @@ struct gpu_info { int64_t freq; struct pci* pci; struct topology* topo; + struct topology_i* topo_i; struct memory* mem; struct cache* cach; int64_t peak_performance; @@ -76,5 +83,6 @@ char* get_str_memory_clock(struct gpu_info* gpu); char* get_str_l2(struct gpu_info* gpu); char* get_str_peak_performance(struct gpu_info* gpu); char* get_str_peak_performance_tensor(struct gpu_info* gpu); +char* get_str_generic(int32_t data); #endif diff --git a/src/common/printer.cpp b/src/common/printer.cpp index cf21f10..a5650c0 100644 --- a/src/common/printer.cpp +++ b/src/common/printer.cpp @@ 
-10,6 +10,7 @@ #include "../common/gpu.hpp" #include "../intel/uarch.hpp" +#include "../intel/intel.hpp" #include "../cuda/cuda.hpp" #include "../cuda/uarch.hpp" @@ -41,6 +42,7 @@ enum { ATTRIBUTE_CORESPERMP, ATTRIBUTE_CUDA_CORES, ATTRIBUTE_TENSOR_CORES, + ATTRIBUTE_EUS, ATTRIBUTE_L2, ATTRIBUTE_MEMORY, ATTRIBUTE_MEMORY_FREQ, @@ -60,6 +62,7 @@ static const char* ATTRIBUTE_FIELDS [] = { "Cores/SM:", "CUDA Cores:", "Tensor Cores:", + "Execution Units:", "L2 Size:", "Memory:", "Memory frequency:", @@ -79,6 +82,7 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = { "Cores/SM:", "CUDA Cores:", "Tensor Cores:", + "EUs:", "L2 Size:", "Memory:", "Memory freq.:", @@ -366,11 +370,13 @@ bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, stru char* uarch = get_str_uarch_intel(gpu->arch); char* gt = get_str_gt(gpu->arch); char* manufacturing_process = get_str_process(gpu->arch); + char* eus = get_str_eu(gpu); setAttribute(art, ATTRIBUTE_NAME, gpu_name); setAttribute(art, ATTRIBUTE_UARCH, uarch); setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process); setAttribute(art, ATTRIBUTE_GT, gt); + setAttribute(art, ATTRIBUTE_EUS, eus); const char** attribute_fields = ATTRIBUTE_FIELDS; uint32_t longest_attribute = longest_attribute_length(art, attribute_fields); diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp index 6554dfc..1a70c59 100644 --- a/src/cuda/cuda.cpp +++ b/src/cuda/cuda.cpp @@ -144,14 +144,6 @@ struct gpu_info* get_gpu_info_cuda(int gpu_idx) { return gpu; } -char* get_str_generic(int32_t data) { - // Largest int is 10, +1 for possible negative, +1 for EOL - uint32_t max_size = 12; - char* dummy = (char *) ecalloc(max_size, sizeof(char)); - snprintf(dummy, max_size, "%d", data); - return dummy; -} - char* get_str_sm(struct gpu_info* gpu) { return get_str_generic(gpu->topo->streaming_mp); } diff --git a/src/intel/intel.cpp b/src/intel/intel.cpp index 93df3cf..1ea5bb3 100644 --- a/src/intel/intel.cpp +++ b/src/intel/intel.cpp @@ -15,6 +15,7 @@ 
struct gpu_info* get_gpu_info_intel() { gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL); gpu->arch = get_uarch_from_pci(gpu->pci); gpu->name = get_name_from_uarch(gpu->arch); + gpu->topo_i = get_topology_info(gpu->arch); return gpu; } @@ -26,3 +27,7 @@ bool print_gpu_intel(struct gpu_info* gpu) { return true; } + +char* get_str_eu(struct gpu_info* gpu) { + return get_str_generic(gpu->topo_i->subslices * gpu->topo_i->eu_subslice); /* EU total = subslices * EUs per subslice; slices is not used. NOTE(review): with the -1/-1 fallback from CHECK_TOPO_END the product is 1 - confirm that is the intended output */ +} diff --git a/src/intel/intel.hpp b/src/intel/intel.hpp index 94ea86c..dc2ea72 100644 --- a/src/intel/intel.hpp +++ b/src/intel/intel.hpp @@ -5,5 +5,6 @@ struct gpu_info* get_gpu_info_intel(); bool print_gpu_intel(struct gpu_info* gpu); +char* get_str_eu(struct gpu_info* gpu); #endif diff --git a/src/intel/uarch.cpp b/src/intel/uarch.cpp index 6e9bcdd..5300b5c 100644 --- a/src/intel/uarch.cpp +++ b/src/intel/uarch.cpp @@ -61,6 +61,17 @@ static const char *gt_str[] = { else if (arch->chip == chip_) fill_uarch(arch, str, uarch, gt, process); #define CHECK_UARCH_END else { printBug("map_chip_to_uarch: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, GT_UNKNOWN, 0); } +#define CHECK_TOPO_START if (false) {} +#define CHECK_TOPO(topo, arch, uarch_, gt_, eu_sub, sub, sli) \ + else if(arch->uarch == uarch_ && arch->gt == gt_) fill_topo(topo, eu_sub, sub, sli); +#define CHECK_TOPO_END else { printBug("TODOO"); fill_topo(topo, -1, -1, -1); } /* Fallback when no CHECK_TOPO entry matched: stores -1 in all three fields. NOTE(review): the TODOO text looks like a placeholder bug message */ + +void fill_topo(struct topology_i* topo_i, int32_t eu_sub, int32_t sub, int32_t sli) { + topo_i->slices = sli; + topo_i->subslices = sub; + topo_i->eu_subslice = eu_sub; +} + void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, int32_t gt, uint32_t process) { arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1)); strcpy(arch->chip_str, str); @@ -138,3 +149,25 @@ char* get_name_from_uarch(struct uarch* arch) { sprintf(name, "Intel %s", arch->chip_str); return name; } + +/* + * 
https://en.wikichip.org/wiki/intel/microarchitectures/gen9#Configuration (Gen9 configuration reference). get_topology_info() allocates a topology_i with emalloc and fills it from the first CHECK_TOPO entry whose uarch and GT both match arch. + */ +struct topology_i* get_topology_info(struct uarch* arch) { + struct topology_i* topo = (struct topology_i*) emalloc(sizeof(struct topology_i)); + + // Entry syntax after (topo, arch, uarch, gt): EU per subslice, Subslices, Slices + CHECK_TOPO_START + // Gen9 + CHECK_TOPO(topo, arch, UARCH_GEN9, GT1, 6, 2, 1) + CHECK_TOPO(topo, arch, UARCH_GEN9, GT2, 8, 3, 1) + CHECK_TOPO(topo, arch, UARCH_GEN9, GT3, 8, 6, 2) + CHECK_TOPO(topo, arch, UARCH_GEN9, GT4e, 8, 9, 3) + // Gen9.5 + CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1, 6, 2, 1) + CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT2, 8, 3, 1) + CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3, 8, 6, 2) + CHECK_TOPO_END + + return topo; +} diff --git a/src/intel/uarch.hpp b/src/intel/uarch.hpp index 2c287e3..2947988 100644 --- a/src/intel/uarch.hpp +++ b/src/intel/uarch.hpp @@ -9,5 +9,6 @@ struct uarch* get_uarch_from_pci(struct pci* pci); char* get_name_from_uarch(struct uarch* arch); char* get_str_gt(struct uarch* arch); char* get_str_uarch_intel(struct uarch* arch); +struct topology_i* get_topology_info(struct uarch* arch); #endif