[v0.11] Add support for printing EUs (currently only in Gen9/Gen9.5)
This commit is contained in:
@@ -148,3 +148,10 @@ char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
|
|||||||
return get_str_peak_performance_generic(gpu->peak_performance_t);
|
return get_str_peak_performance_generic(gpu->peak_performance_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char* get_str_generic(int32_t data) {
|
||||||
|
// Largest int is 10, +1 for possible negative, +1 for EOL
|
||||||
|
uint32_t max_size = 12;
|
||||||
|
char* dummy = (char *) ecalloc(max_size, sizeof(char));
|
||||||
|
snprintf(dummy, max_size, "%d", data);
|
||||||
|
return dummy;
|
||||||
|
}
|
||||||
|
|||||||
@@ -44,6 +44,12 @@ struct topology {
|
|||||||
int32_t tensor_cores;
|
int32_t tensor_cores;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Topology description used for Intel GPUs (filled by get_topology_info()).
// All three fields are set to -1 when the uarch/GT combination is unknown.
struct topology_i {
|
||||||
|
// Number of slices
int32_t slices;
|
||||||
|
// Number of subslices; multiplied by eu_subslice to obtain the EU count.
// NOTE(review): looks like the total across all slices, not per slice - confirm.
int32_t subslices;
|
||||||
|
// Execution units (EUs) per subslice
int32_t eu_subslice;
|
||||||
|
};
|
||||||
|
|
||||||
struct memory {
|
struct memory {
|
||||||
int64_t size_bytes;
|
int64_t size_bytes;
|
||||||
MEMTYPE type;
|
MEMTYPE type;
|
||||||
@@ -59,6 +65,7 @@ struct gpu_info {
|
|||||||
int64_t freq;
|
int64_t freq;
|
||||||
struct pci* pci;
|
struct pci* pci;
|
||||||
struct topology* topo;
|
struct topology* topo;
|
||||||
|
struct topology_i* topo_i;
|
||||||
struct memory* mem;
|
struct memory* mem;
|
||||||
struct cache* cach;
|
struct cache* cach;
|
||||||
int64_t peak_performance;
|
int64_t peak_performance;
|
||||||
@@ -76,5 +83,6 @@ char* get_str_memory_clock(struct gpu_info* gpu);
|
|||||||
char* get_str_l2(struct gpu_info* gpu);
|
char* get_str_l2(struct gpu_info* gpu);
|
||||||
char* get_str_peak_performance(struct gpu_info* gpu);
|
char* get_str_peak_performance(struct gpu_info* gpu);
|
||||||
char* get_str_peak_performance_tensor(struct gpu_info* gpu);
|
char* get_str_peak_performance_tensor(struct gpu_info* gpu);
|
||||||
|
char* get_str_generic(int32_t data);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -10,6 +10,7 @@
|
|||||||
#include "../common/gpu.hpp"
|
#include "../common/gpu.hpp"
|
||||||
|
|
||||||
#include "../intel/uarch.hpp"
|
#include "../intel/uarch.hpp"
|
||||||
|
#include "../intel/intel.hpp"
|
||||||
#include "../cuda/cuda.hpp"
|
#include "../cuda/cuda.hpp"
|
||||||
#include "../cuda/uarch.hpp"
|
#include "../cuda/uarch.hpp"
|
||||||
|
|
||||||
@@ -41,6 +42,7 @@ enum {
|
|||||||
ATTRIBUTE_CORESPERMP,
|
ATTRIBUTE_CORESPERMP,
|
||||||
ATTRIBUTE_CUDA_CORES,
|
ATTRIBUTE_CUDA_CORES,
|
||||||
ATTRIBUTE_TENSOR_CORES,
|
ATTRIBUTE_TENSOR_CORES,
|
||||||
|
ATTRIBUTE_EUS,
|
||||||
ATTRIBUTE_L2,
|
ATTRIBUTE_L2,
|
||||||
ATTRIBUTE_MEMORY,
|
ATTRIBUTE_MEMORY,
|
||||||
ATTRIBUTE_MEMORY_FREQ,
|
ATTRIBUTE_MEMORY_FREQ,
|
||||||
@@ -60,6 +62,7 @@ static const char* ATTRIBUTE_FIELDS [] = {
|
|||||||
"Cores/SM:",
|
"Cores/SM:",
|
||||||
"CUDA Cores:",
|
"CUDA Cores:",
|
||||||
"Tensor Cores:",
|
"Tensor Cores:",
|
||||||
|
"Execution Units:",
|
||||||
"L2 Size:",
|
"L2 Size:",
|
||||||
"Memory:",
|
"Memory:",
|
||||||
"Memory frequency:",
|
"Memory frequency:",
|
||||||
@@ -79,6 +82,7 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
|
|||||||
"Cores/SM:",
|
"Cores/SM:",
|
||||||
"CUDA Cores:",
|
"CUDA Cores:",
|
||||||
"Tensor Cores:",
|
"Tensor Cores:",
|
||||||
|
"EUs:",
|
||||||
"L2 Size:",
|
"L2 Size:",
|
||||||
"Memory:",
|
"Memory:",
|
||||||
"Memory freq.:",
|
"Memory freq.:",
|
||||||
@@ -366,11 +370,13 @@ bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, stru
|
|||||||
char* uarch = get_str_uarch_intel(gpu->arch);
|
char* uarch = get_str_uarch_intel(gpu->arch);
|
||||||
char* gt = get_str_gt(gpu->arch);
|
char* gt = get_str_gt(gpu->arch);
|
||||||
char* manufacturing_process = get_str_process(gpu->arch);
|
char* manufacturing_process = get_str_process(gpu->arch);
|
||||||
|
char* eus = get_str_eu(gpu);
|
||||||
|
|
||||||
setAttribute(art, ATTRIBUTE_NAME, gpu_name);
|
setAttribute(art, ATTRIBUTE_NAME, gpu_name);
|
||||||
setAttribute(art, ATTRIBUTE_UARCH, uarch);
|
setAttribute(art, ATTRIBUTE_UARCH, uarch);
|
||||||
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
|
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
|
||||||
setAttribute(art, ATTRIBUTE_GT, gt);
|
setAttribute(art, ATTRIBUTE_GT, gt);
|
||||||
|
setAttribute(art, ATTRIBUTE_EUS, eus);
|
||||||
|
|
||||||
const char** attribute_fields = ATTRIBUTE_FIELDS;
|
const char** attribute_fields = ATTRIBUTE_FIELDS;
|
||||||
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
|
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
|
||||||
|
|||||||
@@ -144,14 +144,6 @@ struct gpu_info* get_gpu_info_cuda(int gpu_idx) {
|
|||||||
return gpu;
|
return gpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Formats a signed 32-bit integer as a decimal string stored in a buffer
// obtained from ecalloc(), and returns that buffer.
char* get_str_generic(int32_t data) {
|
|
||||||
// An int32_t needs at most 10 digits, +1 for a possible '-' sign, +1 for the terminating NUL
|
|
||||||
uint32_t max_size = 12;
|
|
||||||
char* dummy = (char *) ecalloc(max_size, sizeof(char));
|
|
||||||
snprintf(dummy, max_size, "%d", data);
|
|
||||||
return dummy;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns gpu->topo->streaming_mp formatted as a decimal string by get_str_generic().
char* get_str_sm(struct gpu_info* gpu) {
|
char* get_str_sm(struct gpu_info* gpu) {
|
||||||
return get_str_generic(gpu->topo->streaming_mp);
|
return get_str_generic(gpu->topo->streaming_mp);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ struct gpu_info* get_gpu_info_intel() {
|
|||||||
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL);
|
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL);
|
||||||
gpu->arch = get_uarch_from_pci(gpu->pci);
|
gpu->arch = get_uarch_from_pci(gpu->pci);
|
||||||
gpu->name = get_name_from_uarch(gpu->arch);
|
gpu->name = get_name_from_uarch(gpu->arch);
|
||||||
|
gpu->topo_i = get_topology_info(gpu->arch);
|
||||||
|
|
||||||
return gpu;
|
return gpu;
|
||||||
}
|
}
|
||||||
@@ -26,3 +27,7 @@ bool print_gpu_intel(struct gpu_info* gpu) {
|
|||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char* get_str_eu(struct gpu_info* gpu) {
|
||||||
|
return get_str_generic(gpu->topo_i->subslices * gpu->topo_i->eu_subslice);
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,5 +5,6 @@
|
|||||||
|
|
||||||
struct gpu_info* get_gpu_info_intel();
|
struct gpu_info* get_gpu_info_intel();
|
||||||
bool print_gpu_intel(struct gpu_info* gpu);
|
bool print_gpu_intel(struct gpu_info* gpu);
|
||||||
|
char* get_str_eu(struct gpu_info* gpu);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -61,6 +61,17 @@ static const char *gt_str[] = {
|
|||||||
else if (arch->chip == chip_) fill_uarch(arch, str, uarch, gt, process);
|
else if (arch->chip == chip_) fill_uarch(arch, str, uarch, gt, process);
|
||||||
#define CHECK_UARCH_END else { printBug("map_chip_to_uarch: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, GT_UNKNOWN, 0); }
|
#define CHECK_UARCH_END else { printBug("map_chip_to_uarch: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, GT_UNKNOWN, 0); }
|
||||||
|
|
||||||
|
/*
 * Macros that expand to an if / else-if chain mapping a (uarch, GT) pair to
 * its topology:
 *
 *   CHECK_TOPO_START   opens the chain with a branch that is never taken.
 *   CHECK_TOPO(...)    adds one "else if" that calls fill_topo() on a match.
 *   CHECK_TOPO_END     closes the chain: reports the unknown combination via
 *                      printBug() and fills every field with -1.
 *
 * CHECK_TOPO_END takes no arguments, so a variable named 'topo' must be in
 * scope where it is expanded.
 */
#define CHECK_TOPO_START if (false) {}
#define CHECK_TOPO(topo, arch, uarch_, gt_, eu_sub, sub, sli) \
   else if((arch)->uarch == (uarch_) && (arch)->gt == (gt_)) fill_topo((topo), (eu_sub), (sub), (sli));
#define CHECK_TOPO_END else { printBug("get_topology_info: Unknown topology for this uarch/GT combination"); fill_topo(topo, -1, -1, -1); }
|
||||||
|
|
||||||
|
/*
 * Stores the given topology values into topo_i.
 *
 *   eu_sub  value written to topo_i->eu_subslice
 *   sub     value written to topo_i->subslices
 *   sli     value written to topo_i->slices
 */
void fill_topo(struct topology_i* topo_i, int32_t eu_sub, int32_t sub, int32_t sli) {
  topo_i->eu_subslice = eu_sub;
  topo_i->subslices   = sub;
  topo_i->slices      = sli;
}
|
||||||
|
|
||||||
void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, int32_t gt, uint32_t process) {
|
void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, int32_t gt, uint32_t process) {
|
||||||
arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1));
|
arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1));
|
||||||
strcpy(arch->chip_str, str);
|
strcpy(arch->chip_str, str);
|
||||||
@@ -138,3 +149,25 @@ char* get_name_from_uarch(struct uarch* arch) {
|
|||||||
sprintf(name, "Intel %s", arch->chip_str);
|
sprintf(name, "Intel %s", arch->chip_str);
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* https://en.wikichip.org/wiki/intel/microarchitectures/gen9#Configuration
|
||||||
|
*/
|
||||||
|
struct topology_i* get_topology_info(struct uarch* arch) {
|
||||||
|
struct topology_i* topo = (struct topology_i*) emalloc(sizeof(struct topology_i));
|
||||||
|
|
||||||
|
// Syntax: (EU per subslice, Subslices, Slices)
|
||||||
|
CHECK_TOPO_START
|
||||||
|
// Gen9
|
||||||
|
CHECK_TOPO(topo, arch, UARCH_GEN9, GT1, 6, 2, 1)
|
||||||
|
CHECK_TOPO(topo, arch, UARCH_GEN9, GT2, 8, 3, 1)
|
||||||
|
CHECK_TOPO(topo, arch, UARCH_GEN9, GT3, 8, 6, 2)
|
||||||
|
CHECK_TOPO(topo, arch, UARCH_GEN9, GT4e, 8, 9, 3)
|
||||||
|
// Gen9.5
|
||||||
|
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1, 6, 2, 1)
|
||||||
|
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT2, 8, 3, 1)
|
||||||
|
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3, 8, 6, 2)
|
||||||
|
CHECK_TOPO_END
|
||||||
|
|
||||||
|
return topo;
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,5 +9,6 @@ struct uarch* get_uarch_from_pci(struct pci* pci);
|
|||||||
char* get_name_from_uarch(struct uarch* arch);
|
char* get_name_from_uarch(struct uarch* arch);
|
||||||
char* get_str_gt(struct uarch* arch);
|
char* get_str_gt(struct uarch* arch);
|
||||||
char* get_str_uarch_intel(struct uarch* arch);
|
char* get_str_uarch_intel(struct uarch* arch);
|
||||||
|
struct topology_i* get_topology_info(struct uarch* arch);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user