[v0.11] Add peak performance with tensor cores to the output

This commit is contained in:
Dr-Noob
2021-11-23 18:49:34 +01:00
parent 8bf0276aae
commit 32b2c59b50
5 changed files with 34 additions and 8 deletions

View File

@@ -116,17 +116,17 @@ char* get_str_l2(struct gpu_info* gpu) {
return string; return string;
} }
char* get_str_peak_performance(struct gpu_info* gpu) { char* get_str_peak_performance_generic(int64_t pp) {
char* str; char* str;
if(gpu->peak_performance == -1) { if(pp == -1) {
str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1)); str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1); strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
return str; return str;
} }
// 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s // 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s
double flopsd = (double) gpu->peak_performance; double flopsd = (double) pp;
uint32_t max_size = 7+1+7+1; uint32_t max_size = 7+1+7+1;
str = (char *) ecalloc(max_size, sizeof(char)); str = (char *) ecalloc(max_size, sizeof(char));
@@ -139,3 +139,12 @@ char* get_str_peak_performance(struct gpu_info* gpu) {
return str; return str;
} }
// Build the printable peak-performance string for this GPU.
// Reads gpu->peak_performance and forwards it to
// get_str_peak_performance_generic, returning whatever string it produces.
// NOTE(review): the generic helper allocates the string it returns;
// presumably the caller owns it - confirm.
char* get_str_peak_performance(struct gpu_info* gpu) {
  int64_t peak = gpu->peak_performance;
  char* formatted = get_str_peak_performance_generic(peak);
  return formatted;
}
// Build the printable tensor-core peak-performance string for this GPU.
// Reads gpu->peak_performance_t and forwards it to
// get_str_peak_performance_generic, returning whatever string it produces.
// NOTE(review): the generic helper allocates the string it returns;
// presumably the caller owns it - confirm.
char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
  int64_t peak_tensor = gpu->peak_performance_t;
  char* formatted = get_str_peak_performance_generic(peak_tensor);
  return formatted;
}

View File

@@ -61,6 +61,7 @@ struct gpu_info {
struct memory* mem; struct memory* mem;
struct cache* cach; struct cache* cach;
int64_t peak_performance; int64_t peak_performance;
int64_t peak_performance_t;
int32_t idx; int32_t idx;
}; };
@@ -73,5 +74,6 @@ char* get_str_bus_width(struct gpu_info* gpu);
char* get_str_memory_clock(struct gpu_info* gpu); char* get_str_memory_clock(struct gpu_info* gpu);
char* get_str_l2(struct gpu_info* gpu); char* get_str_l2(struct gpu_info* gpu);
char* get_str_peak_performance(struct gpu_info* gpu); char* get_str_peak_performance(struct gpu_info* gpu);
char* get_str_peak_performance_tensor(struct gpu_info* gpu);
#endif #endif

View File

@@ -7,7 +7,7 @@
#include "../cuda/cuda.hpp" #include "../cuda/cuda.hpp"
#include "../cuda/uarch.hpp" #include "../cuda/uarch.hpp"
static const char* VERSION = "0.10"; static const char* VERSION = "0.11";
void print_help(char *argv[]) { void print_help(char *argv[]) {
const char **t = args_str; const char **t = args_str;

View File

@@ -43,7 +43,8 @@ enum {
ATTRIBUTE_MEMORY, ATTRIBUTE_MEMORY,
ATTRIBUTE_MEMORY_FREQ, ATTRIBUTE_MEMORY_FREQ,
ATTRIBUTE_BUS_WIDTH, ATTRIBUTE_BUS_WIDTH,
ATTRIBUTE_PEAK ATTRIBUTE_PEAK,
ATTRIBUTE_PEAK_TENSOR,
}; };
static const char* ATTRIBUTE_FIELDS [] = { static const char* ATTRIBUTE_FIELDS [] = {
@@ -54,13 +55,14 @@ static const char* ATTRIBUTE_FIELDS [] = {
"Max Frequency:", "Max Frequency:",
"SMs:", "SMs:",
"Cores/SM:", "Cores/SM:",
"CUDA cores:", "CUDA Cores:",
"Tensor cores:", "Tensor Cores:",
"L2 Size:", "L2 Size:",
"Memory:", "Memory:",
"Memory frequency:", "Memory frequency:",
"Bus width:", "Bus width:",
"Peak Performance:", "Peak Performance:",
"Peak Performance (TC):",
}; };
static const char* ATTRIBUTE_FIELDS_SHORT [] = { static const char* ATTRIBUTE_FIELDS_SHORT [] = {
@@ -71,12 +73,14 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
"Max Freq.:", "Max Freq.:",
"SMs:", "SMs:",
"Cores/SM:", "Cores/SM:",
"CUDA cores:", "CUDA Cores:",
"Tensor Cores:",
"L2 Size:", "L2 Size:",
"Memory:", "Memory:",
"Memory freq.:", "Memory freq.:",
"Bus width:", "Bus width:",
"Peak Perf.:", "Peak Perf.:",
"Peak Perf.(TC):",
}; };
struct terminal { struct terminal {
@@ -360,6 +364,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
char* mem_freq = get_str_memory_clock(gpu); char* mem_freq = get_str_memory_clock(gpu);
char* bus_width = get_str_bus_width(gpu); char* bus_width = get_str_bus_width(gpu);
char* pp = get_str_peak_performance(gpu); char* pp = get_str_peak_performance(gpu);
char* pp_tensor = get_str_peak_performance_tensor(gpu);
char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2)); char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2));
sprintf(mem, "%s %s", mem_size, mem_type); sprintf(mem, "%s %s", mem_size, mem_type);
@@ -383,6 +388,9 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width); setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
setAttribute(art, ATTRIBUTE_L2, l2); setAttribute(art, ATTRIBUTE_L2, l2);
setAttribute(art, ATTRIBUTE_PEAK, pp); setAttribute(art, ATTRIBUTE_PEAK, pp);
if(gpu->topo->tensor_cores >= 0) {
setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
}
const char** attribute_fields = ATTRIBUTE_FIELDS; const char** attribute_fields = ATTRIBUTE_FIELDS;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields); uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);

View File

@@ -103,10 +103,16 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
return mem; return mem;
} }
// Compute peak performance when using CUDA cores
int64_t get_peak_performance(struct gpu_info* gpu) { int64_t get_peak_performance(struct gpu_info* gpu) {
return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2; return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
} }
// Compute peak performance when using tensor cores.
// Returns gpu->freq * 1000000 * (4 * 4 * 8) * gpu->topo->tensor_cores
// as a 64-bit integer.
// NOTE(review): the 1000000 scale suggests gpu->freq is in MHz, and 4*4*8
// presumably models the operations one tensor core issues per cycle -
// confirm both against the topology/uarch code.
int64_t get_peak_performance_t(struct gpu_info* gpu) {
  // Widen the first operand so the whole product is evaluated in 64 bits.
  // Without this, the multiplication runs in the fields' own width and
  // would overflow before reaching the int64_t return type if they are
  // 32-bit.
  int64_t scaled_freq = (int64_t) gpu->freq * 1000000;
  int64_t per_core_factor = 4 * 4 * 8;
  return scaled_freq * per_core_factor * gpu->topo->tensor_cores;
}
struct gpu_info* get_gpu_info(int gpu_idx) { struct gpu_info* get_gpu_info(int gpu_idx) {
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info)); struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
gpu->pci = NULL; gpu->pci = NULL;
@@ -156,6 +162,7 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
gpu->mem = get_memory_info(gpu, deviceProp); gpu->mem = get_memory_info(gpu, deviceProp);
gpu->topo = get_topology_info(deviceProp); gpu->topo = get_topology_info(deviceProp);
gpu->peak_performance = get_peak_performance(gpu); gpu->peak_performance = get_peak_performance(gpu);
gpu->peak_performance_t = get_peak_performance_t(gpu);
return gpu; return gpu;
} }