[v0.11] Add peak performance with tensor cores to the output

This commit is contained in:
Dr-Noob
2021-11-23 18:49:34 +01:00
parent 8bf0276aae
commit 32b2c59b50
5 changed files with 34 additions and 8 deletions

View File

@@ -116,17 +116,17 @@ char* get_str_l2(struct gpu_info* gpu) {
return string;
}
char* get_str_peak_performance(struct gpu_info* gpu) {
char* get_str_peak_performance_generic(int64_t pp) {
char* str;
if(gpu->peak_performance == -1) {
if(pp == -1) {
str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
return str;
}
// 7 for digits (e.g., XXXX.XX), 7 for XFLOP/s
double flopsd = (double) gpu->peak_performance;
double flopsd = (double) pp;
uint32_t max_size = 7+1+7+1;
str = (char *) ecalloc(max_size, sizeof(char));
@@ -139,3 +139,12 @@ char* get_str_peak_performance(struct gpu_info* gpu) {
return str;
}
/*
 * Builds the printable peak-performance string for a GPU.
 *
 * Reads gpu->peak_performance and hands it to
 * get_str_peak_performance_generic, returning whatever string that
 * helper produces.
 */
char* get_str_peak_performance(struct gpu_info* gpu) {
  int64_t peak = gpu->peak_performance;
  return get_str_peak_performance_generic(peak);
}
/*
 * Builds the printable tensor peak-performance string for a GPU.
 *
 * Reads gpu->peak_performance_t and hands it to
 * get_str_peak_performance_generic, returning whatever string that
 * helper produces.
 */
char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
  int64_t peak_tensor = gpu->peak_performance_t;
  return get_str_peak_performance_generic(peak_tensor);
}

View File

@@ -61,6 +61,7 @@ struct gpu_info {
struct memory* mem;
struct cache* cach;
int64_t peak_performance;
int64_t peak_performance_t;
int32_t idx;
};
@@ -73,5 +74,6 @@ char* get_str_bus_width(struct gpu_info* gpu);
char* get_str_memory_clock(struct gpu_info* gpu);
char* get_str_l2(struct gpu_info* gpu);
char* get_str_peak_performance(struct gpu_info* gpu);
char* get_str_peak_performance_tensor(struct gpu_info* gpu);
#endif

View File

@@ -7,7 +7,7 @@
#include "../cuda/cuda.hpp"
#include "../cuda/uarch.hpp"
static const char* VERSION = "0.10";
static const char* VERSION = "0.11";
void print_help(char *argv[]) {
const char **t = args_str;

View File

@@ -43,7 +43,8 @@ enum {
ATTRIBUTE_MEMORY,
ATTRIBUTE_MEMORY_FREQ,
ATTRIBUTE_BUS_WIDTH,
ATTRIBUTE_PEAK
ATTRIBUTE_PEAK,
ATTRIBUTE_PEAK_TENSOR,
};
static const char* ATTRIBUTE_FIELDS [] = {
@@ -54,13 +55,14 @@ static const char* ATTRIBUTE_FIELDS [] = {
"Max Frequency:",
"SMs:",
"Cores/SM:",
"CUDA cores:",
"Tensor cores:",
"CUDA Cores:",
"Tensor Cores:",
"L2 Size:",
"Memory:",
"Memory frequency:",
"Bus width:",
"Peak Performance:",
"Peak Performance (TC):",
};
static const char* ATTRIBUTE_FIELDS_SHORT [] = {
@@ -71,12 +73,14 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
"Max Freq.:",
"SMs:",
"Cores/SM:",
"CUDA cores:",
"CUDA Cores:",
"Tensor Cores:",
"L2 Size:",
"Memory:",
"Memory freq.:",
"Bus width:",
"Peak Perf.:",
"Peak Perf.(TC):",
};
struct terminal {
@@ -360,6 +364,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
char* mem_freq = get_str_memory_clock(gpu);
char* bus_width = get_str_bus_width(gpu);
char* pp = get_str_peak_performance(gpu);
char* pp_tensor = get_str_peak_performance_tensor(gpu);
char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2));
sprintf(mem, "%s %s", mem_size, mem_type);
@@ -383,6 +388,9 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
setAttribute(art, ATTRIBUTE_L2, l2);
setAttribute(art, ATTRIBUTE_PEAK, pp);
if(gpu->topo->tensor_cores >= 0) {
setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
}
const char** attribute_fields = ATTRIBUTE_FIELDS;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);