[v0.11] Add peak performance with tensor cores to the output
This commit is contained in:
@@ -116,17 +116,17 @@ char* get_str_l2(struct gpu_info* gpu) {
|
|||||||
return string;
|
return string;
|
||||||
}
|
}
|
||||||
|
|
||||||
char* get_str_peak_performance(struct gpu_info* gpu) {
|
char* get_str_peak_performance_generic(int64_t pp) {
|
||||||
char* str;
|
char* str;
|
||||||
|
|
||||||
if(gpu->peak_performance == -1) {
|
if(pp == -1) {
|
||||||
str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
|
str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
|
||||||
strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
|
strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 7 for digits (e.g., XXXX.XX), 7 for XFLOP/s
|
// 7 for digits (e.g., XXXX.XX), 7 for XFLOP/s
|
||||||
double flopsd = (double) gpu->peak_performance;
|
double flopsd = (double) pp;
|
||||||
uint32_t max_size = 7+1+7+1;
|
uint32_t max_size = 7+1+7+1;
|
||||||
str = (char *) ecalloc(max_size, sizeof(char));
|
str = (char *) ecalloc(max_size, sizeof(char));
|
||||||
|
|
||||||
@@ -139,3 +139,12 @@ char* get_str_peak_performance(struct gpu_info* gpu) {
|
|||||||
|
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char* get_str_peak_performance(struct gpu_info* gpu) {
|
||||||
|
return get_str_peak_performance_generic(gpu->peak_performance);
|
||||||
|
}
|
||||||
|
|
||||||
|
char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
|
||||||
|
return get_str_peak_performance_generic(gpu->peak_performance_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -61,6 +61,7 @@ struct gpu_info {
|
|||||||
struct memory* mem;
|
struct memory* mem;
|
||||||
struct cache* cach;
|
struct cache* cach;
|
||||||
int64_t peak_performance;
|
int64_t peak_performance;
|
||||||
|
int64_t peak_performance_t;
|
||||||
int32_t idx;
|
int32_t idx;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -73,5 +74,6 @@ char* get_str_bus_width(struct gpu_info* gpu);
|
|||||||
char* get_str_memory_clock(struct gpu_info* gpu);
|
char* get_str_memory_clock(struct gpu_info* gpu);
|
||||||
char* get_str_l2(struct gpu_info* gpu);
|
char* get_str_l2(struct gpu_info* gpu);
|
||||||
char* get_str_peak_performance(struct gpu_info* gpu);
|
char* get_str_peak_performance(struct gpu_info* gpu);
|
||||||
|
char* get_str_peak_performance_tensor(struct gpu_info* gpu);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
#include "../cuda/cuda.hpp"
|
#include "../cuda/cuda.hpp"
|
||||||
#include "../cuda/uarch.hpp"
|
#include "../cuda/uarch.hpp"
|
||||||
|
|
||||||
static const char* VERSION = "0.10";
|
static const char* VERSION = "0.11";
|
||||||
|
|
||||||
void print_help(char *argv[]) {
|
void print_help(char *argv[]) {
|
||||||
const char **t = args_str;
|
const char **t = args_str;
|
||||||
|
|||||||
@@ -43,7 +43,8 @@ enum {
|
|||||||
ATTRIBUTE_MEMORY,
|
ATTRIBUTE_MEMORY,
|
||||||
ATTRIBUTE_MEMORY_FREQ,
|
ATTRIBUTE_MEMORY_FREQ,
|
||||||
ATTRIBUTE_BUS_WIDTH,
|
ATTRIBUTE_BUS_WIDTH,
|
||||||
ATTRIBUTE_PEAK
|
ATTRIBUTE_PEAK,
|
||||||
|
ATTRIBUTE_PEAK_TENSOR,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char* ATTRIBUTE_FIELDS [] = {
|
static const char* ATTRIBUTE_FIELDS [] = {
|
||||||
@@ -54,13 +55,14 @@ static const char* ATTRIBUTE_FIELDS [] = {
|
|||||||
"Max Frequency:",
|
"Max Frequency:",
|
||||||
"SMs:",
|
"SMs:",
|
||||||
"Cores/SM:",
|
"Cores/SM:",
|
||||||
"CUDA cores:",
|
"CUDA Cores:",
|
||||||
"Tensor cores:",
|
"Tensor Cores:",
|
||||||
"L2 Size:",
|
"L2 Size:",
|
||||||
"Memory:",
|
"Memory:",
|
||||||
"Memory frequency:",
|
"Memory frequency:",
|
||||||
"Bus width:",
|
"Bus width:",
|
||||||
"Peak Performance:",
|
"Peak Performance:",
|
||||||
|
"Peak Performance (TC):",
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char* ATTRIBUTE_FIELDS_SHORT [] = {
|
static const char* ATTRIBUTE_FIELDS_SHORT [] = {
|
||||||
@@ -71,12 +73,14 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
|
|||||||
"Max Freq.:",
|
"Max Freq.:",
|
||||||
"SMs:",
|
"SMs:",
|
||||||
"Cores/SM:",
|
"Cores/SM:",
|
||||||
"CUDA cores:",
|
"CUDA Cores:",
|
||||||
|
"Tensor Cores:",
|
||||||
"L2 Size:",
|
"L2 Size:",
|
||||||
"Memory:",
|
"Memory:",
|
||||||
"Memory freq.:",
|
"Memory freq.:",
|
||||||
"Bus width:",
|
"Bus width:",
|
||||||
"Peak Perf.:",
|
"Peak Perf.:",
|
||||||
|
"Peak Perf.(TC):",
|
||||||
};
|
};
|
||||||
|
|
||||||
struct terminal {
|
struct terminal {
|
||||||
@@ -360,6 +364,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
|
|||||||
char* mem_freq = get_str_memory_clock(gpu);
|
char* mem_freq = get_str_memory_clock(gpu);
|
||||||
char* bus_width = get_str_bus_width(gpu);
|
char* bus_width = get_str_bus_width(gpu);
|
||||||
char* pp = get_str_peak_performance(gpu);
|
char* pp = get_str_peak_performance(gpu);
|
||||||
|
char* pp_tensor = get_str_peak_performance_tensor(gpu);
|
||||||
|
|
||||||
char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2));
|
char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2));
|
||||||
sprintf(mem, "%s %s", mem_size, mem_type);
|
sprintf(mem, "%s %s", mem_size, mem_type);
|
||||||
@@ -383,6 +388,9 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
|
|||||||
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
|
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
|
||||||
setAttribute(art, ATTRIBUTE_L2, l2);
|
setAttribute(art, ATTRIBUTE_L2, l2);
|
||||||
setAttribute(art, ATTRIBUTE_PEAK, pp);
|
setAttribute(art, ATTRIBUTE_PEAK, pp);
|
||||||
|
if(gpu->topo->tensor_cores >= 0) {
|
||||||
|
setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
|
||||||
|
}
|
||||||
|
|
||||||
const char** attribute_fields = ATTRIBUTE_FIELDS;
|
const char** attribute_fields = ATTRIBUTE_FIELDS;
|
||||||
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
|
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
|
||||||
|
|||||||
@@ -103,10 +103,16 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
|
|||||||
return mem;
|
return mem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute peak performance when using CUDA cores
|
||||||
int64_t get_peak_performance(struct gpu_info* gpu) {
|
int64_t get_peak_performance(struct gpu_info* gpu) {
|
||||||
return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
|
return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute peak performance when using tensor cores
|
||||||
|
int64_t get_peak_performance_t(struct gpu_info* gpu) {
|
||||||
|
return gpu->freq * 1000000 * 4 * 4 * 8 * gpu->topo->tensor_cores;
|
||||||
|
}
|
||||||
|
|
||||||
struct gpu_info* get_gpu_info(int gpu_idx) {
|
struct gpu_info* get_gpu_info(int gpu_idx) {
|
||||||
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
|
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
|
||||||
gpu->pci = NULL;
|
gpu->pci = NULL;
|
||||||
@@ -156,6 +162,7 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
|
|||||||
gpu->mem = get_memory_info(gpu, deviceProp);
|
gpu->mem = get_memory_info(gpu, deviceProp);
|
||||||
gpu->topo = get_topology_info(deviceProp);
|
gpu->topo = get_topology_info(deviceProp);
|
||||||
gpu->peak_performance = get_peak_performance(gpu);
|
gpu->peak_performance = get_peak_performance(gpu);
|
||||||
|
gpu->peak_performance_t = get_peak_performance_t(gpu);
|
||||||
|
|
||||||
return gpu;
|
return gpu;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user