[v0.11] Add peak performance with tensor cores to the output

2021-11-23 18:49:34 +01:00
parent 8bf0276aae
commit 32b2c59b50
5 changed files with 34 additions and 8 deletions
--- a/src/common/gpu.cpp
+++ b/src/common/gpu.cpp
@@ -116,17 +116,17 @@ char* get_str_l2(struct gpu_info* gpu) {
  return string;
 }
-char* get_str_peak_performance(struct gpu_info* gpu) {
+char* get_str_peak_performance_generic(int64_t pp) {
  char* str;
-  if(gpu->peak_performance == -1) {
+  if(pp == -1) {
    str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
    strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
    return str;
  }
  // 7 for digits (e.g., XXXX.XX), 7 for XFLOP/s
-  double flopsd = (double) gpu->peak_performance;
+  double flopsd = (double) pp;
  uint32_t max_size = 7+1+7+1;
  str = (char *) ecalloc(max_size, sizeof(char));
@@ -139,3 +139,12 @@ char* get_str_peak_performance(struct gpu_info* gpu) {
  return str;
 }
 // Builds the printable form of gpu->peak_performance. The string itself is
 // produced (and allocated) by get_str_peak_performance_generic.
 char* get_str_peak_performance(struct gpu_info* gpu) {
  int64_t cuda_peak = gpu->peak_performance;
  char* formatted = get_str_peak_performance_generic(cuda_peak);
  return formatted;
 }
 // Builds the printable form of gpu->peak_performance_t. The string itself is
 // produced (and allocated) by get_str_peak_performance_generic.
 char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
  int64_t tensor_peak = gpu->peak_performance_t;
  char* formatted = get_str_peak_performance_generic(tensor_peak);
  return formatted;
 }
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -61,6 +61,7 @@ struct gpu_info {
  struct memory* mem;
  struct cache* cach;
  int64_t peak_performance;
  int64_t peak_performance_t;
  int32_t idx;
 };
@@ -73,5 +74,6 @@ char* get_str_bus_width(struct gpu_info* gpu);
 char* get_str_memory_clock(struct gpu_info* gpu);
 char* get_str_l2(struct gpu_info* gpu);
 // Returns gpu->peak_performance formatted as a string.
 char* get_str_peak_performance(struct gpu_info* gpu);
 // Returns gpu->peak_performance_t (tensor core peak) formatted as a string.
 char* get_str_peak_performance_tensor(struct gpu_info* gpu);
 #endif
--- a/src/common/main.cpp
+++ b/src/common/main.cpp
@@ -7,7 +7,7 @@
 #include "../cuda/cuda.hpp"
 #include "../cuda/uarch.hpp"
-static const char* VERSION = "0.10";
+static const char* VERSION = "0.11";
 void print_help(char *argv[]) {
  const char **t = args_str;
--- a/src/common/printer.cpp
+++ b/src/common/printer.cpp
@@ -43,7 +43,8 @@ enum {
  ATTRIBUTE_MEMORY,
  ATTRIBUTE_MEMORY_FREQ,
  ATTRIBUTE_BUS_WIDTH,
-  ATTRIBUTE_PEAK
+  ATTRIBUTE_PEAK,
  ATTRIBUTE_PEAK_TENSOR,
 };
 static const char* ATTRIBUTE_FIELDS [] = {
@@ -54,13 +55,14 @@ static const char* ATTRIBUTE_FIELDS [] = {
  "Max Frequency:",
  "SMs:",
  "Cores/SM:",
-  "CUDA cores:",
+  "CUDA Cores:",
-  "Tensor cores:",
+  "Tensor Cores:",
  "L2 Size:",
  "Memory:",
  "Memory frequency:",
  "Bus width:",
  "Peak Performance:",
  "Peak Performance (TC):",
 };
 static const char* ATTRIBUTE_FIELDS_SHORT [] = {
@@ -71,12 +73,14 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
  "Max Freq.:",
  "SMs:",
  "Cores/SM:",
-  "CUDA cores:",
+  "CUDA Cores:",
  "Tensor Cores:",
  "L2 Size:",
  "Memory:",
  "Memory freq.:",
  "Bus width:",
  "Peak Perf.:",
  "Peak Perf.(TC):",
 };
 struct terminal {
@@ -360,6 +364,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
  char* mem_freq = get_str_memory_clock(gpu);
  char* bus_width = get_str_bus_width(gpu);
  char* pp = get_str_peak_performance(gpu);
  char* pp_tensor = get_str_peak_performance_tensor(gpu);
  char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2));
  sprintf(mem, "%s %s", mem_size, mem_type);
@@ -383,6 +388,9 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
  setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
  setAttribute(art, ATTRIBUTE_L2, l2);
  setAttribute(art, ATTRIBUTE_PEAK, pp);
  if(gpu->topo->tensor_cores >= 0) {
    setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
  }
  const char** attribute_fields = ATTRIBUTE_FIELDS;
  uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -103,10 +103,16 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
  return mem;
 }
 // Compute peak performance when using CUDA cores.
 // Returns gpu->freq * 1000000 * gpu->topo->cuda_cores * 2 as a 64-bit value.
 // NOTE(review): presumably freq is in MHz and the factor 2 counts one FMA as
 // two floating point operations - confirm.
 int64_t get_peak_performance(struct gpu_info* gpu) {
  // Widen the first operand so the whole product is evaluated in 64-bit
  // arithmetic. Without the cast the multiplication happens in the operands'
  // own type and is only converted on return, so narrower operands could
  // overflow in the intermediate result.
  return (int64_t) gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
 }
 // Compute peak performance when using tensor cores.
 // Returns gpu->freq * 1000000 * 4 * 4 * 8 * gpu->topo->tensor_cores as a
 // 64-bit value.
 // NOTE(review): presumably freq is in MHz and 4 * 4 * 8 is the number of
 // floating point operations one tensor core performs per clock - confirm
 // against the supported architectures.
 int64_t get_peak_performance_t(struct gpu_info* gpu) {
  // Widen the first operand so the whole product is evaluated in 64-bit
  // arithmetic. Without the cast the multiplication happens in the operands'
  // own type and is only converted on return, so narrower operands could
  // overflow in the intermediate result.
  return (int64_t) gpu->freq * 1000000 * 4 * 4 * 8 * gpu->topo->tensor_cores;
 }
 struct gpu_info* get_gpu_info(int gpu_idx) {
  struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
  gpu->pci = NULL;
@@ -156,6 +162,7 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
  gpu->mem = get_memory_info(gpu, deviceProp);
  gpu->topo = get_topology_info(deviceProp);
  gpu->peak_performance = get_peak_performance(gpu);
  gpu->peak_performance_t = get_peak_performance_t(gpu);
  return gpu;
 }