diff --git a/src/common/gpu.cpp b/src/common/gpu.cpp
index e66f2c7..ed00694 100644
--- a/src/common/gpu.cpp
+++ b/src/common/gpu.cpp
@@ -116,17 +116,17 @@ char* get_str_l2(struct gpu_info* gpu) {
   return string;
 }
 
-char* get_str_peak_performance(struct gpu_info* gpu) {
+char* get_str_peak_performance_generic(int64_t pp) {
   char* str;
 
-  if(gpu->peak_performance == -1) {
+  if(pp == -1) {
     str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
     strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
     return str;
   }
 
   // 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s
-  double flopsd = (double) gpu->peak_performance;
+  double flopsd = (double) pp;
   uint32_t max_size = 7+1+7+1;
   str = (char *) ecalloc(max_size, sizeof(char));
 
@@ -139,3 +139,12 @@ char* get_str_peak_performance(struct gpu_info* gpu) {
 
   return str;
 }
+
+char* get_str_peak_performance(struct gpu_info* gpu) { // String form of gpu->peak_performance (a copy of STRING_UNKNOWN when it is -1)
+  return get_str_peak_performance_generic(gpu->peak_performance); // NOTE(review): result looks heap-allocated (emalloc/ecalloc) - presumably the caller frees it
+}
+
+char* get_str_peak_performance_tensor(struct gpu_info* gpu) { // String form of gpu->peak_performance_t (a copy of STRING_UNKNOWN when it is -1)
+  return get_str_peak_performance_generic(gpu->peak_performance_t); // NOTE(review): result looks heap-allocated (emalloc/ecalloc) - presumably the caller frees it
+}
+
diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp
index 9632ffb..2928a11 100644
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -61,6 +61,7 @@ struct gpu_info {
   struct memory* mem;
   struct cache* cach;
   int64_t peak_performance;
+  int64_t peak_performance_t;
   int32_t idx;
 };
 
@@ -73,5 +74,6 @@ char* get_str_bus_width(struct gpu_info* gpu);
 char* get_str_memory_clock(struct gpu_info* gpu);
 char* get_str_l2(struct gpu_info* gpu);
 char* get_str_peak_performance(struct gpu_info* gpu);
+char* get_str_peak_performance_tensor(struct gpu_info* gpu); // Returns a string built from gpu->peak_performance_t
 
 #endif
diff --git a/src/common/main.cpp b/src/common/main.cpp
index 7f27252..899ff89 100644
--- a/src/common/main.cpp
+++ b/src/common/main.cpp
@@ -7,7 +7,7 @@
 #include "../cuda/cuda.hpp"
 #include "../cuda/uarch.hpp"
 
-static const char* VERSION = "0.10";
+static const char* VERSION = "0.11";
 
 void print_help(char *argv[]) {
   const char **t = args_str;
diff --git a/src/common/printer.cpp b/src/common/printer.cpp
index a84ebb8..958593e 100644
--- a/src/common/printer.cpp
+++ b/src/common/printer.cpp
@@ -43,7 +43,8 @@ enum {
   ATTRIBUTE_MEMORY,
   ATTRIBUTE_MEMORY_FREQ,
   ATTRIBUTE_BUS_WIDTH,
-  ATTRIBUTE_PEAK
+  ATTRIBUTE_PEAK,
+  ATTRIBUTE_PEAK_TENSOR,
 };
 
 static const char* ATTRIBUTE_FIELDS [] = {
@@ -54,13 +55,14 @@ static const char* ATTRIBUTE_FIELDS [] = {
   "Max Frequency:",
   "SMs:",
   "Cores/SM:",
-  "CUDA cores:",
-  "Tensor cores:",
+  "CUDA Cores:",
+  "Tensor Cores:",
   "L2 Size:",
   "Memory:",
   "Memory frequency:",
   "Bus width:",
   "Peak Performance:",
+  "Peak Performance (TC):",
 };
 
 static const char* ATTRIBUTE_FIELDS_SHORT [] = {
@@ -71,12 +73,14 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
   "Max Freq.:",
   "SMs:",
   "Cores/SM:",
-  "CUDA cores:",
+  "CUDA Cores:",
+  "Tensor Cores:",
   "L2 Size:",
   "Memory:",
   "Memory freq.:",
   "Bus width:",
   "Peak Perf.:",
+  "Peak Perf.(TC):",
 };
 
 struct terminal {
@@ -360,6 +364,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
   char* mem_freq = get_str_memory_clock(gpu);
   char* bus_width = get_str_bus_width(gpu);
   char* pp = get_str_peak_performance(gpu);
+  char* pp_tensor = get_str_peak_performance_tensor(gpu);
 
   char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2));
   sprintf(mem, "%s %s", mem_size, mem_type);
@@ -383,6 +388,9 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
   setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
   setAttribute(art, ATTRIBUTE_L2, l2);
   setAttribute(art, ATTRIBUTE_PEAK, pp);
+  if(gpu->topo->tensor_cores >= 0) { // NOTE(review): >= 0 also holds for 0 tensor cores, so a zero tensor peak would still be set - confirm this is intended
+    setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
+  }
 
   const char** attribute_fields = ATTRIBUTE_FIELDS;
   uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp
index d0b61d0..c75efb9 100644
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -103,10 +103,16 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
   return mem;
 }
 
+// Peak performance when using CUDA cores: freq * 1000000 * cuda_cores * 2 (NOTE(review): freq presumably in MHz, 2 presumably FLOPs per core per cycle - confirm)
 int64_t get_peak_performance(struct gpu_info* gpu) {
   return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
 }
 
+// Peak performance when using tensor cores: freq * 1000000 * (4 * 4 * 8) * tensor_cores
+int64_t get_peak_performance_t(struct gpu_info* gpu) { // NOTE(review): freq presumably in MHz; 4*4*8 presumably FLOPs per tensor core per cycle - confirm
+  return gpu->freq * 1000000 * 4 * 4 * 8 * gpu->topo->tensor_cores; // Evaluated left to right; a tensor_cores of 0 yields 0
+}
+
 struct gpu_info* get_gpu_info(int gpu_idx) {
   struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
   gpu->pci = NULL;
@@ -156,6 +162,7 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
   gpu->mem = get_memory_info(gpu, deviceProp);
   gpu->topo = get_topology_info(deviceProp);
   gpu->peak_performance = get_peak_performance(gpu);
+  gpu->peak_performance_t = get_peak_performance_t(gpu);
 
   return gpu;
 }