From 4e0e6b5ab5fe3bb6d9688c85d8ba03c55eb2c613 Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Sun, 19 Dec 2021 11:34:05 +0100 Subject: [PATCH] [v0.20] Rename CUDA topology struct to improve consistency --- src/common/gpu.hpp | 6 ++++-- src/common/printer.cpp | 4 ++-- src/cuda/cuda.cpp | 20 ++++++++++---------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp index d0b6819..f0cd8ab 100644 --- a/src/common/gpu.hpp +++ b/src/common/gpu.hpp @@ -36,13 +36,15 @@ struct cache { struct cach* L2; }; -struct topology { +// CUDA topology +struct topology_c { int32_t streaming_mp; int32_t cores_per_mp; int32_t cuda_cores; int32_t tensor_cores; }; +// Intel topology struct topology_i { int32_t slices; int32_t subslices; @@ -63,7 +65,7 @@ struct gpu_info { char* name; int64_t freq; struct pci* pci; - struct topology* topo; + struct topology_c* topo_c; struct topology_i* topo_i; struct memory* mem; struct cache* cach; diff --git a/src/common/printer.cpp b/src/common/printer.cpp index 2d34675..a11db60 100644 --- a/src/common/printer.cpp +++ b/src/common/printer.cpp @@ -438,7 +438,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc setAttribute(art, ATTRIBUTE_STREAMINGMP, sms); setAttribute(art, ATTRIBUTE_CORESPERMP, corespersm); setAttribute(art, ATTRIBUTE_CUDA_CORES, cores); - if(gpu->topo->tensor_cores > 0) { + if(gpu->topo_c->tensor_cores > 0) { setAttribute(art, ATTRIBUTE_TENSOR_CORES, tensorc); } setAttribute(art, ATTRIBUTE_MEMORY, mem); @@ -446,7 +446,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width); setAttribute(art, ATTRIBUTE_L2, l2); setAttribute(art, ATTRIBUTE_PEAK, pp); - if(gpu->topo->tensor_cores > 0) { + if(gpu->topo_c->tensor_cores > 0) { setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor); } diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp index 734f687..15b7727 100644 --- a/src/cuda/cuda.cpp +++ b/src/cuda/cuda.cpp @@ -31,8 +31,8 @@ int get_tensor_cores(int sm, int major) { else return 0; } -struct topology* get_topology_info(cudaDeviceProp prop) { - struct topology* topo = (struct topology*) emalloc(sizeof(struct topology)); +struct topology_c* get_topology_info(cudaDeviceProp prop) { + struct topology_c* topo = (struct topology_c*) emalloc(sizeof(struct topology_c)); topo->streaming_mp = prop.multiProcessorCount; topo->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor); @@ -77,15 +77,15 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) { // Compute peak performance when using CUDA cores int64_t get_peak_performance_cuda(struct gpu_info* gpu) { - return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2; + return gpu->freq * 1000000 * gpu->topo_c->cuda_cores * 2; } // Compute peak performance when using tensor cores int64_t get_peak_performance_tcu(cudaDeviceProp prop, struct gpu_info* gpu) { // Volta / Turing tensor cores performs 4x4x4 FP16 matrix multiplication // Ampere tensor cores performs 8x4x8 FP16 matrix multiplicacion - if(prop.major == 7) return gpu->freq * 1000000 * 4 * 4 * 4 * 2 * gpu->topo->tensor_cores; - else if(prop.major == 8) return gpu->freq * 1000000 * 8 * 4 * 8 * 2 * gpu->topo->tensor_cores; + if(prop.major == 7) return gpu->freq * 1000000 * 4 * 4 * 4 * 2 * gpu->topo_c->tensor_cores; + else if(prop.major == 8) return gpu->freq * 1000000 * 8 * 4 * 8 * 2 * gpu->topo_c->tensor_cores; else return 0; } @@ -141,7 +141,7 @@ struct gpu_info* get_gpu_info_cuda(int gpu_idx) { gpu->arch = get_uarch_from_cuda(gpu); gpu->cach = get_cache_info(deviceProp); gpu->mem = get_memory_info(gpu, deviceProp); - gpu->topo = get_topology_info(deviceProp); + gpu->topo_c = get_topology_info(deviceProp); gpu->peak_performance = get_peak_performance_cuda(gpu); gpu->peak_performance_tcu = get_peak_performance_tcu(deviceProp, gpu); @@ -149,18 +149,18 @@ struct gpu_info* get_gpu_info_cuda(int gpu_idx) { } char* get_str_sm(struct gpu_info* gpu) { - return get_str_generic(gpu->topo->streaming_mp); + return get_str_generic(gpu->topo_c->streaming_mp); } char* get_str_cores_sm(struct gpu_info* gpu) { - return get_str_generic(gpu->topo->cores_per_mp); + return get_str_generic(gpu->topo_c->cores_per_mp); } char* get_str_cuda_cores(struct gpu_info* gpu) { - return get_str_generic(gpu->topo->cuda_cores); + return get_str_generic(gpu->topo_c->cuda_cores); } char* get_str_tensor_cores(struct gpu_info* gpu) { - return get_str_generic(gpu->topo->tensor_cores); + return get_str_generic(gpu->topo_c->tensor_cores); }