[v0.01] Retrieve more info: frequency, topology and peak performance
This commit is contained in:
@@ -1,12 +1,37 @@
|
|||||||
#include "gpu.hpp"
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include "../common/global.hpp"
|
||||||
|
#include "gpu.hpp"
|
||||||
|
|
||||||
|
#define STRING_YES "Yes"
|
||||||
|
#define STRING_NO "No"
|
||||||
|
#define STRING_NONE "None"
|
||||||
|
#define STRING_MEGAHERZ "MHz"
|
||||||
|
#define STRING_GIGAHERZ "GHz"
|
||||||
|
#define STRING_KILOBYTES "KB"
|
||||||
|
#define STRING_MEGABYTES "MB"
|
||||||
|
|
||||||
char* get_str_gpu_name(struct gpu_info* gpu) {
|
char* get_str_gpu_name(struct gpu_info* gpu) {
|
||||||
return gpu->name;
|
return gpu->name;
|
||||||
}
|
}
|
||||||
|
|
||||||
char* get_str_freq(struct gpu_info* gpu) {
|
char* get_str_freq(struct gpu_info* gpu) {
|
||||||
return NULL;
|
// Max 5 digits and 3 for '(M/G)Hz'
|
||||||
|
uint32_t size = (5+1+3+1);
|
||||||
|
assert(strlen(STRING_UNKNOWN)+1 <= size);
|
||||||
|
char* string = (char *) ecalloc(size, sizeof(char));
|
||||||
|
|
||||||
|
if(gpu->freq == UNKNOWN_FREQ || gpu->freq < 0)
|
||||||
|
snprintf(string,strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
|
||||||
|
else if(gpu->freq >= 1000)
|
||||||
|
snprintf(string,size,"%.3f "STRING_GIGAHERZ, (float)(gpu->freq)/1000);
|
||||||
|
else
|
||||||
|
snprintf(string,size,"%.3f "STRING_MEGAHERZ, (float)gpu->freq);
|
||||||
|
|
||||||
|
return string;
|
||||||
}
|
}
|
||||||
|
|
||||||
char* get_str_memory_size(struct gpu_info* gpu) {
|
char* get_str_memory_size(struct gpu_info* gpu) {
|
||||||
@@ -26,5 +51,25 @@ char* get_str_l2(struct gpu_info* gpu) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char* get_str_peak_performance(struct gpu_info* gpu) {
|
char* get_str_peak_performance(struct gpu_info* gpu) {
|
||||||
return NULL;
|
char* str;
|
||||||
|
|
||||||
|
if(gpu->peak_performance == -1) {
|
||||||
|
str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
|
||||||
|
strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s
|
||||||
|
double flopsd = (double) gpu->peak_performance;
|
||||||
|
uint32_t max_size = 7+1+7+1;
|
||||||
|
str = (char *) ecalloc(max_size, sizeof(char));
|
||||||
|
|
||||||
|
if(flopsd >= (double)1000000000000.0)
|
||||||
|
snprintf(str, max_size, "%.2f TFLOP/s", flopsd/1000000000000);
|
||||||
|
else if(flopsd >= 1000000000.0)
|
||||||
|
snprintf(str, max_size, "%.2f GFLOP/s", flopsd/1000000000);
|
||||||
|
else
|
||||||
|
snprintf(str, max_size, "%.2f MFLOP/s", flopsd/1000000);
|
||||||
|
|
||||||
|
return str;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
#include "../cuda/nvmlb.hpp"
|
#include "../cuda/nvmlb.hpp"
|
||||||
#include "../cuda/pci.hpp"
|
#include "../cuda/pci.hpp"
|
||||||
|
|
||||||
|
// Marker value for a frequency that could not be determined.
// Parenthesised so the expansion always behaves as a single operand.
#define UNKNOWN_FREQ (-1)
|
||||||
|
|
||||||
// Identifiers for the GPU vendor field.
enum {
  GPU_VENDOR_NVIDIA = 0
};
|
||||||
|
|||||||
@@ -59,9 +59,9 @@ int main(int argc, char* argv[]) {
|
|||||||
printf("Compute Capability: %s\n", get_str_cc(gpu->arch));
|
printf("Compute Capability: %s\n", get_str_cc(gpu->arch));
|
||||||
printf("Technology: %s\n", get_str_process(gpu->arch));
|
printf("Technology: %s\n", get_str_process(gpu->arch));
|
||||||
printf("Max Frequency: %s\n", get_str_freq(gpu));
|
printf("Max Frequency: %s\n", get_str_freq(gpu));
|
||||||
printf("SM: %s\n", get_str_sm(gpu));
|
printf("SM: %d\n", get_str_sm(gpu));
|
||||||
printf("Cores/MP: %s\n", get_str_cores_sm(gpu));
|
printf("Cores/MP: %d\n", get_str_cores_sm(gpu));
|
||||||
printf("CUDA cores: %s\n", get_str_cuda_cores(gpu));
|
printf("CUDA cores: %d\n", get_str_cuda_cores(gpu));
|
||||||
printf("Memory size: %s\n", get_str_memory_size(gpu));
|
printf("Memory size: %s\n", get_str_memory_size(gpu));
|
||||||
printf("Memory type: %s\n", get_str_memory_type(gpu));
|
printf("Memory type: %s\n", get_str_memory_type(gpu));
|
||||||
printf("L1 size: %s\n", get_str_l1(gpu));
|
printf("L1 size: %s\n", get_str_l1(gpu));
|
||||||
|
|||||||
@@ -12,9 +12,13 @@ struct cache* get_cache_info(struct gpu_info* gpu) {
|
|||||||
return cach;
|
return cach;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Builds the topology record for a device from its CUDA properties.
//
// streaming_mp comes from prop.multiProcessorCount, cores_per_mp from
// _ConvertSMVer2Cores() applied to the compute capability (major, minor), and
// cuda_cores is the product of the two. `gpu` is not read here.
//
// Returns a record allocated with emalloc(); this function does not free it.
struct topology* get_topology_info(struct gpu_info* gpu, cudaDeviceProp prop) {
  struct topology* result = (struct topology*) emalloc(sizeof(struct topology));

  result->streaming_mp = prop.multiProcessorCount;
  result->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor);

  // Total core count is derived from the two fields just stored.
  result->cuda_cores = result->streaming_mp * result->cores_per_mp;

  return result;
}
|
||||||
|
|
||||||
@@ -25,7 +29,7 @@ struct memory* get_memory_info(struct gpu_info* gpu) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Estimates the peak performance of the GPU in FLOP/s as
//   freq * 1000000 * cuda_cores * 2
// gpu->freq is scaled by 1e6, i.e. it is treated as MHz. The factor 2
// presumably counts a fused multiply-add as two operations (TODO confirm).
//
// Returns -1, the value get_str_peak_performance() reports as unknown, when
// the frequency is negative (unknown).
int64_t get_peak_performance(struct gpu_info* gpu) {
  if(gpu->freq < 0)
    return -1;

  // Widen to 64 bits before multiplying. With 32-bit operands the product
  // overflows: 1500 MHz * 1000000 * 2560 cores is already far beyond INT32_MAX.
  const int64_t hz = (int64_t) gpu->freq * 1000000;
  const int64_t cores = (int64_t) gpu->topo->cuda_cores;

  return hz * cores * 2;
}
|
||||||
|
|
||||||
struct gpu_info* get_gpu_info() {
|
struct gpu_info* get_gpu_info() {
|
||||||
@@ -38,10 +42,10 @@ struct gpu_info* get_gpu_info() {
|
|||||||
cudaDeviceProp deviceProp;
|
cudaDeviceProp deviceProp;
|
||||||
cudaGetDeviceProperties(&deviceProp, dev);
|
cudaGetDeviceProperties(&deviceProp, dev);
|
||||||
|
|
||||||
|
gpu->freq = deviceProp.clockRate * 1e-3f;
|
||||||
gpu->vendor = GPU_VENDOR_NVIDIA;
|
gpu->vendor = GPU_VENDOR_NVIDIA;
|
||||||
gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1));
|
gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1));
|
||||||
strcpy(gpu->name, deviceProp.name);
|
strcpy(gpu->name, deviceProp.name);
|
||||||
gpu->freq = 10000;
|
|
||||||
|
|
||||||
gpu->nvmld = nvml_init();
|
gpu->nvmld = nvml_init();
|
||||||
if(nvml_get_pci_info(dev, gpu->nvmld)) {
|
if(nvml_get_pci_info(dev, gpu->nvmld)) {
|
||||||
@@ -50,21 +54,21 @@ struct gpu_info* get_gpu_info() {
|
|||||||
|
|
||||||
gpu->arch = get_uarch_from_cuda(gpu);
|
gpu->arch = get_uarch_from_cuda(gpu);
|
||||||
gpu->cach = get_cache_info(gpu);
|
gpu->cach = get_cache_info(gpu);
|
||||||
gpu->topo = get_topology_info(gpu);
|
gpu->topo = get_topology_info(gpu, deviceProp);
|
||||||
gpu->peak_performance = get_peak_performance(gpu);
|
gpu->peak_performance = get_peak_performance(gpu);
|
||||||
|
|
||||||
return gpu;
|
return gpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the streaming multiprocessor count stored in gpu->topo.
int32_t get_str_sm(struct gpu_info* gpu) {
  const int32_t sm_count = gpu->topo->streaming_mp;
  return sm_count;
}
|
||||||
|
|
||||||
// Returns the number of cores per multiprocessor stored in gpu->topo.
int32_t get_str_cores_sm(struct gpu_info* gpu) {
  const int32_t per_mp = gpu->topo->cores_per_mp;
  return per_mp;
}
|
||||||
|
|
||||||
// Returns the total CUDA core count stored in gpu->topo.
int32_t get_str_cuda_cores(struct gpu_info* gpu) {
  const int32_t total_cores = gpu->topo->cuda_cores;
  return total_cores;
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,8 +4,8 @@
|
|||||||
#include "../common/gpu.hpp"
|
#include "../common/gpu.hpp"
|
||||||
|
|
||||||
struct gpu_info* get_gpu_info();
|
struct gpu_info* get_gpu_info();
|
||||||
char* get_str_sm(struct gpu_info* gpu);
|
int32_t get_str_sm(struct gpu_info* gpu);
|
||||||
char* get_str_cores_sm(struct gpu_info* gpu);
|
int32_t get_str_cores_sm(struct gpu_info* gpu);
|
||||||
char* get_str_cuda_cores(struct gpu_info* gpu);
|
int32_t get_str_cuda_cores(struct gpu_info* gpu);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ bool nvml_get_pci_info(int dev, struct nvml_data* data) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((result = nvmlDeviceGetPciInfo(device, &data->pci)) != result) {
|
if ((result = nvmlDeviceGetPciInfo(device, &data->pci)) != NVML_SUCCESS) {
|
||||||
printErr("nvmlDeviceGetPciInfo: %s\n", nvmlErrorString(result));
|
printErr("nvmlDeviceGetPciInfo: %s\n", nvmlErrorString(result));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user