diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp
index c2bbbcc..06e44d6 100644
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -29,7 +29,7 @@ struct topology* get_topology_info(cudaDeviceProp prop) {
 
 int32_t guess_clock_multipilier(struct gpu_info* gpu, struct memory* mem) {
   // Guess clock multiplier
-  int32_t clk_mul = -1;
+  int32_t clk_mul = 1;
 
   int32_t clk8 = abs((mem->freq/8) - gpu->freq);
   int32_t clk4 = abs((mem->freq/4) - gpu->freq);
@@ -37,10 +37,10 @@ int32_t guess_clock_multipilier(struct gpu_info* gpu, struct memory* mem) {
   int32_t clk1 = abs((mem->freq/1) - gpu->freq);
 
   int32_t min = mem->freq;
-  if(min > clk8) { clk_mul = 8; min = clk8; }
-  if(min > clk4) { clk_mul = 4; min = clk4; }
-  if(min > clk2) { clk_mul = 2; min = clk2; }
-  if(min > clk1) { clk_mul = 1; min = clk1; }
+  if(clkm_possible_for_uarch(8, gpu->arch) && min > clk8) { clk_mul = 8; min = clk8; }
+  if(clkm_possible_for_uarch(4, gpu->arch) && min > clk4) { clk_mul = 4; min = clk4; }
+  if(clkm_possible_for_uarch(2, gpu->arch) && min > clk2) { clk_mul = 2; min = clk2; }
+  if(clkm_possible_for_uarch(1, gpu->arch) && min > clk1) { clk_mul = 1; min = clk1; }
 
   return clk_mul;
 }
diff --git a/src/cuda/uarch.cpp b/src/cuda/uarch.cpp
index dc99571..6a9f144 100644
--- a/src/cuda/uarch.cpp
+++ b/src/cuda/uarch.cpp
@@ -275,6 +275,20 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
    else if (arch->uarch == arch_ && (clkm_ == CM_ANY || clkm == clkm_)) return memtype;
 #define CHECK_MEMTYPE_END else { printBug("guess_memtype_from_cmul_and_uarch: Found invalid combination: clkm=%d, uarch=%d", clkm, arch->uarch); return MEMTYPE_UNKNOWN; }
 
+bool clkm_possible_for_uarch(int clkm, struct uarch* arch) {
+  switch(arch->uarch) {
+    case UARCH_TESLA: return false;
+    case UARCH_FERMI: return clkm == 1 || clkm == 2;
+    case UARCH_KEPLER: return clkm == 1 || clkm == 2;
+    case UARCH_MAXWELL: return clkm == 1 || clkm == 2;
+    case UARCH_PASCAL: return clkm == 1 || clkm == 2 || clkm == 4;
+    case UARCH_VOLTA: return clkm == 1;
+    case UARCH_TURING: return clkm == 2 || clkm == 4;
+    case UARCH_AMPERE: return clkm == 1 || clkm == 4 || clkm == 8;
+  }
+  return false;
+}
+
 MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
   /*
    * +---------+------------------+
diff --git a/src/cuda/uarch.hpp b/src/cuda/uarch.hpp
index ad0446e..86355fe 100644
--- a/src/cuda/uarch.hpp
+++ b/src/cuda/uarch.hpp
@@ -6,6 +6,7 @@
 struct uarch;
 
 struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
+bool clkm_possible_for_uarch(int clkm, struct uarch* arch);
 MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch);
 char* get_str_uarch(struct uarch* arch);
 char* get_str_cc(struct uarch* arch);