369 lines
18 KiB
C++
369 lines
18 KiB
C++
#include <cuda_runtime.h>
|
|
#include <cstdlib>
|
|
#include <cstdint>
|
|
#include <cstddef>
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
|
|
#include "../common/uarch.hpp"
|
|
#include "../common/global.hpp"
|
|
#include "../common/gpu.hpp"
|
|
#include "chips.hpp"
|
|
|
|
// Any clock multiplier
|
|
#define CM_ANY -1
|
|
|
|
// MICROARCH values
|
|
enum {
|
|
UARCH_UNKNOWN,
|
|
UARCH_TESLA,
|
|
UARCH_FERMI,
|
|
UARCH_KEPLER,
|
|
UARCH_MAXWELL,
|
|
UARCH_PASCAL,
|
|
UARCH_VOLTA,
|
|
UARCH_TURING,
|
|
UARCH_AMPERE,
|
|
UARCH_ADA,
|
|
UARCH_HOPPER
|
|
};
|
|
|
|
static const char *uarch_str[] = {
|
|
/*[ARCH_UNKNOWN = */ STRING_UNKNOWN,
|
|
/*[ARCH_TESLA] = */ "Tesla",
|
|
/*[ARCH_FERMI] = */ "Fermi",
|
|
/*[ARCH_KEPLER] = */ "Kepler",
|
|
/*[ARCH_MAXWELL] = */ "Maxwell",
|
|
/*[ARCH_PASCAL] = */ "Pascal",
|
|
/*[ARCH_VOLTA] = */ "Volta",
|
|
/*[ARCH_TURING] = */ "Turing",
|
|
/*[ARCH_AMPERE] = */ "Ampere",
|
|
/*[ARCH_ADA] = */ "Ada Lovelace",
|
|
/*[ARCH_HOPPER] = */ "Hopper"
|
|
};
|
|
|
|
#define CHECK_UARCH_START if (false) {}
|
|
#define CHECK_UARCH(arch, chip_, str, uarch, process) \
|
|
else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process);
|
|
#define CHECK_UARCH_END else { if(arch->chip != CHIP_UNKNOWN_CUDA) printBug("map_chip_to_uarch_cuda: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, UNK); }
|
|
|
|
void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) {
|
|
arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1));
|
|
strcpy(arch->chip_str, str);
|
|
arch->uarch = u;
|
|
arch->process = process;
|
|
}
|
|
|
|
/*
|
|
* - manufacturing process values were retrieved from techpowerup
|
|
* - chip code names details:
|
|
* o CHIP_XXXGL: indicates a professional-class (Quadro/Tesla) chip
|
|
* o CHIP_XXXM: indicates a mobile chip
|
|
*/
|
|
void map_chip_to_uarch_cuda(struct uarch* arch) {
|
|
CHECK_UARCH_START
|
|
// TESLA (1.0, 1.1, 1.2, 1.3) //
|
|
CHECK_UARCH(arch, CHIP_G80, "G80", UARCH_TESLA, 90)
|
|
CHECK_UARCH(arch, CHIP_G80GL, "G80", UARCH_TESLA, 90)
|
|
CHECK_UARCH(arch, CHIP_G84, "G84", UARCH_TESLA, 80)
|
|
CHECK_UARCH(arch, CHIP_G84GL, "G84", UARCH_TESLA, 80)
|
|
CHECK_UARCH(arch, CHIP_G84GLM, "G84", UARCH_TESLA, 80)
|
|
CHECK_UARCH(arch, CHIP_G84M, "G84", UARCH_TESLA, 80)
|
|
CHECK_UARCH(arch, CHIP_G86, "G86", UARCH_TESLA, 80)
|
|
CHECK_UARCH(arch, CHIP_G86GLM, "G86", UARCH_TESLA, 80)
|
|
CHECK_UARCH(arch, CHIP_G86M, "G86", UARCH_TESLA, 80)
|
|
CHECK_UARCH(arch, CHIP_G92, "G92", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G92GL, "G92", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G92GLM, "G92", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G92M, "G92", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G94, "G94", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G94GL, "G94", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G94GLM, "G94", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G94M, "G94", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G96, "G96", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G96C, "G96", UARCH_TESLA, 55)
|
|
CHECK_UARCH(arch, CHIP_G96CGL, "G96", UARCH_TESLA, 55)
|
|
CHECK_UARCH(arch, CHIP_G96CM, "G96", UARCH_TESLA, 55)
|
|
CHECK_UARCH(arch, CHIP_G96GL, "G96", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G96GLM, "G96", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G96M, "G96", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G98, "G98", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G98GL, "G98", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G98GLM, "G98", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_G98M, "G98", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_GT200, "GT200", UARCH_TESLA, 65)
|
|
CHECK_UARCH(arch, CHIP_GT200B, "GT200", UARCH_TESLA, 55)
|
|
CHECK_UARCH(arch, CHIP_GT200GL, "GT200", UARCH_TESLA, 55)
|
|
CHECK_UARCH(arch, CHIP_GT215, "GT215", UARCH_TESLA, 40)
|
|
CHECK_UARCH(arch, CHIP_GT215GLM, "GT215", UARCH_TESLA, 40)
|
|
CHECK_UARCH(arch, CHIP_GT215M, "GT215", UARCH_TESLA, 40)
|
|
CHECK_UARCH(arch, CHIP_GT216, "GT216", UARCH_TESLA, 40)
|
|
CHECK_UARCH(arch, CHIP_GT216GL, "GT216", UARCH_TESLA, 40)
|
|
CHECK_UARCH(arch, CHIP_GT216GLM, "GT216", UARCH_TESLA, 40)
|
|
CHECK_UARCH(arch, CHIP_GT216M, "GT216", UARCH_TESLA, 40)
|
|
CHECK_UARCH(arch, CHIP_GT218, "GT218", UARCH_TESLA, 40)
|
|
CHECK_UARCH(arch, CHIP_GT218GL, "GT218", UARCH_TESLA, 40)
|
|
CHECK_UARCH(arch, CHIP_GT218GLM, "GT218", UARCH_TESLA, 40)
|
|
CHECK_UARCH(arch, CHIP_GT218M, "GT218", UARCH_TESLA, 40)
|
|
// FERMI (2.0, 2.1) //
|
|
CHECK_UARCH(arch, CHIP_GF100, "GF100", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF100GL, "GF100", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF100GLM, "GF100", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF100M, "GF100", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF104, "GF104", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF104GLM, "GF104", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF104M, "GF104", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF106, "GF106", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF106GL, "GF106", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF106GLM, "GF106", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF106M, "GF106", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF108, "GF108", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF108GL, "GF108", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF108GLM, "GF108", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF108M, "GF108", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF110, "GF110", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF110GL, "GF110", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF114, "GF114", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF114M, "GF114", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF116, "GF116", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF116M, "GF116", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF117M, "GF117", UARCH_FERMI, 28)
|
|
CHECK_UARCH(arch, CHIP_GF119, "GF119", UARCH_FERMI, 40)
|
|
CHECK_UARCH(arch, CHIP_GF119M, "GF119", UARCH_FERMI, 40)
|
|
// KEPLER (3.0, 3.2, 3.5, 3.7 //
|
|
CHECK_UARCH(arch, CHIP_GK104, "GK104", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK104GL, "GK104", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK104GLM, "GK104", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK104M, "GK104", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK106, "GK106", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK106GL, "GK106", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK106GLM, "GK106", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK106M, "GK106", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK107, "GK107", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK107GL, "GK107", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK107GLM, "GK107", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK107M, "GK107", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK110, "GK110", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK110B, "GK110", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK110BGL, "GK110", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK110GL, "GK110", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK208, "GK208", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK208B, "GK208", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK208BM, "GK208", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK208GLM, "GK208", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK208M, "GK208", UARCH_KEPLER, 28)
|
|
CHECK_UARCH(arch, CHIP_GK210GL, "GK210", UARCH_KEPLER, 28)
|
|
// MAXWELL (5.0, 5.2, 5.3) //
|
|
CHECK_UARCH(arch, CHIP_GM107, "GM107", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM107GL, "GM107", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM107GLM, "GM107", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM107M, "GM107", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM108GLM, "GM108", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM108M, "GM108", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM200, "GM200", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM200GL, "GM200", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM204, "GM204", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM204GL, "GM204", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM204GLM, "GM204", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM204M, "GM204", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM206, "GM206", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM206GL, "GM206", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM206GLM, "GM206", UARCH_MAXWELL, 28)
|
|
CHECK_UARCH(arch, CHIP_GM206M, "GM206", UARCH_MAXWELL, 28)
|
|
// PASCAL (6.0, 6.1, 6.2) //
|
|
CHECK_UARCH(arch, CHIP_GP100, "GP100", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP100GL, "GP100", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP102, "GP102", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP102GL, "GP102", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP104, "GP104", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP104BM, "GP104", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP104GL, "GP104", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP104GLM, "GP104", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP104M, "GP104", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP106, "GP106", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP106BM, "GP106", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP106GL, "GP106", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP106M, "GP106", UARCH_PASCAL, 16)
|
|
CHECK_UARCH(arch, CHIP_GP107, "GP107", UARCH_PASCAL, 14)
|
|
CHECK_UARCH(arch, CHIP_GP107BM, "GP107", UARCH_PASCAL, 14)
|
|
CHECK_UARCH(arch, CHIP_GP107GL, "GP107", UARCH_PASCAL, 14)
|
|
CHECK_UARCH(arch, CHIP_GP107GLM, "GP107", UARCH_PASCAL, 14)
|
|
CHECK_UARCH(arch, CHIP_GP107M, "GP107", UARCH_PASCAL, 14)
|
|
CHECK_UARCH(arch, CHIP_GP108, "GP108", UARCH_PASCAL, 14)
|
|
CHECK_UARCH(arch, CHIP_GP108BM, "GP108", UARCH_PASCAL, 14)
|
|
CHECK_UARCH(arch, CHIP_GP108GLM, "GP108", UARCH_PASCAL, 14)
|
|
CHECK_UARCH(arch, CHIP_GP108M, "GP108", UARCH_PASCAL, 14)
|
|
// VOLTA (7.0, 7.2) //
|
|
CHECK_UARCH(arch, CHIP_GV100, "GV100", UARCH_VOLTA, 12)
|
|
CHECK_UARCH(arch, CHIP_GV100GL, "GV100", UARCH_VOLTA, 12)
|
|
// TURING (7.5) //
|
|
CHECK_UARCH(arch, CHIP_TU102, "TU102", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU102GL, "TU102", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU104, "TU104", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU104BM, "TU104", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU104GL, "TU104", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU104GLM, "TU104", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU104M, "TU104", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU106, "TU106", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU106BM, "TU106", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU106GLM, "TU106", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU106M, "TU106", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU116, "TU116", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU116BM, "TU116", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU116GL, "TU116", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU116M, "TU116", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU117, "TU117", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU117BM, "TU117", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU117GL, "TU117", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU117GLM, "TU117", UARCH_TURING, 12)
|
|
CHECK_UARCH(arch, CHIP_TU117M, "TU117", UARCH_TURING, 12)
|
|
// AMPERE (8.0, 8.6) //
|
|
CHECK_UARCH(arch, CHIP_GA100, "GA100", UARCH_AMPERE, 7)
|
|
CHECK_UARCH(arch, CHIP_GA100GL, "GA100", UARCH_AMPERE, 7)
|
|
CHECK_UARCH(arch, CHIP_GA102, "GA102", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA102GL, "GA102", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA103, "GA103", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA103GLM, "GA103", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA103M, "GA103", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA104, "GA104", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA104GL, "GA104", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA104GLM, "GA104", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA104M, "GA104", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA106, "GA106", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA106M, "GA106", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA107, "GA107", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA107BM, "GA107", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA107GLM, "GA107", UARCH_AMPERE, 8)
|
|
CHECK_UARCH(arch, CHIP_GA107M, "GA107", UARCH_AMPERE, 8)
|
|
// ADA LOVELACE (8.9)
|
|
CHECK_UARCH(arch, CHIP_AD102, "AD102", UARCH_ADA, 4)
|
|
CHECK_UARCH(arch, CHIP_AD102GL, "AD102", UARCH_ADA, 4)
|
|
CHECK_UARCH(arch, CHIP_AD104, "AD104", UARCH_ADA, 4)
|
|
CHECK_UARCH(arch, CHIP_AD104GL, "AD104", UARCH_ADA, 4)
|
|
// HOPPER (9.0)
|
|
CHECK_UARCH(arch, CHIP_GH100, "GH100", UARCH_HOPPER, 4)
|
|
CHECK_UARCH_END
|
|
}
|
|
|
|
struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
|
|
struct uarch* arch = (struct uarch*) emalloc(sizeof(struct uarch));
|
|
|
|
cudaError_t err = cudaSuccess;
|
|
cudaDeviceProp deviceProp;
|
|
if ((err = cudaGetDeviceProperties(&deviceProp, gpu->idx)) != cudaSuccess) {
|
|
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
|
|
return NULL;
|
|
}
|
|
|
|
arch->chip_str = NULL;
|
|
arch->cc_major = deviceProp.major;
|
|
arch->cc_minor = deviceProp.minor;
|
|
arch->compute_capability = deviceProp.major * 10 + deviceProp.minor;
|
|
arch->chip = get_chip_from_pci_cuda(gpu->pci);
|
|
map_chip_to_uarch_cuda(arch);
|
|
|
|
return arch;
|
|
}
|
|
|
|
#define CHECK_MEMTYPE_START if (false) {}
|
|
#define CHECK_MEMTYPE(arch, clkm, arch_, clkm_, memtype) \
|
|
else if (arch->uarch == arch_ && (clkm_ == CM_ANY || clkm == clkm_)) return memtype;
|
|
#define CHECK_MEMTYPE_END else { printBug("guess_memtype_from_cmul_and_uarch: Found invalid combination: clkm=%d, uarch=%d", clkm, arch->uarch); return MEMTYPE_UNKNOWN; }
|
|
|
|
bool clkm_possible_for_uarch(int clkm, struct uarch* arch) {
|
|
switch(arch->uarch) {
|
|
case UARCH_TESLA: return false;
|
|
case UARCH_FERMI: return clkm == 1 || clkm == 2;
|
|
case UARCH_KEPLER: return clkm == 1 || clkm == 2;
|
|
case UARCH_MAXWELL: return clkm == 1 || clkm == 2;
|
|
case UARCH_PASCAL: return clkm == 1 || clkm == 2 || clkm == 4;
|
|
case UARCH_VOLTA: return clkm == 1;
|
|
case UARCH_TURING: return clkm == 2 || clkm == 4;
|
|
case UARCH_AMPERE: return clkm == 1 || clkm == 4 || clkm == 8;
|
|
case UARCH_ADA: return clkm == 8;
|
|
case UARCH_HOPPER: return clkm == 1;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
|
|
if(arch->uarch == UARCH_UNKNOWN) {
|
|
printWarn("guess_memtype_from_cmul_and_uarch: Found unknown uarch");
|
|
return MEMTYPE_UNKNOWN;
|
|
}
|
|
/*
|
|
* +---------+------------------+
|
|
* | MEMTYPE | Clock multiplier |
|
|
* +---------+------------------+
|
|
* | DDR3 | 1 |
|
|
* | DDR4 | 1 |
|
|
* | GDDR5 | 2 |
|
|
* | GDDR5X | 4 |
|
|
* | GDDR6 | 4 |
|
|
* | GDDR6X | 8 |
|
|
* | HBM | 1 |
|
|
* | HBM2 | 1 |
|
|
* +---------+------------------+
|
|
*
|
|
* archs in parenthesis are not included in this rules
|
|
* and will be detected wrongly
|
|
*/
|
|
CHECK_MEMTYPE_START
|
|
// TESLA
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_TESLA, CM_ANY, MEMTYPE_UNKNOWN)
|
|
// FERMI
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_FERMI, 1, MEMTYPE_DDR3)
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_FERMI, 2, MEMTYPE_GDDR5)
|
|
// KEPLER (jetson)
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_KEPLER, 1, MEMTYPE_DDR3)
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_KEPLER, 2, MEMTYPE_GDDR5)
|
|
// MAXWELL (switch, jetson)
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_MAXWELL, 1, MEMTYPE_DDR3)
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_MAXWELL, 2, MEMTYPE_GDDR5)
|
|
// PASCAL
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL, 1, MEMTYPE_DDR4)
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL, 2, MEMTYPE_GDDR5)
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL, 4, MEMTYPE_GDDR5X)
|
|
// VOLTA (jetson)
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_VOLTA, CM_ANY, MEMTYPE_HBM2)
|
|
// TURING
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_TURING, 2, MEMTYPE_GDDR5)
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_TURING, 4, MEMTYPE_GDDR6)
|
|
// AMPERE
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE, 1, MEMTYPE_HBM2)
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE, 4, MEMTYPE_GDDR6)
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE, 8, MEMTYPE_GDDR6X)
|
|
// ADA
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_ADA, 8, MEMTYPE_GDDR6X)
|
|
// HOPPER
|
|
CHECK_MEMTYPE(arch, clkm, UARCH_HOPPER, 1, MEMTYPE_HBM2)
|
|
CHECK_MEMTYPE_END
|
|
}
|
|
|
|
char* get_str_cc(struct uarch* arch) {
|
|
uint32_t max_size = 4;
|
|
char* cc = (char *) ecalloc(max_size, sizeof(char));
|
|
snprintf(cc, max_size, "%d.%d", arch->cc_major, arch->cc_minor);
|
|
return cc;
|
|
}
|
|
|
|
char* get_str_chip(struct uarch* arch) {
|
|
return arch->chip_str;
|
|
}
|
|
|
|
// TODO: What about _ConvertSMVer2ArchName?
|
|
const char* get_str_uarch_cuda(struct uarch* arch) {
|
|
return uarch_str[arch->uarch];
|
|
}
|
|
|
|
void free_uarch_struct(struct uarch* arch) {
|
|
free(arch->uarch_str);
|
|
free(arch->chip_str);
|
|
free(arch);
|
|
}
|
|
|
|
bool is_chip_TU116(struct uarch* arch) {
|
|
return arch->chip == CHIP_TU116 || arch->chip == CHIP_TU116BM ||
|
|
arch->chip == CHIP_TU116GL || arch->chip == CHIP_TU116M;
|
|
}
|