Files
gpufetch/src/cuda/uarch.cpp
Dr-Noob 0950b97393 [v0.30] Build pciutils only if necessary
If only HSA is enabled we don't need pciutils since AMD detection does
not rely on it. Therefore we change CMakeLists.txt to build pciutils
only if required.

This commit has some side-effects:
1. We now don't build Intel backend by default. In other words, no
   backend is built by default, the user must specify which backend
   to use.
2. There were some issues with includes and wrongly used defines and
   variables. This commit fixes all that.
2025-10-16 08:26:42 +02:00

370 lines
18 KiB
C++

#include <cuda_runtime.h>
#include <cstdlib>
#include <cstdint>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include "../common/uarch.hpp"
#include "../common/global.hpp"
#include "../common/gpu.hpp"
#include "pci.hpp"
#include "chips.hpp"
// Any clock multiplier
// Wildcard for the clock-multiplier argument of CHECK_MEMTYPE: a rule declared
// with CM_ANY matches whatever clock multiplier is being tested.
#define CM_ANY -1
// MICROARCH values
// The numeric value of each entry is used as an index into uarch_str, so the
// order of this enum and the order of that array must be kept in sync.
enum {
UARCH_UNKNOWN,
UARCH_TESLA,
UARCH_FERMI,
UARCH_KEPLER,
UARCH_MAXWELL,
UARCH_PASCAL,
UARCH_VOLTA,
UARCH_TURING,
UARCH_AMPERE,
UARCH_ADA,
UARCH_HOPPER
};
// Display names of the microarchitectures. The array is indexed with the
// UARCH_* value stored in arch->uarch (see get_str_uarch_cuda), so the entries
// must stay in the same order as the UARCH_* enum.
static const char *uarch_str[] = {
/*[UARCH_UNKNOWN] = */ STRING_UNKNOWN,
/*[UARCH_TESLA]   = */ "Tesla",
/*[UARCH_FERMI]   = */ "Fermi",
/*[UARCH_KEPLER]  = */ "Kepler",
/*[UARCH_MAXWELL] = */ "Maxwell",
/*[UARCH_PASCAL]  = */ "Pascal",
/*[UARCH_VOLTA]   = */ "Volta",
/*[UARCH_TURING]  = */ "Turing",
/*[UARCH_AMPERE]  = */ "Ampere",
/*[UARCH_ADA]     = */ "Ada Lovelace",
/*[UARCH_HOPPER]  = */ "Hopper"
};
// Building blocks for one long if / else-if chain that maps a chip id to its
// description:
//  - CHECK_UARCH_START opens the chain with a branch that is never taken, so
//    that every following entry can start with `else if`.
//  - CHECK_UARCH adds one entry: when arch->chip equals chip_, fill_uarch is
//    called with the given chip name, microarchitecture and process.
//  - CHECK_UARCH_END closes the chain: it reports a bug unless the chip is
//    CHIP_UNKNOWN_CUDA, and then fills the struct with
//    STRING_UNKNOWN / UARCH_UNKNOWN / UNK.
// CHECK_UARCH_END refers to a variable literally named `arch`, so the chain
// can only be expanded where such a variable is in scope.
#define CHECK_UARCH_START if (false) {}
#define CHECK_UARCH(arch, chip_, str, uarch, process) \
else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process);
#define CHECK_UARCH_END else { if(arch->chip != CHIP_UNKNOWN_CUDA) printBug("map_chip_to_uarch_cuda: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, UNK); }
/*
 * Stores the chip name, the microarchitecture and the manufacturing process
 * in arch. The name is duplicated into a buffer obtained from emalloc and
 * arch->chip_str is pointed at that copy, so str itself is not retained.
 */
void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) {
  // Size of the copy, terminating NUL included.
  const size_t name_bytes = strlen(str) + 1;
  char* name_copy = (char *) emalloc(sizeof(char) * name_bytes);
  memcpy(name_copy, str, name_bytes);

  arch->process = process;
  arch->uarch = u;
  arch->chip_str = name_copy;
}
/*
 * Fills arch->chip_str, arch->uarch and arch->process from arch->chip.
 *
 * The chain of CHECK_UARCH entries below compares arch->chip against every
 * known chip id; the matching entry calls fill_uarch with the chip name, the
 * microarchitecture and the manufacturing process. Variants of the same chip
 * (GL, M, ...) share one name string. When nothing matches, CHECK_UARCH_END
 * reports a bug (unless the chip is CHIP_UNKNOWN_CUDA) and fills the struct
 * with STRING_UNKNOWN / UARCH_UNKNOWN / UNK.
 *
 * - manufacturing process values were retrieved from techpowerup
 *   (presumably expressed in nm -- TODO confirm against the code that
 *   consumes arch->process)
 * - chip code names details:
 *    o CHIP_XXXGL: indicates a professional-class (Quadro/Tesla) chip
 *    o CHIP_XXXM: indicates a mobile chip
 * - the numbers in each section header below look like the compute
 *   capabilities of that family (TODO confirm)
 */
void map_chip_to_uarch_cuda(struct uarch* arch) {
CHECK_UARCH_START
// TESLA (1.0, 1.1, 1.2, 1.3) //
CHECK_UARCH(arch, CHIP_G80, "G80", UARCH_TESLA, 90)
CHECK_UARCH(arch, CHIP_G80GL, "G80", UARCH_TESLA, 90)
CHECK_UARCH(arch, CHIP_G84, "G84", UARCH_TESLA, 80)
CHECK_UARCH(arch, CHIP_G84GL, "G84", UARCH_TESLA, 80)
CHECK_UARCH(arch, CHIP_G84GLM, "G84", UARCH_TESLA, 80)
CHECK_UARCH(arch, CHIP_G84M, "G84", UARCH_TESLA, 80)
CHECK_UARCH(arch, CHIP_G86, "G86", UARCH_TESLA, 80)
CHECK_UARCH(arch, CHIP_G86GLM, "G86", UARCH_TESLA, 80)
CHECK_UARCH(arch, CHIP_G86M, "G86", UARCH_TESLA, 80)
CHECK_UARCH(arch, CHIP_G92, "G92", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G92GL, "G92", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G92GLM, "G92", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G92M, "G92", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G94, "G94", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G94GL, "G94", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G94GLM, "G94", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G94M, "G94", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G96, "G96", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G96C, "G96", UARCH_TESLA, 55)
CHECK_UARCH(arch, CHIP_G96CGL, "G96", UARCH_TESLA, 55)
CHECK_UARCH(arch, CHIP_G96CM, "G96", UARCH_TESLA, 55)
CHECK_UARCH(arch, CHIP_G96GL, "G96", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G96GLM, "G96", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G96M, "G96", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G98, "G98", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G98GL, "G98", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G98GLM, "G98", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_G98M, "G98", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_GT200, "GT200", UARCH_TESLA, 65)
CHECK_UARCH(arch, CHIP_GT200B, "GT200", UARCH_TESLA, 55)
CHECK_UARCH(arch, CHIP_GT200GL, "GT200", UARCH_TESLA, 55)
CHECK_UARCH(arch, CHIP_GT215, "GT215", UARCH_TESLA, 40)
CHECK_UARCH(arch, CHIP_GT215GLM, "GT215", UARCH_TESLA, 40)
CHECK_UARCH(arch, CHIP_GT215M, "GT215", UARCH_TESLA, 40)
CHECK_UARCH(arch, CHIP_GT216, "GT216", UARCH_TESLA, 40)
CHECK_UARCH(arch, CHIP_GT216GL, "GT216", UARCH_TESLA, 40)
CHECK_UARCH(arch, CHIP_GT216GLM, "GT216", UARCH_TESLA, 40)
CHECK_UARCH(arch, CHIP_GT216M, "GT216", UARCH_TESLA, 40)
CHECK_UARCH(arch, CHIP_GT218, "GT218", UARCH_TESLA, 40)
CHECK_UARCH(arch, CHIP_GT218GL, "GT218", UARCH_TESLA, 40)
CHECK_UARCH(arch, CHIP_GT218GLM, "GT218", UARCH_TESLA, 40)
CHECK_UARCH(arch, CHIP_GT218M, "GT218", UARCH_TESLA, 40)
// FERMI (2.0, 2.1) //
CHECK_UARCH(arch, CHIP_GF100, "GF100", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF100GL, "GF100", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF100GLM, "GF100", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF100M, "GF100", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF104, "GF104", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF104GLM, "GF104", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF104M, "GF104", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF106, "GF106", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF106GL, "GF106", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF106GLM, "GF106", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF106M, "GF106", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF108, "GF108", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF108GL, "GF108", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF108GLM, "GF108", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF108M, "GF108", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF110, "GF110", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF110GL, "GF110", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF114, "GF114", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF114M, "GF114", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF116, "GF116", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF116M, "GF116", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF117M, "GF117", UARCH_FERMI, 28)
CHECK_UARCH(arch, CHIP_GF119, "GF119", UARCH_FERMI, 40)
CHECK_UARCH(arch, CHIP_GF119M, "GF119", UARCH_FERMI, 40)
// KEPLER (3.0, 3.2, 3.5, 3.7) //
CHECK_UARCH(arch, CHIP_GK104, "GK104", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK104GL, "GK104", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK104GLM, "GK104", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK104M, "GK104", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK106, "GK106", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK106GL, "GK106", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK106GLM, "GK106", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK106M, "GK106", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK107, "GK107", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK107GL, "GK107", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK107GLM, "GK107", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK107M, "GK107", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK110, "GK110", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK110B, "GK110", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK110BGL, "GK110", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK110GL, "GK110", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK208, "GK208", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK208B, "GK208", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK208BM, "GK208", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK208GLM, "GK208", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK208M, "GK208", UARCH_KEPLER, 28)
CHECK_UARCH(arch, CHIP_GK210GL, "GK210", UARCH_KEPLER, 28)
// MAXWELL (5.0, 5.2, 5.3) //
CHECK_UARCH(arch, CHIP_GM107, "GM107", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM107GL, "GM107", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM107GLM, "GM107", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM107M, "GM107", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM108GLM, "GM108", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM108M, "GM108", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM200, "GM200", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM200GL, "GM200", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM204, "GM204", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM204GL, "GM204", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM204GLM, "GM204", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM204M, "GM204", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM206, "GM206", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM206GL, "GM206", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM206GLM, "GM206", UARCH_MAXWELL, 28)
CHECK_UARCH(arch, CHIP_GM206M, "GM206", UARCH_MAXWELL, 28)
// PASCAL (6.0, 6.1, 6.2) //
CHECK_UARCH(arch, CHIP_GP100, "GP100", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP100GL, "GP100", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP102, "GP102", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP102GL, "GP102", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP104, "GP104", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP104BM, "GP104", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP104GL, "GP104", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP104GLM, "GP104", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP104M, "GP104", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP106, "GP106", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP106BM, "GP106", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP106GL, "GP106", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP106M, "GP106", UARCH_PASCAL, 16)
CHECK_UARCH(arch, CHIP_GP107, "GP107", UARCH_PASCAL, 14)
CHECK_UARCH(arch, CHIP_GP107BM, "GP107", UARCH_PASCAL, 14)
CHECK_UARCH(arch, CHIP_GP107GL, "GP107", UARCH_PASCAL, 14)
CHECK_UARCH(arch, CHIP_GP107GLM, "GP107", UARCH_PASCAL, 14)
CHECK_UARCH(arch, CHIP_GP107M, "GP107", UARCH_PASCAL, 14)
CHECK_UARCH(arch, CHIP_GP108, "GP108", UARCH_PASCAL, 14)
CHECK_UARCH(arch, CHIP_GP108BM, "GP108", UARCH_PASCAL, 14)
CHECK_UARCH(arch, CHIP_GP108GLM, "GP108", UARCH_PASCAL, 14)
CHECK_UARCH(arch, CHIP_GP108M, "GP108", UARCH_PASCAL, 14)
// VOLTA (7.0, 7.2) //
CHECK_UARCH(arch, CHIP_GV100, "GV100", UARCH_VOLTA, 12)
CHECK_UARCH(arch, CHIP_GV100GL, "GV100", UARCH_VOLTA, 12)
// TURING (7.5) //
CHECK_UARCH(arch, CHIP_TU102, "TU102", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU102GL, "TU102", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU104, "TU104", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU104BM, "TU104", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU104GL, "TU104", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU104GLM, "TU104", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU104M, "TU104", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU106, "TU106", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU106BM, "TU106", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU106GLM, "TU106", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU106M, "TU106", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU116, "TU116", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU116BM, "TU116", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU116GL, "TU116", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU116M, "TU116", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU117, "TU117", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU117BM, "TU117", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU117GL, "TU117", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU117GLM, "TU117", UARCH_TURING, 12)
CHECK_UARCH(arch, CHIP_TU117M, "TU117", UARCH_TURING, 12)
// AMPERE (8.0, 8.6) //
CHECK_UARCH(arch, CHIP_GA100, "GA100", UARCH_AMPERE, 7)
CHECK_UARCH(arch, CHIP_GA100GL, "GA100", UARCH_AMPERE, 7)
CHECK_UARCH(arch, CHIP_GA102, "GA102", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA102GL, "GA102", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA103, "GA103", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA103GLM, "GA103", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA103M, "GA103", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA104, "GA104", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA104GL, "GA104", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA104GLM, "GA104", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA104M, "GA104", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA106, "GA106", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA106M, "GA106", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA107, "GA107", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA107BM, "GA107", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA107GLM, "GA107", UARCH_AMPERE, 8)
CHECK_UARCH(arch, CHIP_GA107M, "GA107", UARCH_AMPERE, 8)
// ADA LOVELACE (8.9) //
CHECK_UARCH(arch, CHIP_AD102, "AD102", UARCH_ADA, 4)
CHECK_UARCH(arch, CHIP_AD102GL, "AD102", UARCH_ADA, 4)
CHECK_UARCH(arch, CHIP_AD104, "AD104", UARCH_ADA, 4)
CHECK_UARCH(arch, CHIP_AD104GL, "AD104", UARCH_ADA, 4)
// HOPPER (9.0) //
CHECK_UARCH(arch, CHIP_GH100, "GH100", UARCH_HOPPER, 4)
CHECK_UARCH_END
}
/*
 * Builds the uarch description of the CUDA device referenced by gpu.
 *
 * The compute capability is read from the CUDA runtime (device gpu->idx),
 * the chip id is derived from gpu->pci, and map_chip_to_uarch_cuda then fills
 * in the chip name, microarchitecture and process.
 *
 * Returns a newly allocated struct uarch (release it with free_uarch_struct),
 * or NULL when the device properties cannot be queried; in that case the
 * CUDA error name and message are printed with printErr.
 */
struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
  cudaDeviceProp deviceProp;
  cudaError_t err = cudaGetDeviceProperties(&deviceProp, gpu->idx);
  if (err != cudaSuccess) {
    printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
    return NULL;
  }

  // Allocate only once the query has succeeded, so the error path above has
  // nothing to release (the struct used to be leaked there).
  struct uarch* arch = (struct uarch*) emalloc(sizeof(struct uarch));

  // free_uarch_struct calls free() on both string members. Start them at NULL
  // so that a member which never gets assigned is still safe to free.
  arch->uarch_str = NULL;
  arch->chip_str = NULL;

  arch->cc_major = deviceProp.major;
  arch->cc_minor = deviceProp.minor;
  arch->compute_capability = deviceProp.major * 10 + deviceProp.minor;
  arch->chip = get_chip_from_pci_cuda(gpu->pci);
  map_chip_to_uarch_cuda(arch);

  return arch;
}
// Building blocks for an if / else-if chain that maps a
// (microarchitecture, clock multiplier) pair to a memory type:
//  - CHECK_MEMTYPE_START opens the chain with a branch that is never taken,
//    so that every following rule can start with `else if`.
//  - CHECK_MEMTYPE adds one rule: it returns memtype when arch->uarch equals
//    arch_ and the rule's multiplier clkm_ is either CM_ANY or equal to clkm.
//  - CHECK_MEMTYPE_END closes the chain: no rule matched, so it reports a bug
//    and returns MEMTYPE_UNKNOWN.
// CHECK_MEMTYPE_END refers to variables literally named `clkm` and `arch`, so
// the chain can only be expanded where both are in scope.
#define CHECK_MEMTYPE_START if (false) {}
#define CHECK_MEMTYPE(arch, clkm, arch_, clkm_, memtype) \
else if (arch->uarch == arch_ && (clkm_ == CM_ANY || clkm == clkm_)) return memtype;
#define CHECK_MEMTYPE_END else { printBug("guess_memtype_from_cmul_and_uarch: Found invalid combination: clkm=%d, uarch=%d", clkm, arch->uarch); return MEMTYPE_UNKNOWN; }
/*
 * Tells whether clkm is one of the clock multipliers accepted for the
 * microarchitecture stored in arch->uarch. Tesla accepts none, and any
 * microarchitecture not listed below (UARCH_UNKNOWN included) yields false.
 */
bool clkm_possible_for_uarch(int clkm, struct uarch* arch) {
  const bool is_x1 = (clkm == 1);
  const bool is_x2 = (clkm == 2);
  const bool is_x4 = (clkm == 4);
  const bool is_x8 = (clkm == 8);

  switch (arch->uarch) {
    case UARCH_FERMI:
    case UARCH_KEPLER:
    case UARCH_MAXWELL:
      return is_x1 || is_x2;

    case UARCH_PASCAL:
      return is_x1 || is_x2 || is_x4;

    case UARCH_VOLTA:
    case UARCH_HOPPER:
      return is_x1;

    case UARCH_TURING:
      return is_x2 || is_x4;

    case UARCH_AMPERE:
      return is_x1 || is_x4 || is_x8;

    case UARCH_ADA:
      return is_x8;

    case UARCH_TESLA:
    default:
      return false;
  }
}
/*
 * Guesses the memory type from the memory clock multiplier (clkm) and the
 * microarchitecture stored in arch->uarch.
 *
 * Clock multiplier associated with each memory type:
 *
 * +---------+------------------+
 * | MEMTYPE | Clock multiplier |
 * +---------+------------------+
 * | DDR3    | 1                |
 * | DDR4    | 1                |
 * | GDDR5   | 2                |
 * | GDDR5X  | 4                |
 * | GDDR6   | 4                |
 * | GDDR6X  | 8                |
 * | HBM     | 1                |
 * | HBM2    | 1                |
 * +---------+------------------+
 *
 * Devices named in parentheses next to a microarchitecture below are not
 * covered by these rules and will be detected wrongly.
 *
 * Returns MEMTYPE_UNKNOWN (after a warning) when the microarchitecture is
 * unknown, and MEMTYPE_UNKNOWN (after a bug report) when the combination of
 * clkm and microarchitecture matches no rule.
 */
MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
  if (arch->uarch == UARCH_UNKNOWN) {
    printWarn("guess_memtype_from_cmul_and_uarch: Found unknown uarch");
    return MEMTYPE_UNKNOWN;
  }

  switch (arch->uarch) {
    case UARCH_TESLA:
      // Any multiplier: the memory type is not guessed for Tesla.
      return MEMTYPE_UNKNOWN;

    case UARCH_FERMI:
    case UARCH_KEPLER:   // (jetson)
    case UARCH_MAXWELL:  // (switch, jetson)
      if (clkm == 1) return MEMTYPE_DDR3;
      if (clkm == 2) return MEMTYPE_GDDR5;
      break;

    case UARCH_PASCAL:
      if (clkm == 1) return MEMTYPE_DDR4;
      if (clkm == 2) return MEMTYPE_GDDR5;
      if (clkm == 4) return MEMTYPE_GDDR5X;
      break;

    case UARCH_VOLTA:    // (jetson)
      // Any multiplier maps to HBM2.
      return MEMTYPE_HBM2;

    case UARCH_TURING:
      if (clkm == 2) return MEMTYPE_GDDR5;
      if (clkm == 4) return MEMTYPE_GDDR6;
      break;

    case UARCH_AMPERE:
      if (clkm == 1) return MEMTYPE_HBM2;
      if (clkm == 4) return MEMTYPE_GDDR6;
      if (clkm == 8) return MEMTYPE_GDDR6X;
      break;

    case UARCH_ADA:
      if (clkm == 8) return MEMTYPE_GDDR6X;
      break;

    case UARCH_HOPPER:
      if (clkm == 1) return MEMTYPE_HBM2;
      break;

    default:
      break;
  }

  // No rule matched this (clkm, uarch) pair.
  printBug("guess_memtype_from_cmul_and_uarch: Found invalid combination: clkm=%d, uarch=%d", clkm, arch->uarch);
  return MEMTYPE_UNKNOWN;
}
/*
 * Returns the compute capability of arch formatted as "<major>.<minor>".
 *
 * The buffer is sized from the length snprintf reports, so the string is
 * never truncated. A fixed 4-byte buffer only fits single-digit components:
 * "10.0" needs five bytes and would come out as "10.".
 *
 * The returned string is allocated with ecalloc; the caller owns it.
 */
char* get_str_cc(struct uarch* arch) {
  // First pass: measure. With a NULL buffer and size 0, snprintf writes
  // nothing and returns the number of characters the output would need.
  int needed = snprintf(NULL, 0, "%d.%d", arch->cc_major, arch->cc_minor);
  if (needed < 0) {
    // Formatting error: fall back to an empty string.
    needed = 0;
  }

  const size_t buf_size = (size_t) needed + 1;  // room for the terminating NUL
  char* cc = (char *) ecalloc(buf_size, sizeof(char));
  snprintf(cc, buf_size, "%d.%d", arch->cc_major, arch->cc_minor);
  return cc;
}
/*
 * Returns the chip name stored in arch. The pointer returned is
 * arch->chip_str itself; no copy is made.
 */
char* get_str_chip(struct uarch* arch) {
  char* chip_name = arch->chip_str;
  return chip_name;
}
// TODO: What about _ConvertSMVer2ArchName?
/*
 * Returns the display name of the microarchitecture stored in arch->uarch,
 * looked up in the uarch_str table. The string is not allocated here.
 */
const char* get_str_uarch_cuda(struct uarch* arch) {
  const char* uarch_name = uarch_str[arch->uarch];
  return uarch_name;
}
/*
 * Releases a struct uarch together with the strings it holds.
 *
 * A NULL arch is accepted and ignored, like free(NULL): get_uarch_from_cuda
 * returns NULL on failure, so the result of a failed lookup can be passed
 * straight to this function.
 *
 * Both arch->uarch_str and arch->chip_str are passed to free(), so each of
 * them must be either NULL or a pointer obtained from the heap when this
 * function is called.
 */
void free_uarch_struct(struct uarch* arch) {
  if (arch == NULL) {
    return;
  }

  free(arch->uarch_str);
  free(arch->chip_str);
  free(arch);
}
/*
 * Tells whether the chip stored in arch->chip is one of the TU116 variants
 * (CHIP_TU116, CHIP_TU116BM, CHIP_TU116GL or CHIP_TU116M).
 */
bool is_chip_TU116(struct uarch* arch) {
  if (arch->chip == CHIP_TU116) return true;
  if (arch->chip == CHIP_TU116BM) return true;
  if (arch->chip == CHIP_TU116GL) return true;
  if (arch->chip == CHIP_TU116M) return true;
  return false;
}