Skip to content
97 changes: 73 additions & 24 deletions src/coreclr/gc/unix/gcenv.unix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -876,21 +876,29 @@ bool ReadMemoryValueFromFile(const char* filename, uint64_t* val)
return result;
}

// Records `size` as the largest cache seen so far: when `size` is strictly greater than
// `cacheSize`, `cacheSize` is set to `size` and `cacheLevel` is set to CACHE_LEVEL.
// On a tie the previously recorded size/level pair is kept.
// The macro relies on variables named `size`, `cacheSize` and `cacheLevel` being in scope
// at the expansion site. It expands to a complete braced `if` statement, so call sites
// use it without a trailing semicolon.
// NOTE(review): because this is a bare `if` (not wrapped in do/while), keep the expansion
// brace-enclosed whenever an `else` follows it.
#define CHECK_CACHE_SIZE(CACHE_LEVEL) if (size > cacheSize) { cacheSize = size; cacheLevel = CACHE_LEVEL; }

static size_t GetLogicalProcessorCacheSizeFromOS()
{
size_t cacheLevel = 0;
size_t cacheSize = 0;
size_t size;

#ifdef _SC_LEVEL1_DCACHE_SIZE
cacheSize = std::max(cacheSize, ( size_t) sysconf(_SC_LEVEL1_DCACHE_SIZE));
size = ( size_t) sysconf(_SC_LEVEL1_DCACHE_SIZE);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Q (also applies to the old src) - what does sysconf return if there's no cache of that level? I presume it returns 0, not -1 (the doc says it returns -1 if there's an error).

CHECK_CACHE_SIZE(1)
#endif
#ifdef _SC_LEVEL2_CACHE_SIZE
cacheSize = std::max(cacheSize, ( size_t) sysconf(_SC_LEVEL2_CACHE_SIZE));
size = ( size_t) sysconf(_SC_LEVEL2_CACHE_SIZE);
CHECK_CACHE_SIZE(2)
#endif
#ifdef _SC_LEVEL3_CACHE_SIZE
cacheSize = std::max(cacheSize, ( size_t) sysconf(_SC_LEVEL3_CACHE_SIZE));
size = ( size_t) sysconf(_SC_LEVEL3_CACHE_SIZE);
CHECK_CACHE_SIZE(3)
#endif
#ifdef _SC_LEVEL4_CACHE_SIZE
cacheSize = std::max(cacheSize, ( size_t) sysconf(_SC_LEVEL4_CACHE_SIZE));
size = ( size_t) sysconf(_SC_LEVEL4_CACHE_SIZE);
CHECK_CACHE_SIZE(4)
#endif

#if defined(TARGET_LINUX) && !defined(HOST_ARM) && !defined(HOST_X86)
Expand All @@ -901,25 +909,39 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
// for the platform. Currently musl and arm64 should be only cases to use
// this method to determine cache size.
//
size_t size;

if (ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index0/size", &size))
cacheSize = std::max(cacheSize, size);
if (ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index1/size", &size))
cacheSize = std::max(cacheSize, size);
if (ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index2/size", &size))
cacheSize = std::max(cacheSize, size);
if (ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index3/size", &size))
cacheSize = std::max(cacheSize, size);
if (ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index4/size", &size))
cacheSize = std::max(cacheSize, size);
size_t level;
char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size";
char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level";
int index = 40;
assert(path_to_size_file[index] == '-');
assert(path_to_level_file[index] == '-');

for (int i = 0; i < 5; i++)
{
path_to_size_file[index] = (char)(48 + i);

if (ReadMemoryValueFromFile(path_to_size_file, &size))
{
path_to_level_file[index] = (char)(48 + i);

if (ReadMemoryValueFromFile(path_to_level_file, &level))
{
CHECK_CACHE_SIZE(level)
}
else
{
cacheSize = std::max(cacheSize, size);
}
}
}
}
#endif

#if defined(HOST_ARM64) && !defined(TARGET_OSX)
#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX)
if (cacheSize == 0)
{
// It is currently expected to be missing cache size info
// We expect to get the L3 cache size for Arm64, but that info is currently expected to be missing
// from most of the machines.
//
// _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64
//
Expand Down Expand Up @@ -964,6 +986,38 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
}
#endif

#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX)
if (cacheLevel != 3)
{
// We expect to get the L3 cache size for Arm64, but that info is currently expected to be missing
// from most of the machines.
// Hence, just use the following heuristics at best depending on the CPU count
// 1 ~ 4 : 4 MB
// 5 ~ 16 : 8 MB
// 17 ~ 64 : 16 MB
// 65+ : 32 MB
DWORD logicalCPUs = g_totalCpuCount;
if (logicalCPUs < 5)
{
cacheSize = 4;
}
else if (logicalCPUs < 17)
{
cacheSize = 8;
}
else if (logicalCPUs < 65)
{
cacheSize = 16;
}
else
{
cacheSize = 32;
}

cacheSize *= (1024 * 1024);
}
#endif

return cacheSize;
}

Expand Down Expand Up @@ -1037,15 +1091,10 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
size_t maxSize, maxTrueSize;
maxSize = maxTrueSize = GetLogicalProcessorCacheSizeFromOS(); // Returns the size of the highest level processor cache

#if defined(HOST_ARM64)
// Bigger gen0 size helps arm64 targets
maxSize = maxTrueSize * 3;
#endif

s_maxSize = maxSize;
s_maxTrueSize = maxTrueSize;

// printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize);
// printf("GetCacheSizePerLogicalCpu returns %zu, adjusted size %zu\n", maxSize, maxTrueSize);
return trueSize ? maxTrueSize : maxSize;
}

Expand Down
51 changes: 44 additions & 7 deletions src/coreclr/gc/windows/gcenv.windows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,9 @@ SYSTEM_LOGICAL_PROCESSOR_INFORMATION *GetLPI(PDWORD nEntries)
size_t GetLogicalProcessorCacheSizeFromOS()
{
size_t cache_size = 0;
size_t cache_level = 0;
uint32_t totalCPUCount = 0;

DWORD nEntries = 0;

// Try to use GetLogicalProcessorInformation API and get a valid pointer to the SLPI array if successful. Returns NULL
Expand All @@ -424,7 +427,15 @@ size_t GetLogicalProcessorCacheSizeFromOS()
{
if (pslpi[i].Relationship == RelationCache)
{
last_cache_size = max(last_cache_size, pslpi[i].Cache.Size);
if (last_cache_size < pslpi[i].Cache.Size)
{
last_cache_size = pslpi[i].Cache.Size;
cache_level = pslpi[i].Cache.Level;
}
}
else if (pslpi[i].Relationship == RelationProcessorCore)
{
totalCPUCount++;
}
}
cache_size = last_cache_size;
Expand All @@ -434,6 +445,37 @@ size_t GetLogicalProcessorCacheSizeFromOS()
if(pslpi)
delete[] pslpi; // release the memory allocated for the SLPI array.

#if defined(TARGET_ARM64)
if (cache_level != 3)
{
// We expect to get the L3 cache size for Arm64, but that info is currently expected to be missing
// from most of the machines.
// Hence, just use the following heuristics at best depending on the CPU count
// 1 ~ 4 : 4 MB
// 5 ~ 16 : 8 MB
// 17 ~ 64 : 16 MB
// 65+ : 32 MB
if (totalCPUCount < 5)
{
cache_size = 4;
}
else if (totalCPUCount < 17)
{
cache_size = 8;
}
else if (totalCPUCount < 65)
{
cache_size = 16;
}
else
{
cache_size = 32;
}

cache_size *= (1024 * 1024);
}
#endif // TARGET_ARM64

return cache_size;
}

Expand Down Expand Up @@ -836,15 +878,10 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)

maxSize = maxTrueSize = GetLogicalProcessorCacheSizeFromOS() ; // Returns the size of the highest level processor cache

#if defined(TARGET_ARM64)
// Bigger gen0 size helps arm64 targets
maxSize = maxTrueSize * 3;
#endif

s_maxSize = maxSize;
s_maxTrueSize = maxTrueSize;

// printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize);
// printf("GetCacheSizePerLogicalCpu returns %zu, adjusted size %zu\n", maxSize, maxTrueSize);
return trueSize ? maxTrueSize : maxSize;
}

Expand Down
91 changes: 72 additions & 19 deletions src/coreclr/pal/src/misc/sysinfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -539,23 +539,31 @@ ReadMemoryValueFromFile(const char* filename, uint64_t* val)
return result;
}

// Records `size` as the largest cache seen so far: when `size` is strictly greater than
// `cacheSize`, `cacheSize` is set to `size` and `cacheLevel` is set to CACHE_LEVEL.
// On a tie the previously recorded size/level pair is kept.
// The macro relies on variables named `size`, `cacheSize` and `cacheLevel` being in scope
// at the expansion site. It expands to a complete braced `if` statement, so call sites
// use it without a trailing semicolon.
// NOTE(review): because this is a bare `if` (not wrapped in do/while), keep the expansion
// brace-enclosed whenever an `else` follows it.
#define CHECK_CACHE_SIZE(CACHE_LEVEL) if (size > cacheSize) { cacheSize = size; cacheLevel = CACHE_LEVEL; }

size_t
PALAPI
PAL_GetLogicalProcessorCacheSizeFromOS()
{
size_t cacheLevel = 0;
size_t cacheSize = 0;
size_t size;

#ifdef _SC_LEVEL1_DCACHE_SIZE
cacheSize = std::max(cacheSize, (size_t)sysconf(_SC_LEVEL1_DCACHE_SIZE));
size = ( size_t) sysconf(_SC_LEVEL1_DCACHE_SIZE);
CHECK_CACHE_SIZE(1)
#endif
#ifdef _SC_LEVEL2_CACHE_SIZE
cacheSize = std::max(cacheSize, (size_t)sysconf(_SC_LEVEL2_CACHE_SIZE));
size = ( size_t) sysconf(_SC_LEVEL2_CACHE_SIZE);
CHECK_CACHE_SIZE(2)
#endif
#ifdef _SC_LEVEL3_CACHE_SIZE
cacheSize = std::max(cacheSize, (size_t)sysconf(_SC_LEVEL3_CACHE_SIZE));
size = ( size_t) sysconf(_SC_LEVEL3_CACHE_SIZE);
CHECK_CACHE_SIZE(3)
#endif
#ifdef _SC_LEVEL4_CACHE_SIZE
cacheSize = std::max(cacheSize, (size_t)sysconf(_SC_LEVEL4_CACHE_SIZE));
size = ( size_t) sysconf(_SC_LEVEL4_CACHE_SIZE);
CHECK_CACHE_SIZE(4)
#endif

#if defined(TARGET_LINUX) && !defined(HOST_ARM) && !defined(HOST_X86)
Expand All @@ -566,25 +574,39 @@ PAL_GetLogicalProcessorCacheSizeFromOS()
// for the platform. Currently musl and arm64 should be only cases to use
// this method to determine cache size.
//
size_t size;

if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index0/size", &size))
cacheSize = std::max(cacheSize, size);
if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index1/size", &size))
cacheSize = std::max(cacheSize, size);
if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index2/size", &size))
cacheSize = std::max(cacheSize, size);
if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index3/size", &size))
cacheSize = std::max(cacheSize, size);
if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index4/size", &size))
cacheSize = std::max(cacheSize, size);
size_t level;
char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size";
char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level";
int index = 40;
_ASSERTE(path_to_size_file[index] == '-');
_ASSERTE(path_to_level_file[index] == '-');

for (int i = 0; i < 5; i++)
{
path_to_size_file[index] = (char)(48 + i);

if (ReadMemoryValueFromFile(path_to_size_file, &size))
{
path_to_level_file[index] = (char)(48 + i);

if (ReadMemoryValueFromFile(path_to_level_file, &level))
{
CHECK_CACHE_SIZE(level)
}
else
{
cacheSize = std::max(cacheSize, size);
}
}
}
}
#endif

#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX)
if (cacheSize == 0)
{
// It is currently expected to be missing cache size info
// We expect to get the L3 cache size for Arm64, but that info is currently expected to be missing
// from most of the machines, with exceptions on some machines.
//
// _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64
//
Expand Down Expand Up @@ -621,12 +643,43 @@ PAL_GetLogicalProcessorCacheSizeFromOS()
|| sysctlbyname("hw.l3cachesize", &cacheSizeFromSysctl, &sz, nullptr, 0) == 0
|| sysctlbyname("hw.l2cachesize", &cacheSizeFromSysctl, &sz, nullptr, 0) == 0
|| sysctlbyname("hw.l1dcachesize", &cacheSizeFromSysctl, &sz, nullptr, 0) == 0;

if (success)
{
_ASSERTE(cacheSizeFromSysctl > 0);
cacheSize = (size_t) cacheSizeFromSysctl;
cacheSize = ( size_t) cacheSizeFromSysctl;
}
}
#endif

#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX)
if (cacheLevel != 3)
{
// We expect to get the L3 cache size for Arm64, but that info is currently expected to be missing
// from most of the machines.
// Hence, just use the following heuristics at best depending on the CPU count
// 1 ~ 4 : 4 MB
// 5 ~ 16 : 8 MB
// 17 ~ 64 : 16 MB
// 65+ : 32 MB
DWORD logicalCPUs = PAL_GetLogicalCpuCountFromOS();
if (logicalCPUs < 5)
{
cacheSize = 4;
}
else if (logicalCPUs < 17)
{
cacheSize = 8;
}
else if (logicalCPUs < 65)
{
cacheSize = 16;
}
else
{
cacheSize = 32;
}

cacheSize *= (1024 * 1024);
}
#endif

Expand Down
Loading