Skip to content

Commit 94023c1

Browse files
authored
Add allocMem support for allocating multiple data chunks (#123378)
Rework `allocMem` to support allocating an arbitrary number of data chunks. The motivation is to allow ilc to put async resumption info chunks into sections that support relocations to .text; additionally, this will allow sharing read-only data between multiple functions.
1 parent 6b70e93 commit 94023c1

File tree

26 files changed

+443
-520
lines changed

26 files changed

+443
-520
lines changed

src/coreclr/inc/corjit.h

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,10 @@ enum CorJitResult
4646
// to guide the memory allocation for the code, readonly data, and read-write data
4747
enum CorJitAllocMemFlag
4848
{
49-
CORJIT_ALLOCMEM_DEFAULT_CODE_ALIGN = 0x00000000, // The code will use the normal alignment
50-
CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN = 0x00000001, // The code will be 16-byte aligned
51-
CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN = 0x00000002, // The read-only data will be 16-byte aligned
52-
CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN = 0x00000004, // The code will be 32-byte aligned
53-
CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN = 0x00000008, // The read-only data will be 32-byte aligned
54-
CORJIT_ALLOCMEM_FLG_RODATA_64BYTE_ALIGN = 0x00000010, // The read-only data will be 64-byte aligned
49+
CORJIT_ALLOCMEM_HOT_CODE = 1,
50+
CORJIT_ALLOCMEM_COLD_CODE = 2,
51+
CORJIT_ALLOCMEM_READONLY_DATA = 4,
52+
CORJIT_ALLOCMEM_HAS_POINTERS_TO_CODE = 8,
5553
};
5654

5755
inline CorJitAllocMemFlag operator |(CorJitAllocMemFlag a, CorJitAllocMemFlag b)
@@ -77,22 +75,28 @@ enum CheckedWriteBarrierKinds {
7775
CWBKind_AddrOfLocal, // Store through the address of a local (arguably a bug that this happens at all).
7876
};
7977

78+
// Describes a single chunk of memory requested through allocMem (see AllocMemArgs::chunks).
// The caller fills in alignment, size and flags; block and blockRW are read back after the call.
struct AllocMemChunk
79+
{
80+
// Alignment of the chunk. Must be a power of two with the following restrictions:
81+
// - For the hot code chunk the max supported alignment is 32.
82+
// - For the cold code chunk the value must always be 1.
83+
// - For read-only data chunks the max supported alignment is 64.
84+
uint32_t alignment;
85+
uint32_t size; // Size of the chunk in bytes.
86+
CorJitAllocMemFlag flags; // CORJIT_ALLOCMEM_* value(s) describing the chunk, e.g. CORJIT_ALLOCMEM_HOT_CODE.
87+
88+
// out - these fields are read by callers after allocMem returns.
89+
uint8_t* block; // Address used to refer to the chunk (e.g. as the code entry address).
90+
uint8_t* blockRW; // Address through which the caller writes the chunk contents; presumably may differ from block - TODO confirm.
91+
};
92+
8093
struct AllocMemArgs
8194
{
82-
// Input arguments
83-
uint32_t hotCodeSize;
84-
uint32_t coldCodeSize;
85-
uint32_t roDataSize;
95+
// Chunks to allocate. Supports one hot code chunk, one cold code chunk,
96+
// and an arbitrary number of data chunks.
97+
AllocMemChunk* chunks;
98+
unsigned chunksCount;
8699
uint32_t xcptnsCount;
87-
CorJitAllocMemFlag flag;
88-
89-
// Output arguments
90-
void* hotCodeBlock;
91-
void* hotCodeBlockRW;
92-
void* coldCodeBlock;
93-
void* coldCodeBlockRW;
94-
void* roDataBlock;
95-
void* roDataBlockRW;
96100
};
97101

98102
#include "corjithost.h"

src/coreclr/inc/jiteeversionguid.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@
3737

3838
#include <minipal/guid.h>
3939

40-
constexpr GUID JITEEVersionIdentifier = { /* 976a4d6d-d1b2-4096-a3c9-46ddcae71196 */
41-
0x976a4d6d,
42-
0xd1b2,
43-
0x4096,
44-
{0xa3, 0xc9, 0x46, 0xdd, 0xca, 0xe7, 0x11, 0x96}
40+
constexpr GUID JITEEVersionIdentifier = { /* db46fd97-a8e8-4bda-9cec-d7feb061154c */
41+
0xdb46fd97,
42+
0xa8e8,
43+
0x4bda,
44+
{0x9c, 0xec, 0xd7, 0xfe, 0xb0, 0x61, 0x15, 0x4c}
4545
};
4646

4747
#endif // JIT_EE_VERSIONING_GUID_H

src/coreclr/interpreter/eeinterp.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -128,20 +128,23 @@ CorJitResult CILInterp::compileMethod(ICorJitInfo* compHnd,
128128
uint32_t sizeOfCode = sizeof(InterpMethod*) + IRCodeSize * sizeof(int32_t);
129129
uint8_t unwindInfo[8] = {0, 0, 0, 0, 0, 0, 0, 0};
130130

131+
AllocMemChunk codeChunk {};
132+
codeChunk.alignment = 1;
133+
codeChunk.size = sizeOfCode;
134+
codeChunk.flags = CORJIT_ALLOCMEM_HOT_CODE;
135+
131136
AllocMemArgs args {};
132-
args.hotCodeSize = sizeOfCode;
133-
args.coldCodeSize = 0;
134-
args.roDataSize = 0;
137+
args.chunks = &codeChunk;
138+
args.chunksCount = 1;
135139
args.xcptnsCount = 0;
136-
args.flag = CORJIT_ALLOCMEM_DEFAULT_CODE_ALIGN;
137140
compHnd->allocMem(&args);
138141

139142
// We store first the InterpMethod pointer as the code header, followed by the actual code
140-
*(InterpMethod**)args.hotCodeBlockRW = pMethod;
141-
memcpy ((uint8_t*)args.hotCodeBlockRW + sizeof(InterpMethod*), pIRCode, IRCodeSize * sizeof(int32_t));
143+
*(InterpMethod**)codeChunk.blockRW = pMethod;
144+
memcpy ((uint8_t*)codeChunk.blockRW + sizeof(InterpMethod*), pIRCode, IRCodeSize * sizeof(int32_t));
142145

143-
compiler.UpdateWithFinalMethodByteCodeAddress((InterpByteCodeStart*)args.hotCodeBlock);
144-
*entryAddress = (uint8_t*)args.hotCodeBlock;
146+
compiler.UpdateWithFinalMethodByteCodeAddress((InterpByteCodeStart*)codeChunk.block);
147+
*entryAddress = (uint8_t*)codeChunk.block;
145148
*nativeSizeOfCode = sizeOfCode;
146149

147150
// We can't do this until we've called allocMem

src/coreclr/jit/codegen.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,6 @@ class CodeGen final : public CodeGenInterface
195195
unsigned codeSize;
196196
void* coldCodePtr;
197197
void* coldCodePtrRW;
198-
void* consPtr;
199-
void* consPtrRW;
200198

201199
// Last instr we have displayed for dspInstrs
202200
unsigned genCurDispOffset;

src/coreclr/jit/codegencommon.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2131,7 +2131,7 @@ void CodeGen::genEmitMachineCode()
21312131
codeSize =
21322132
GetEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, GetInterruptible(), IsFullPtrRegMapRequired(),
21332133
compiler->compHndBBtabCount, &prologSize, &epilogSize, codePtr, &codePtrRW,
2134-
&coldCodePtr, &coldCodePtrRW, &consPtr, &consPtrRW DEBUGARG(&instrCount));
2134+
&coldCodePtr, &coldCodePtrRW DEBUGARG(&instrCount));
21352135

21362136
#ifdef DEBUG
21372137
assert(compiler->compCodeGenDone == false);

src/coreclr/jit/compiler.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8542,7 +8542,11 @@ class Compiler
85428542

85438543
// ICorJitInfo wrappers
85448544

8545-
void eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSectionAlignment);
8545+
void eeAllocMem(AllocMemChunk& codeChunk,
8546+
AllocMemChunk* coldCodeChunk,
8547+
AllocMemChunk* dataChunks,
8548+
unsigned numDataChunks,
8549+
unsigned numExceptions);
85468550

85478551
void eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize);
85488552

src/coreclr/jit/ee_il_dll.cpp

Lines changed: 77 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,63 +1129,117 @@ void Compiler::eeDispLineInfos()
11291129
* (e.g., host AMD64, target ARM64), then VM will get confused anyway.
11301130
*/
11311131

1132-
void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSectionAlignment)
1132+
void Compiler::eeAllocMem(AllocMemChunk& codeChunk,
1133+
AllocMemChunk* coldCodeChunk,
1134+
AllocMemChunk* dataChunks,
1135+
unsigned numDataChunks,
1136+
unsigned numExceptions)
11331137
{
1138+
ArrayStack<AllocMemChunk> chunks(getAllocator(CMK_Codegen));
1139+
1140+
chunks.Push(codeChunk);
1141+
1142+
int coldCodeChunkIndex = -1;
1143+
if (coldCodeChunk != nullptr)
1144+
{
1145+
coldCodeChunkIndex = chunks.Height();
1146+
chunks.Push(*coldCodeChunk);
1147+
}
1148+
11341149
#ifdef DEBUG
11351150

11361151
// Fake splitting implementation: place hot/cold code in contiguous section.
1137-
UNATIVE_OFFSET coldCodeOffset = 0;
1138-
if (JitConfig.JitFakeProcedureSplitting() && (args->coldCodeSize > 0))
1152+
if (JitConfig.JitFakeProcedureSplitting() && (coldCodeChunk != nullptr))
11391153
{
1140-
coldCodeOffset = args->hotCodeSize;
1141-
assert(coldCodeOffset > 0);
1142-
args->hotCodeSize += args->coldCodeSize;
1143-
args->coldCodeSize = 0;
1154+
// Keep offset into hot code in the block/blockRW pointers
1155+
coldCodeChunk->block = (uint8_t*)(uintptr_t)chunks.BottomRef(0).size;
1156+
coldCodeChunk->blockRW = (uint8_t*)(uintptr_t)chunks.BottomRef(0).size;
1157+
chunks.BottomRef(0).size += coldCodeChunk->size;
1158+
// Remove cold chunk
1159+
chunks.Pop();
1160+
coldCodeChunkIndex = -1;
11441161
}
11451162

11461163
#endif // DEBUG
11471164

1165+
int firstDataChunk = chunks.Height();
1166+
11481167
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
11491168

11501169
// For arm64/LoongArch64/RISCV64, we want to allocate JIT data always adjacent to code similar to what native
11511170
// compiler does.
11521171
// This way allows us to use a single `ldr` to access such data like float constant/jmp table.
11531172
// For LoongArch64 using `pcaddi + ld` to access such data.
11541173

1155-
UNATIVE_OFFSET roDataAlignmentDelta = 0;
1156-
if (args->roDataSize > 0)
1174+
for (unsigned i = 0; i < numDataChunks; i++)
11571175
{
1158-
roDataAlignmentDelta = AlignmentPad(args->hotCodeSize, roDataSectionAlignment);
1176+
// Increase size of the hot code chunk and store offset in data chunk
1177+
AllocMemChunk& codeChunk = chunks.BottomRef(0);
1178+
1179+
codeChunk.size = AlignUp(codeChunk.size, dataChunks[i].alignment);
1180+
dataChunks[i].block = (uint8_t*)(uintptr_t)codeChunk.size;
1181+
dataChunks[i].blockRW = (uint8_t*)(uintptr_t)codeChunk.size;
1182+
codeChunk.size += dataChunks[i].size;
1183+
1184+
codeChunk.alignment = max(codeChunk.alignment, dataChunks[i].alignment);
11591185
}
11601186

1161-
const UNATIVE_OFFSET roDataOffset = args->hotCodeSize + roDataAlignmentDelta;
1162-
args->hotCodeSize = roDataOffset + args->roDataSize;
1163-
args->roDataSize = 0;
1187+
#else
1188+
1189+
for (unsigned i = 0; i < numDataChunks; i++)
1190+
{
1191+
chunks.Push(dataChunks[i]);
1192+
}
11641193

11651194
#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
11661195

1167-
info.compCompHnd->allocMem(args);
1196+
AllocMemArgs args;
1197+
args.chunks = chunks.Data();
1198+
args.chunksCount = (unsigned)chunks.Height();
1199+
args.xcptnsCount = numExceptions;
1200+
1201+
info.compCompHnd->allocMem(&args);
1202+
1203+
codeChunk.block = chunks.BottomRef(0).block;
1204+
codeChunk.blockRW = chunks.BottomRef(0).blockRW;
1205+
1206+
if (coldCodeChunkIndex != -1)
1207+
{
1208+
coldCodeChunk->block = chunks.BottomRef(coldCodeChunkIndex).block;
1209+
coldCodeChunk->blockRW = chunks.BottomRef(coldCodeChunkIndex).blockRW;
1210+
}
11681211

11691212
#ifdef DEBUG
11701213

1171-
if (JitConfig.JitFakeProcedureSplitting() && (coldCodeOffset > 0))
1214+
if (JitConfig.JitFakeProcedureSplitting() && (coldCodeChunk != nullptr))
11721215
{
11731216
// Fix up cold code pointers. Cold section is adjacent to hot section.
1174-
assert(args->coldCodeBlock == nullptr);
1175-
assert(args->coldCodeBlockRW == nullptr);
1176-
args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + coldCodeOffset;
1177-
args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + coldCodeOffset;
1217+
assert(coldCodeChunk != nullptr);
1218+
coldCodeChunk->block = codeChunk.block + (uintptr_t)coldCodeChunk->block;
1219+
coldCodeChunk->blockRW = codeChunk.blockRW + (uintptr_t)coldCodeChunk->blockRW;
11781220
}
11791221

11801222
#endif // DEBUG
11811223

1224+
int curDataChunk = firstDataChunk;
1225+
11821226
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
11831227

11841228
// Fix up data section pointers.
1185-
assert(args->roDataBlock == nullptr);
1186-
assert(args->roDataBlockRW == nullptr);
1187-
args->roDataBlock = ((BYTE*)args->hotCodeBlock) + roDataOffset;
1188-
args->roDataBlockRW = ((BYTE*)args->hotCodeBlockRW) + roDataOffset;
1229+
for (unsigned i = 0; i < numDataChunks; i++)
1230+
{
1231+
dataChunks[i].block = codeChunk.block + (size_t)dataChunks[i].block;
1232+
dataChunks[i].blockRW = codeChunk.blockRW + (size_t)dataChunks[i].blockRW;
1233+
}
1234+
1235+
#else
1236+
1237+
for (unsigned i = 0; i < numDataChunks; i++)
1238+
{
1239+
dataChunks[i].block = chunks.BottomRef(curDataChunk).block;
1240+
dataChunks[i].blockRW = chunks.BottomRef(curDataChunk).blockRW;
1241+
curDataChunk++;
1242+
}
11891243

11901244
#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
11911245
}

0 commit comments

Comments
 (0)