Skip to content

Commit 8527a99

Browse files
authored
Incomplete read ordering in CastCache::Get (#35597)
* Incomplete read ordering in CastCache::Get * Use LoadBarrier on the managed side as well. * Rename to ReadMemoryBarrier * PR feedback in the JIT
1 parent 6d40a8f commit 8527a99

File tree

17 files changed

+110
-40
lines changed

17 files changed

+110
-40
lines changed

src/coreclr/src/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ private static CastResult TryGet(nuint source, nuint target)
113113
{
114114
ref CastCacheEntry pEntry = ref Element(ref tableData, index);
115115

116-
// must read in this order: version -> entry parts -> version
116+
// must read in this order: version -> [entry parts] -> version
117117
// if version is odd or changes, the entry is inconsistent and thus ignored
118118
int version = Volatile.Read(ref pEntry._version);
119119
nuint entrySource = pEntry._source;
@@ -124,12 +124,19 @@ private static CastResult TryGet(nuint source, nuint target)
124124

125125
if (entrySource == source)
126126
{
127-
nuint entryTargetAndResult = Volatile.Read(ref pEntry._targetAndResult);
127+
nuint entryTargetAndResult = pEntry._targetAndResult;
128128
// target never has its lower bit set.
129129
// a matching entryTargetAndResult would the have same bits, except for the lowest one, which is the result.
130130
entryTargetAndResult ^= target;
131131
if (entryTargetAndResult <= 1)
132132
{
133+
// make sure 'version' is loaded after 'source' and 'targetAndResults'
134+
//
135+
// We can either:
136+
// - use acquires for both _source and _targetAndResults or
137+
// - issue a load barrier before reading _version
138+
// benchmarks on available hardware show that use of a read barrier is cheaper.
139+
Interlocked.ReadMemoryBarrier();
133140
if (version != pEntry._version)
134141
{
135142
// oh, so close, the entry is in inconsistent state.

src/coreclr/src/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,15 @@ public static long Read(ref long location) =>
232232
[MethodImpl(MethodImplOptions.InternalCall)]
233233
public static extern void MemoryBarrier();
234234

235+
/// <summary>
236+
/// Synchronizes memory access as follows:
237+
/// The processor that executes the current thread cannot reorder instructions in such a way that memory reads before
238+
/// the call to <see cref="ReadMemoryBarrier"/> execute after memory accesses that follow the call to <see cref="ReadMemoryBarrier"/>.
239+
/// </summary>
240+
[Intrinsic]
241+
[MethodImpl(MethodImplOptions.InternalCall)]
242+
internal static extern void ReadMemoryBarrier();
243+
235244
[DllImport(RuntimeHelpers.QCall, CharSet = CharSet.Unicode)]
236245
private static extern void _MemoryBarrierProcessWide();
237246

src/coreclr/src/inc/corinfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -947,6 +947,7 @@ enum CorInfoIntrinsics
947947
CORINFO_INTRINSIC_InterlockedCmpXchg32,
948948
CORINFO_INTRINSIC_InterlockedCmpXchg64,
949949
CORINFO_INTRINSIC_MemoryBarrier,
950+
CORINFO_INTRINSIC_MemoryBarrierLoad,
950951
CORINFO_INTRINSIC_GetCurrentManagedThread,
951952
CORINFO_INTRINSIC_GetManagedThreadId,
952953
CORINFO_INTRINSIC_ByReference_Ctor,

src/coreclr/src/inc/volatile.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,27 @@ void VolatileStoreWithoutBarrier(T* pt, T val)
286286
#endif
287287
}
288288

289+
//
290+
// Memory ordering barrier that waits for loads in progress to complete.
291+
// Any effects of loads or stores that appear after, in program order, will "happen after" relative to this.
292+
// Other operations such as computation or instruction prefetch are not affected.
293+
//
294+
// Architectural mapping:
295+
// arm64 : dmb ishld
296+
// arm : dmb ish
297+
// x86/64 : compiler fence
298+
inline
299+
void VolatileLoadBarrier()
300+
{
301+
#if defined(HOST_ARM64) && defined(__GNUC__)
302+
asm volatile ("dmb ishld" : : : "memory");
303+
#elif defined(HOST_ARM64) && defined(_MSC_VER)
304+
__dmb(_ARM64_BARRIER_ISHLD);
305+
#else
306+
VOLATILE_MEMORY_BARRIER();
307+
#endif
308+
}
309+
289310
//
290311
// Volatile<T> implements accesses with our volatile semantics over a variable of type T.
291312
// Wherever you would have used a "volatile Foo" or, equivalently, "Foo volatile", use Volatile<Foo>

src/coreclr/src/jit/codegen.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1465,11 +1465,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
14651465

14661466
void instGen_Return(unsigned stkArgSize);
14671467

1468-
#ifdef TARGET_ARM64
1469-
void instGen_MemoryBarrier(insBarrier barrierType = INS_BARRIER_ISH);
1470-
#else
1471-
void instGen_MemoryBarrier();
1472-
#endif
1468+
enum BarrierKind
1469+
{
1470+
BARRIER_FULL, // full barrier
1471+
BARRIER_LOAD_ONLY, // load barier
1472+
};
1473+
1474+
void instGen_MemoryBarrier(BarrierKind barrierKind = BARRIER_FULL);
14731475

14741476
void instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags flags = INS_FLAGS_DONT_CARE);
14751477

src/coreclr/src/jit/codegenarm64.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2665,8 +2665,8 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
26652665

26662666
if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
26672667
{
2668-
// issue a INS_BARRIER_ISHLD after a volatile CpObj operation
2669-
instGen_MemoryBarrier(INS_BARRIER_ISHLD);
2668+
// issue a load barrier after a volatile CpObj operation
2669+
instGen_MemoryBarrier(BARRIER_LOAD_ONLY);
26702670
}
26712671

26722672
// Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF.
@@ -2775,7 +2775,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
27752775
assert(!"Unexpected treeNode->gtOper");
27762776
}
27772777

2778-
instGen_MemoryBarrier(INS_BARRIER_ISH);
2778+
instGen_MemoryBarrier();
27792779
}
27802780
else
27812781
{
@@ -2855,7 +2855,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
28552855

28562856
GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);
28572857

2858-
instGen_MemoryBarrier(INS_BARRIER_ISH);
2858+
instGen_MemoryBarrier();
28592859

28602860
gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
28612861
}
@@ -2904,7 +2904,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
29042904
}
29052905
GetEmitter()->emitIns_R_R_R(INS_casal, dataSize, targetReg, dataReg, addrReg);
29062906

2907-
instGen_MemoryBarrier(INS_BARRIER_ISH);
2907+
instGen_MemoryBarrier();
29082908
}
29092909
else
29102910
{
@@ -2984,7 +2984,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
29842984

29852985
genDefineTempLabel(labelCompareFail);
29862986

2987-
instGen_MemoryBarrier(INS_BARRIER_ISH);
2987+
instGen_MemoryBarrier();
29882988

29892989
gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
29902990
}

src/coreclr/src/jit/codegenarmarch.cpp

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,13 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
398398
break;
399399

400400
case GT_MEMORYBARRIER:
401-
instGen_MemoryBarrier();
401+
{
402+
CodeGen::BarrierKind barrierKind =
403+
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL;
404+
405+
instGen_MemoryBarrier(barrierKind);
402406
break;
407+
}
403408

404409
#ifdef TARGET_ARM64
405410
case GT_XCHG:
@@ -1944,11 +1949,9 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
19441949

19451950
if (emitBarrier)
19461951
{
1947-
#ifdef TARGET_ARM64
1948-
instGen_MemoryBarrier(INS_BARRIER_ISHLD);
1949-
#else
1950-
instGen_MemoryBarrier();
1951-
#endif
1952+
// when INS_ldar* could not be used for a volatile load,
1953+
// we use an ordinary load followed by a load barrier.
1954+
instGen_MemoryBarrier(BARRIER_LOAD_ONLY);
19521955
}
19531956

19541957
genProduceReg(tree);
@@ -1980,13 +1983,8 @@ void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode)
19801983

19811984
if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
19821985
{
1983-
#ifdef TARGET_ARM64
1984-
// issue a INS_BARRIER_ISHLD after a volatile CpBlk operation
1985-
instGen_MemoryBarrier(INS_BARRIER_ISHLD);
1986-
#else
1987-
// issue a full memory barrier after a volatile CpBlk operation
1988-
instGen_MemoryBarrier();
1989-
#endif // TARGET_ARM64
1986+
// issue a load barrier after a volatile CpBlk operation
1987+
instGen_MemoryBarrier(BARRIER_LOAD_ONLY);
19901988
}
19911989
}
19921990

@@ -2207,6 +2205,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
22072205

22082206
if (node->IsVolatile())
22092207
{
2208+
// issue a full memory barrier before a volatile CpBlk operation
22102209
instGen_MemoryBarrier();
22112210
}
22122211

@@ -2304,11 +2303,8 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
23042303

23052304
if (node->IsVolatile())
23062305
{
2307-
#ifdef TARGET_ARM64
2308-
instGen_MemoryBarrier(INS_BARRIER_ISHLD);
2309-
#else
2310-
instGen_MemoryBarrier();
2311-
#endif
2306+
// issue a load barrier after a volatile CpBlk operation
2307+
instGen_MemoryBarrier(BARRIER_LOAD_ONLY);
23122308
}
23132309
}
23142310

src/coreclr/src/jit/codegenxarch.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1862,8 +1862,13 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
18621862
break;
18631863

18641864
case GT_MEMORYBARRIER:
1865-
instGen_MemoryBarrier();
1865+
{
1866+
CodeGen::BarrierKind barrierKind =
1867+
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL;
1868+
1869+
instGen_MemoryBarrier(barrierKind);
18661870
break;
1871+
}
18671872

18681873
case GT_CMPXCHG:
18691874
genCodeForCmpXchg(treeNode->AsCmpXchg());

src/coreclr/src/jit/gentree.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,8 @@ struct GenTree
814814
#define GTF_CALL_POP_ARGS 0x04000000 // GT_CALL -- caller pop arguments?
815815
#define GTF_CALL_HOISTABLE 0x02000000 // GT_CALL -- call is hoistable
816816

817+
#define GTF_MEMORYBARRIER_LOAD 0x40000000 // GT_MEMORYBARRIER -- Load barrier
818+
817819
#define GTF_NOP_DEATH 0x40000000 // GT_NOP -- operand dies here
818820

819821
#define GTF_FLD_VOLATILE 0x40000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_IND_VOLATILE

src/coreclr/src/jit/importer.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3627,11 +3627,20 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
36273627
#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64)
36283628

36293629
case CORINFO_INTRINSIC_MemoryBarrier:
3630+
case CORINFO_INTRINSIC_MemoryBarrierLoad:
36303631

36313632
assert(sig->numArgs == 0);
36323633

36333634
op1 = new (this, GT_MEMORYBARRIER) GenTree(GT_MEMORYBARRIER, TYP_VOID);
36343635
op1->gtFlags |= GTF_GLOB_REF | GTF_ASG;
3636+
3637+
// On XARCH `CORINFO_INTRINSIC_MemoryBarrierLoad` fences need not be emitted.
3638+
// However, we still need to capture the effect on reordering.
3639+
if (intrinsicID == CORINFO_INTRINSIC_MemoryBarrierLoad)
3640+
{
3641+
op1->gtFlags |= GTF_MEMORYBARRIER_LOAD;
3642+
}
3643+
36353644
retNode = op1;
36363645
break;
36373646

0 commit comments

Comments
 (0)