-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Closed
Labels
area-CodeGen-coreclr (CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI), optimization, os-linux (Linux OS, any supported distro)
Milestone
Description
We are currently not promoting incoming multireg structs: https://github.com/dotnet/coreclr/blob/9479f67577bbb02ea611777b00308f42252fb2bc/src/jit/lclvars.cpp#L1914-L1926
Example derived from the discussion in dotnet/corefx#40998:
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
/// <summary>
/// Minimal repro: a span's length is tested against a constant and the span is then
/// indexed with smaller constants, so the indexers' range checks are redundant.
/// </summary>
public class BoundsCheck
{
/// <summary>
/// Calls <see cref="GetKey"/> on a 7-byte span and returns the truncated key plus 100.
/// </summary>
/// <returns>
/// 100 for this input: the array is zero-filled, so only the length tag in the top byte
/// of the key is set, and the (int) cast keeps just the low 32 bits (assuming the
/// default unchecked context).
/// </returns>
public static int Main()
{
ReadOnlySpan<byte> span = new ReadOnlySpan<byte>(new byte[7]);
return (int)GetKey(span) + 100;
}
/// <summary>
/// Packs the leading bytes of <paramref name="propertyName"/> into a 64-bit key.
/// </summary>
/// <param name="propertyName">Bytes to build the key from.</param>
/// <returns>
/// 0 when the span has 3 or fewer bytes; otherwise the first four bytes read as a uint.
/// When the length is exactly 7, bytes 4..6 are additionally OR-ed into bits 32..55 and
/// the value 7 into the top byte.
/// </returns>
// NoInlining keeps GetKey a real call, so propertyName arrives as an incoming struct argument.
[MethodImpl(MethodImplOptions.NoInlining)]
public static ulong GetKey(ReadOnlySpan<byte> propertyName)
{
const int BitsInByte = 8;
ulong key = 0;
int length = propertyName.Length;
if (length > 3)
{
// Read<uint> does its own length check; it shows up in the listings as the
// compare against 4 and the ThrowArgumentOutOfRangeException path.
key = MemoryMarshal.Read<uint>(propertyName);
if (length == 7)
{
// length == 7 already proves indices 4..6 are in range; these three indexer
// accesses are the bounds checks compared in the two listings.
key |= (ulong)propertyName[6] << (6 * BitsInByte)
| (ulong)propertyName[5] << (5 * BitsInByte)
| (ulong)propertyName[4] << (4 * BitsInByte)
| (ulong)7 << (7 * BitsInByte);
}
}
return key;
}
}
On Windows x64 the struct is promoted and we eliminate bounds checks:
asm
; Assembly listing for method BoundsCheck:GetKey(struct):long
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 4, 8 ) byref -> rcx ld-addr-op
; V01 loc0 [V01,T02] ( 5, 3.50) long -> rdx
; V02 loc1 [V02,T03] ( 3, 2.25) int -> r8
; V03 OutArgs [V03 ] ( 1, 1 ) lclBlk (32) [rsp+0x00] "OutgoingArgSpace"
;* V04 tmp1 [V04 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
;* V05 tmp2 [V05 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
;* V06 tmp3 [V06 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
;* V07 tmp4 [V07,T05] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
; V08 tmp5 [V08,T01] ( 5, 3 ) byref -> rax V14._pointer(offs=0x00) P-INDEP "field V00._pointer (fldOffset=0x0)"
; V09 tmp6 [V09,T04] ( 3, 2.50) int -> rcx V14._length(offs=0x08) P-INDEP "field V00._length (fldOffset=0x8)"
;* V10 tmp7 [V10,T07] ( 0, 0 ) byref -> zero-ref V04._pointer(offs=0x00) P-INDEP "field V04._pointer (fldOffset=0x0)"
;* V11 tmp8 [V11,T08] ( 0, 0 ) int -> zero-ref V04._length(offs=0x08) P-INDEP "field V04._length (fldOffset=0x8)"
;* V12 tmp9 [V12,T06] ( 0, 0 ) byref -> zero-ref V06._pointer(offs=0x00) P-INDEP "field V06._pointer (fldOffset=0x0)"
;* V13 tmp10 [V13 ] ( 0, 0 ) int -> zero-ref V06._length(offs=0x08) P-INDEP "field V06._length (fldOffset=0x8)"
;* V14 tmp11 [V14 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref"
;
; Lcl frame size = 40
G_M58243_IG01:
4883EC28 sub rsp, 40
90 nop
G_M58243_IG02:
488B01 mov rax, bword ptr [rcx]
8B4908 mov ecx, dword ptr [rcx+8]
33D2 xor rdx, rdx
448BC1 mov r8d, ecx
4183F803 cmp r8d, 3
7E3B jle SHORT G_M58243_IG04
4183F804 cmp r8d, 4
7C3D jl SHORT G_M58243_IG06
G_M58243_IG03:
8B10 mov edx, dword ptr [rax]
83F907 cmp ecx, 7
752E jne SHORT G_M58243_IG04
0FB64806 movzx rcx, byte ptr [rax+6] ; no bounds check
48C1E130 shl rcx, 48
480BD1 or rdx, rcx
0FB64805 movzx rcx, byte ptr [rax+5] ; no bounds check
48C1E128 shl rcx, 40
480BD1 or rdx, rcx
0FB64004 movzx rax, byte ptr [rax+4] ; no bounds check
48C1E020 shl rax, 32
480BD0 or rdx, rax
48B80000000000000007 mov rax, 0x700000000000000
480BD0 or rdx, rax
G_M58243_IG04:
488BC2 mov rax, rdx
G_M58243_IG05:
4883C428 add rsp, 40
C3 ret
G_M58243_IG06:
B928000000 mov ecx, 40
E84DFEFFFF call System.ThrowHelper:ThrowArgumentOutOfRangeException(int)
CC int3
; Total bytes of code 100, prolog size 5 for method BoundsCheck:GetKey(struct):long
; ============================================================
On Linux x64 the struct is not promoted (because it is an incoming multireg struct), so the bounds checks are not eliminated:
asm
; Assembly listing for method BoundsCheck:GetKey(struct):long
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; optimized code
; rbp based frame
; partially interruptible
; Final local variable assignments
;
; V00 arg0 [V00 ] ( 10, 6.25) struct (16) [rbp-0x10] do-not-enreg[XSFB] addr-exposed ld-addr-op
; V01 loc0 [V01,T00] ( 5, 3.50) long -> rax
; V02 loc1 [V02,T01] ( 3, 2.50) int -> rdi
;# V03 OutArgs [V03 ] ( 1, 1 ) lclBlk ( 0) [rsp+0x00] "OutgoingArgSpace"
;* V04 tmp1 [V04 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
;* V05 tmp2 [V05 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
;* V06 tmp3 [V06 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
; V07 tmp4 [V07,T02] ( 2, 2 ) byref -> rsi "Inlining Arg"
; V08 tmp5 [V08,T05] ( 2, 0.75) byref -> rsi V04._pointer(offs=0x00) P-INDEP "field V04._pointer (fldOffset=0x0)"
; V09 tmp6 [V09,T06] ( 2, 0.50) int -> rax V04._length(offs=0x08) P-INDEP "field V04._length (fldOffset=0x8)"
; V10 tmp7 [V10,T04] ( 2, 1 ) byref -> rsi V06._pointer(offs=0x00) P-INDEP "field V06._pointer (fldOffset=0x0)"
;* V11 tmp8 [V11 ] ( 0, 0 ) int -> zero-ref V06._length(offs=0x08) P-INDEP "field V06._length (fldOffset=0x8)"
; V12 tmp9 [V12,T03] ( 3, 1.50) byref -> rax "BlockOp address local"
;
; Lcl frame size = 16
G_M58243_IG01:
55 push rbp
4883EC10 sub rsp, 16
488D6C2410 lea rbp, [rsp+10H]
48897DF0 mov bword ptr [rbp-10H], rdi
488975F8 mov qword ptr [rbp-08H], rsi
G_M58243_IG02:
33C0 xor rax, rax
8B7DF8 mov edi, dword ptr [rbp-08H]
83FF03 cmp edi, 3
7E65 jle SHORT G_M58243_IG04
488D45F0 lea rax, bword ptr [rbp-10H]
488B30 mov rsi, bword ptr [rax]
8B4008 mov eax, dword ptr [rax+8]
83F804 cmp eax, 4
7C5C jl SHORT G_M58243_IG05
G_M58243_IG03:
8B06 mov eax, dword ptr [rsi]
83FF07 cmp edi, 7
754F jne SHORT G_M58243_IG04
837DF806 cmp dword ptr [rbp-08H], 6 ; bounds check
765A jbe SHORT G_M58243_IG06
488B7DF0 mov rdi, bword ptr [rbp-10H]
400FB67F06 movzx rdi, byte ptr [rdi+6]
48C1E730 shl rdi, 48
480BC7 or rax, rdi
837DF805 cmp dword ptr [rbp-08H], 5 ; bounds check
7644 jbe SHORT G_M58243_IG06
488B7DF0 mov rdi, bword ptr [rbp-10H]
400FB67F05 movzx rdi, byte ptr [rdi+5]
48C1E728 shl rdi, 40
480BC7 or rax, rdi
837DF804 cmp dword ptr [rbp-08H], 4 ; bounds check
762E jbe SHORT G_M58243_IG06
488B7DF0 mov rdi, bword ptr [rbp-10H]
400FB67F04 movzx rdi, byte ptr [rdi+4]
48C1E720 shl rdi, 32
480BC7 or rax, rdi
48BF0000000000000007 mov rdi, 0x700000000000000
480BC7 or rax, rdi
G_M58243_IG04:
488D6500 lea rsp, [rbp]
5D pop rbp
C3 ret
G_M58243_IG05:
BF28000000 mov edi, 40
E82FFAFFFF call System.ThrowHelper:ThrowArgumentOutOfRangeException(int)
CC int3
G_M58243_IG06:
E8990B3679 call CORINFO_HELP_RNGCHKFAIL
CC int3
; Total bytes of code 152, prolog size 10 for method BoundsCheck:GetKey(struct):long
; ============================================================
category:cq
theme:structs
skill-level:expert
cost:large
Metadata
Metadata
Assignees
Labels
area-CodeGen-coreclr (CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI), optimization, os-linux (Linux OS, any supported distro)