Skip to content

Implement struct promotion for incoming multireg structs #13417

@erozenfeld

Description

@erozenfeld

We are currently not promoting incoming multireg structs: https://github.com/dotnet/coreclr/blob/9479f67577bbb02ea611777b00308f42252fb2bc/src/jit/lclvars.cpp#L1914-L1926

Example derived from the discussion in dotnet/corefx#40998:

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

public class BoundsCheck
{
    public static int Main()
    {
        ReadOnlySpan<byte> span = new ReadOnlySpan<byte>(new byte[7]);
        return (int)GetKey(span) + 100;
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    public static ulong GetKey(ReadOnlySpan<byte> propertyName)
    {
        const int BitsInByte = 8;
        ulong key = 0;
        int length = propertyName.Length;

        if (length > 3)
        {
            key = MemoryMarshal.Read<uint>(propertyName);

            if (length == 7)
            {
                key |= (ulong)propertyName[6] << (6 * BitsInByte)
                    | (ulong)propertyName[5] << (5 * BitsInByte)
                    | (ulong)propertyName[4] << (4 * BitsInByte)
                    | (ulong)7 << (7 * BitsInByte);
            }
        }

        return key;
    }
}

On Windows x64 the struct is promoted and we eliminate bounds checks:

asm
; Assembly listing for method BoundsCheck:GetKey(struct):long
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  4,  8   )   byref  ->  rcx         ld-addr-op
;  V01 loc0         [V01,T02] (  5,  3.50)    long  ->  rdx
;  V02 loc1         [V02,T03] (  3,  2.25)     int  ->   r8
;  V03 OutArgs      [V03    ] (  1,  1   )  lclBlk (32) [rsp+0x00]   "OutgoingArgSpace"
;* V04 tmp1         [V04    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "Inlining Arg"
;* V05 tmp2         [V05    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
;* V06 tmp3         [V06    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "Inlining Arg"
;* V07 tmp4         [V07,T05] (  0,  0   )   byref  ->  zero-ref    "Inlining Arg"
;  V08 tmp5         [V08,T01] (  5,  3   )   byref  ->  rax         V14._pointer(offs=0x00) P-INDEP "field V00._pointer (fldOffset=0x0)"
;  V09 tmp6         [V09,T04] (  3,  2.50)     int  ->  rcx         V14._length(offs=0x08) P-INDEP "field V00._length (fldOffset=0x8)"
;* V10 tmp7         [V10,T07] (  0,  0   )   byref  ->  zero-ref    V04._pointer(offs=0x00) P-INDEP "field V04._pointer (fldOffset=0x0)"
;* V11 tmp8         [V11,T08] (  0,  0   )     int  ->  zero-ref    V04._length(offs=0x08) P-INDEP "field V04._length (fldOffset=0x8)"
;* V12 tmp9         [V12,T06] (  0,  0   )   byref  ->  zero-ref    V06._pointer(offs=0x00) P-INDEP "field V06._pointer (fldOffset=0x0)"
;* V13 tmp10        [V13    ] (  0,  0   )     int  ->  zero-ref    V06._length(offs=0x08) P-INDEP "field V06._length (fldOffset=0x8)"
;* V14 tmp11        [V14    ] (  0,  0   )  struct (16) zero-ref    "Promoted implicit byref"
;
; Lcl frame size = 40

G_M58243_IG01:
       4883EC28             sub      rsp, 40
       90                   nop

G_M58243_IG02:
       488B01               mov      rax, bword ptr [rcx]
       8B4908               mov      ecx, dword ptr [rcx+8]
       33D2                 xor      rdx, rdx
       448BC1               mov      r8d, ecx
       4183F803             cmp      r8d, 3
       7E3B                 jle      SHORT G_M58243_IG04
       4183F804             cmp      r8d, 4
       7C3D                 jl       SHORT G_M58243_IG06

G_M58243_IG03:
       8B10                 mov      edx, dword ptr [rax]
       83F907               cmp      ecx, 7
       752E                 jne      SHORT G_M58243_IG04
       0FB64806             movzx    rcx, byte  ptr [rax+6]    ; no bounds check
       48C1E130             shl      rcx, 48
       480BD1               or       rdx, rcx
       0FB64805             movzx    rcx, byte  ptr [rax+5]    ; no bounds check
       48C1E128             shl      rcx, 40
       480BD1               or       rdx, rcx
       0FB64004             movzx    rax, byte  ptr [rax+4]    ; no bounds check
       48C1E020             shl      rax, 32
       480BD0               or       rdx, rax
       48B80000000000000007 mov      rax, 0x700000000000000
       480BD0               or       rdx, rax

G_M58243_IG04:
       488BC2               mov      rax, rdx

G_M58243_IG05:
       4883C428             add      rsp, 40
       C3                   ret

G_M58243_IG06:
       B928000000           mov      ecx, 40
       E84DFEFFFF           call     System.ThrowHelper:ThrowArgumentOutOfRangeException(int)
       CC                   int3

; Total bytes of code 100, prolog size 5 for method BoundsCheck:GetKey(struct):long
; ============================================================

On Linux x64 we don't promote the struct (since it's an incoming multireg struct) and don't eliminate bounds checks:

asm
; Assembly listing for method BoundsCheck:GetKey(struct):long
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; optimized code
; rbp based frame
; partially interruptible
; Final local variable assignments
;
;  V00 arg0         [V00    ] ( 10,  6.25)  struct (16) [rbp-0x10]   do-not-enreg[XSFB] addr-exposed ld-addr-op
;  V01 loc0         [V01,T00] (  5,  3.50)    long  ->  rax
;  V02 loc1         [V02,T01] (  3,  2.50)     int  ->  rdi
;# V03 OutArgs      [V03    ] (  1,  1   )  lclBlk ( 0) [rsp+0x00]   "OutgoingArgSpace"
;* V04 tmp1         [V04    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "Inlining Arg"
;* V05 tmp2         [V05    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
;* V06 tmp3         [V06    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "Inlining Arg"
;  V07 tmp4         [V07,T02] (  2,  2   )   byref  ->  rsi         "Inlining Arg"
;  V08 tmp5         [V08,T05] (  2,  0.75)   byref  ->  rsi         V04._pointer(offs=0x00) P-INDEP "field V04._pointer (fldOffset=0x0)"
;  V09 tmp6         [V09,T06] (  2,  0.50)     int  ->  rax         V04._length(offs=0x08) P-INDEP "field V04._length (fldOffset=0x8)"
;  V10 tmp7         [V10,T04] (  2,  1   )   byref  ->  rsi         V06._pointer(offs=0x00) P-INDEP "field V06._pointer (fldOffset=0x0)"
;* V11 tmp8         [V11    ] (  0,  0   )     int  ->  zero-ref    V06._length(offs=0x08) P-INDEP "field V06._length (fldOffset=0x8)"
;  V12 tmp9         [V12,T03] (  3,  1.50)   byref  ->  rax         "BlockOp address local"
;
; Lcl frame size = 16

G_M58243_IG01:
       55                   push     rbp
       4883EC10             sub      rsp, 16
       488D6C2410           lea      rbp, [rsp+10H]
       48897DF0             mov      bword ptr [rbp-10H], rdi
       488975F8             mov      qword ptr [rbp-08H], rsi

G_M58243_IG02:
       33C0                 xor      rax, rax
       8B7DF8               mov      edi, dword ptr [rbp-08H]
       83FF03               cmp      edi, 3
       7E65                 jle      SHORT G_M58243_IG04
       488D45F0             lea      rax, bword ptr [rbp-10H]
       488B30               mov      rsi, bword ptr [rax]
       8B4008               mov      eax, dword ptr [rax+8]
       83F804               cmp      eax, 4
       7C5C                 jl       SHORT G_M58243_IG05

G_M58243_IG03:
       8B06                 mov      eax, dword ptr [rsi]
       83FF07               cmp      edi, 7
       754F                 jne      SHORT G_M58243_IG04
       837DF806             cmp      dword ptr [rbp-08H], 6      ; bounds check
       765A                 jbe      SHORT G_M58243_IG06
       488B7DF0             mov      rdi, bword ptr [rbp-10H]
       400FB67F06           movzx    rdi, byte  ptr [rdi+6]
       48C1E730             shl      rdi, 48
       480BC7               or       rax, rdi
       837DF805             cmp      dword ptr [rbp-08H], 5      ; bounds check
       7644                 jbe      SHORT G_M58243_IG06
       488B7DF0             mov      rdi, bword ptr [rbp-10H]
       400FB67F05           movzx    rdi, byte  ptr [rdi+5]
       48C1E728             shl      rdi, 40
       480BC7               or       rax, rdi
       837DF804             cmp      dword ptr [rbp-08H], 4       ; bounds check
       762E                 jbe      SHORT G_M58243_IG06
       488B7DF0             mov      rdi, bword ptr [rbp-10H]
       400FB67F04           movzx    rdi, byte  ptr [rdi+4]
       48C1E720             shl      rdi, 32
       480BC7               or       rax, rdi
       48BF0000000000000007 mov      rdi, 0x700000000000000
       480BC7               or       rax, rdi

G_M58243_IG04:
       488D6500             lea      rsp, [rbp]
       5D                   pop      rbp
       C3                   ret

G_M58243_IG05:
       BF28000000           mov      edi, 40
       E82FFAFFFF           call     System.ThrowHelper:ThrowArgumentOutOfRangeException(int)
       CC                   int3

G_M58243_IG06:
       E8990B3679           call     CORINFO_HELP_RNGCHKFAIL
       CC                   int3

; Total bytes of code 152, prolog size 10 for method BoundsCheck:GetKey(struct):long
; ============================================================

category:cq
theme:structs
skill-level:expert
cost:large

Metadata

Metadata

Assignees

Labels

area-CodeGen-coreclrCLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMIoptimizationos-linuxLinux OS (any supported distro)

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions