diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 75ab11d7aeab73..5b7ec3f559c256 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -27,14 +27,15 @@ enum CORINFO_InstructionSet InstructionSet_Atomics=9, InstructionSet_Vector64=10, InstructionSet_Vector128=11, - InstructionSet_ArmBase_Arm64=12, - InstructionSet_AdvSimd_Arm64=13, - InstructionSet_Aes_Arm64=14, - InstructionSet_Crc32_Arm64=15, - InstructionSet_Dp_Arm64=16, - InstructionSet_Rdm_Arm64=17, - InstructionSet_Sha1_Arm64=18, - InstructionSet_Sha256_Arm64=19, + InstructionSet_Dczva=12, + InstructionSet_ArmBase_Arm64=13, + InstructionSet_AdvSimd_Arm64=14, + InstructionSet_Aes_Arm64=15, + InstructionSet_Crc32_Arm64=16, + InstructionSet_Dp_Arm64=17, + InstructionSet_Rdm_Arm64=18, + InstructionSet_Sha1_Arm64=19, + InstructionSet_Sha256_Arm64=20, #endif // TARGET_ARM64 #ifdef TARGET_AMD64 InstructionSet_X86Base=1, @@ -457,6 +458,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector64"; case InstructionSet_Vector128 : return "Vector128"; + case InstructionSet_Dczva : + return "Dczva"; #endif // TARGET_ARM64 #ifdef TARGET_AMD64 case InstructionSet_X86Base : diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 69d85eda196d5c..452e508ddd8626 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -32,11 +32,11 @@ ////////////////////////////////////////////////////////////////////////////////////////////////////////// // -constexpr GUID JITEEVersionIdentifier = { /* 000b3acb-92d2-4003-8760-e545241dd9a8 */ - 0x000b3acb, - 0x92d2, - 0x4003, - {0x87, 0x60, 0xe5, 0x45, 0x24, 0x1d, 0xd9, 0xa8} +constexpr GUID JITEEVersionIdentifier = { /* 960894e2-ec41-4088-82bb-bdcbac4ac2d3 */ + 0x960894e2, + 0xec41, + 0x4088, + {0x82, 0xbb, 0xbd, 0xcb, 0xac, 0x4a, 0xc2, 0xd3} }; 
////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index dcd3e08a2284a3..7bff14693a486b 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -54,7 +54,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Default: false. // // Return Value: -// returns true if the immediate was too large and tmpReg was used and modified. +// returns true if the immediate was small enough to be encoded inside instruction. If not, +// returns false meaning the immediate was too large and tmpReg was used and modified. // bool CodeGen::genInstrWithConstant(instruction ins, emitAttr attr, diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 3933ace5f345f4..90c75009fc085b 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -6148,37 +6148,33 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, if (genUseBlockInit) { assert(untrLclHi > untrLclLo); -#ifdef TARGET_ARMARCH - /* - Generate the following code: - - For cnt less than 10 - - mov rZero1, 0 - mov rZero2, 0 - mov rCnt, - stm ,[rAddr!] - stm ,[rAddr!] - stm ,[rAddr!] - stm ,[rAddr!] - str rZero1,[rAddr] - - For rCnt greater than or equal to 10 - - mov rZero1, 0 - mov rZero2, 0 - mov rCnt, - sub rAddr, sp, OFFS - - loop: - stm ,[rAddr!] - sub rCnt,rCnt,1 - jnz loop - - str rZero1,[rAddr] // When cnt is odd - - NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers. - */ +#ifdef TARGET_ARM + // Generate the following code: + // + // For cnt less than 10 + // + // mov rZero1, 0 + // mov rZero2, 0 + // mov rCnt, + // stm ,[rAddr!] + // stm ,[rAddr!] + // stm ,[rAddr!] + // stm ,[rAddr!] 
+ // str rZero1,[rAddr] + // + // For rCnt greater than or equal to 10 + // + // mov rZero1, 0 + // mov rZero2, 0 + // mov rCnt, + // sub rAddr, sp, OFFS + // + // loop: + // stm ,[rAddr!] + // sub rCnt,rCnt,1 + // jnz loop + // + // str rZero1,[rAddr] // When cnt is odd regNumber rAddr; regNumber rCnt = REG_NA; // Invalid @@ -6190,8 +6186,6 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for // a large constant. -#if defined(TARGET_ARM) - if (compiler->compLocallocUsed) { availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame @@ -6214,13 +6208,6 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, rAddr = genRegNumFromMask(regMask); availMask &= ~regMask; -#else // !define(TARGET_ARM) - - rAddr = initReg; - *pInitRegZeroed = false; - -#endif // !defined(TARGET_ARM) - bool useLoop = false; unsigned uCntBytes = untrLclHi - untrLclLo; assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes. 
@@ -6245,11 +6232,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, // rAddr is not a live incoming argument reg assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); -#if defined(TARGET_ARM) if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE)) -#else // !TARGET_ARM - if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE)) -#endif // !TARGET_ARM { GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo); } @@ -6269,65 +6252,212 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); } -#if defined(TARGET_ARM) rZero1 = genGetZeroReg(initReg, pInitRegZeroed); instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2); target_ssize_t stmImm = (target_ssize_t)(genRegMask(rZero1) | genRegMask(rZero2)); -#endif // TARGET_ARM if (!useLoop) { while (uCntBytes >= REGSIZE_BYTES * 2) { -#ifdef TARGET_ARM GetEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); -#else // !TARGET_ARM - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES, - INS_OPTS_POST_INDEX); -#endif // !TARGET_ARM uCntBytes -= REGSIZE_BYTES * 2; } } - else // useLoop is true + else { -#ifdef TARGET_ARM GetEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET); -#else // !TARGET_ARM - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES, - INS_OPTS_POST_INDEX); // zero stack slots - GetEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, 1); -#endif // !TARGET_ARM GetEmitter()->emitIns_J(INS_bhi, NULL, -3); uCntBytes %= REGSIZE_BYTES * 2; } if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number) { -#ifdef TARGET_ARM GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, 0); -#else // TARGET_ARM - if 
((uCntBytes - REGSIZE_BYTES) == 0) + uCntBytes -= REGSIZE_BYTES; + } + + noway_assert(uCntBytes == 0); + +#elif defined(TARGET_ARM64) + int bytesToWrite = untrLclHi - untrLclLo; + + const regNumber zeroSimdReg = REG_ZERO_INIT_FRAME_SIMD; + bool simdRegZeroed = false; + const int simdRegPairSizeBytes = 2 * FP_REGSIZE_BYTES; + + regNumber addrReg = REG_ZERO_INIT_FRAME_REG1; + + if (addrReg == initReg) + { + *pInitRegZeroed = false; + } + + int addrOffset = 0; + + // The following invariants are held below: + // + // 1) [addrReg, #addrOffset] points at a location where next chunk of zero bytes will be written; + // 2) bytesToWrite specifies the number of bytes on the frame to initialize; + // 3) if simdRegZeroed is true then 128-bit wide zeroSimdReg contains zeroes. + + const int bytesUseZeroingLoop = 192; + + if (bytesToWrite >= bytesUseZeroingLoop) + { + // Generates the following code: + // + // When the size of the region is greater than or equal to 256 bytes + // **and** DC ZVA instruction use is permitted + // **and** the instruction block size is configured to 64 bytes: + // + // movi v16.16b, #0 + // add x9, fp, #(untrLclLo+64) + // add x10, fp, #(untrLclHi-64) + // stp q16, q16, [x9, #-64] + // stp q16, q16, [x9, #-32] + // bfm x9, xzr, #0, #5 + // + // loop: + // dc zva, x9 + // add x9, x9, #64 + // cmp x9, x10 + // blo loop + // + // stp q16, q16, [x10] + // stp q16, q16, [x10, #32] + // + // Otherwise: + // + // movi v16.16b, #0 + // add x9, fp, #(untrLclLo-32) + // mov x10, #(bytesToWrite-64) + // + // loop: + // stp q16, q16, [x9, #32] + // stp q16, q16, [x9, #64]! 
+ // subs x10, x10, #64 + // bge loop + + const int bytesUseDataCacheZeroInstruction = 256; + + GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, zeroSimdReg, 0, INS_OPTS_16B); + simdRegZeroed = true; + + if ((bytesToWrite >= bytesUseDataCacheZeroInstruction) && + compiler->compOpportunisticallyDependsOn(InstructionSet_Dczva)) { - GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, 0); + // The first and the last 64 bytes should be written with two stp q-reg instructions. + // This is in order to avoid **unintended** zeroing of the data by dc zva + // outside of [fp+untrLclLo, fp+untrLclHi) memory region. + + genInstrWithConstant(INS_add, EA_PTRSIZE, addrReg, genFramePointerReg(), untrLclLo + 64, addrReg); + addrOffset = -64; + + const regNumber endAddrReg = REG_ZERO_INIT_FRAME_REG2; + + if (endAddrReg == initReg) + { + *pInitRegZeroed = false; + } + + genInstrWithConstant(INS_add, EA_PTRSIZE, endAddrReg, genFramePointerReg(), untrLclHi - 64, endAddrReg); + + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_16BYTE, zeroSimdReg, zeroSimdReg, addrReg, addrOffset); + addrOffset += simdRegPairSizeBytes; + + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_16BYTE, zeroSimdReg, zeroSimdReg, addrReg, addrOffset); + addrOffset += simdRegPairSizeBytes; + + assert(addrOffset == 0); + + GetEmitter()->emitIns_R_R_I_I(INS_bfm, EA_PTRSIZE, addrReg, REG_ZR, 0, 5); + // addrReg points at the beginning of a cache line. 
+ + GetEmitter()->emitIns_R(INS_dczva, EA_PTRSIZE, addrReg); + GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, addrReg, addrReg, 64); + GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, addrReg, endAddrReg); + GetEmitter()->emitIns_J(INS_blo, NULL, -4); + + addrReg = endAddrReg; + bytesToWrite = 64; } else { - GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX); + genInstrWithConstant(INS_add, EA_PTRSIZE, addrReg, genFramePointerReg(), untrLclLo - 32, addrReg); + addrOffset = 32; + + const regNumber countReg = REG_ZERO_INIT_FRAME_REG2; + + if (countReg == initReg) + { + *pInitRegZeroed = false; + } + + instGen_Set_Reg_To_Imm(EA_PTRSIZE, countReg, bytesToWrite - 64); + + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_16BYTE, zeroSimdReg, zeroSimdReg, addrReg, 32); + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_16BYTE, zeroSimdReg, zeroSimdReg, addrReg, 64, + INS_OPTS_PRE_INDEX); + + GetEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, countReg, countReg, 64); + GetEmitter()->emitIns_J(INS_bge, NULL, -4); + + bytesToWrite %= 64; } -#endif // !TARGET_ARM - uCntBytes -= REGSIZE_BYTES; } -#ifdef TARGET_ARM64 - if (uCntBytes > 0) + else { - assert(uCntBytes == sizeof(int)); - GetEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, 0); - uCntBytes -= sizeof(int); + genInstrWithConstant(INS_add, EA_PTRSIZE, addrReg, genFramePointerReg(), untrLclLo, addrReg); + } + + if (bytesToWrite >= simdRegPairSizeBytes) + { + // Generates the following code: + // + // movi v16.16b, #0 + // stp q16, q16, [x9, #addrOffset] + // stp q16, q16, [x9, #(addrOffset+32)] + // ... 
+ // stp q16, q16, [x9, #(addrOffset+roundDown(bytesToWrite, 32)-32)] + + if (!simdRegZeroed) + { + GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, zeroSimdReg, 0, INS_OPTS_16B); + simdRegZeroed = true; + } + + for (; bytesToWrite >= simdRegPairSizeBytes; bytesToWrite -= simdRegPairSizeBytes) + { + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_16BYTE, zeroSimdReg, zeroSimdReg, addrReg, addrOffset); + addrOffset += simdRegPairSizeBytes; + } + } + + const int regPairSizeBytes = 2 * REGSIZE_BYTES; + + if (bytesToWrite >= regPairSizeBytes) + { + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, addrReg, addrOffset); + addrOffset += regPairSizeBytes; + bytesToWrite -= regPairSizeBytes; + } + + if (bytesToWrite >= REGSIZE_BYTES) + { + GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, addrReg, addrOffset); + addrOffset += REGSIZE_BYTES; + bytesToWrite -= REGSIZE_BYTES; + } + + if (bytesToWrite == sizeof(int)) + { + GetEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, addrReg, addrOffset); + bytesToWrite = 0; } -#endif // TARGET_ARM64 - noway_assert(uCntBytes == 0); + assert(bytesToWrite == 0); #elif defined(TARGET_XARCH) assert(compiler->getSIMDSupportLevel() >= SIMD_SSE2_Supported); emitter* emit = GetEmitter(); diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 2f78d20a713967..eaac34ff427916 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -385,7 +385,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s) assert(isValidGeneralDatasize(id->idOpSize())); assert(isGeneralRegister(id->idReg1())); - assert(isGeneralRegister(id->idReg2())); + assert(isGeneralRegisterOrZR(id->idReg2())); assert(isValidImmNRS(emitGetInsSC(id), id->idOpSize())); break; @@ -915,6 +915,12 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SI_0B: // SI_0B ................ ....bbbb........ 
imm4 - barrier break; + case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva) + datasize = id->idOpSize(); + assert(isGeneralRegister(id->idReg1())); + assert(datasize == EA_8BYTE); + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -3683,6 +3689,14 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) fmt = IF_BR_1A; break; + case INS_dczva: + assert(isGeneralRegister(reg)); + assert(attr == EA_8BYTE); + id = emitNewInstrSmall(attr); + id->idReg1(reg); + fmt = IF_SR_1A; + break; + default: unreached(); } @@ -6941,7 +6955,7 @@ void emitter::emitIns_R_R_I_I( case INS_sbfm: case INS_ubfm: assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); + assert((ins == INS_bfm) ? isGeneralRegisterOrZR(reg2) : isGeneralRegister(reg2)); assert(isValidImmShift(imm1, size)); assert(isValidImmShift(imm2, size)); assert(insOptsNone(opt)); @@ -11372,6 +11386,13 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva) + assert(insOptsNone(id->idInsOpt())); + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Rt(id->idReg1()); // ttttt + dst += emitOutput_Instr(dst, code); + break; + default: assert(!"Unexpected format"); break; @@ -13293,6 +13314,10 @@ void emitter::emitDispIns( emitDispBarrier((insBarrier)emitGetInsSC(id)); break; + case IF_SR_1A: // SR_1A ................ 
...........ttttt Rt (dc zva) + emitDispReg(id->idReg1(), size, false); + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -15366,6 +15391,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_1C; break; + case IF_SR_1A: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_1C; + break; + case IF_DV_2T: // addv, saddlv, smaxv, sminv, uaddlv, umaxv, uminv switch (ins) { diff --git a/src/coreclr/jit/emitfmtsarm64.h b/src/coreclr/jit/emitfmtsarm64.h index 05d7d2c83d08d3..81f41085a2ebed 100644 --- a/src/coreclr/jit/emitfmtsarm64.h +++ b/src/coreclr/jit/emitfmtsarm64.h @@ -111,6 +111,7 @@ IF_DEF(EN2Q, IS_NONE, NONE) // Instruction has 2 possible encoding types, type Q // BR :: Branches - Register // SN :: System - No Registers or Immediates // SI :: System - Immediate +// SR :: System - Register // // _ :: a separator char '_' // @@ -226,6 +227,8 @@ IF_DEF(SN_0A, IS_NONE, NONE) // SN_0A ................ ................ IF_DEF(SI_0A, IS_NONE, NONE) // SI_0A ...........iiiii iiiiiiiiiii..... imm16 IF_DEF(SI_0B, IS_NONE, NONE) // SI_0B ................ ....bbbb........ imm4 - barrier +IF_DEF(SR_1A, IS_NONE, NONE) // SR_1A ................ 
...........ttttt Rt (dc zva) + IF_DEF(INVALID, IS_NONE, NONE) // ////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h index cddd9b0a986aa5..1bb1357366832f 100644 --- a/src/coreclr/jit/instrsarm64.h +++ b/src/coreclr/jit/instrsarm64.h @@ -1572,6 +1572,9 @@ INST1(dmb, "dmb", 0, IF_SI_0B, 0xD50330BF) INST1(isb, "isb", 0, IF_SI_0B, 0xD50330DF) // isb barrierKind SI_0B 1101010100000011 0011bbbb11011111 D503 30DF imm4 - barrier kind +INST1(dczva, "dczva", 0, IF_SR_1A, 0xD50B7420) + // dc zva,Rt SR_1A 1101010100001011 01110100001ttttt D50B 7420 Rt + INST1(umov, "umov", 0, IF_DV_2B, 0x0E003C00) // umov Rd,Vn[] DV_2B 0Q001110000iiiii 001111nnnnnddddd 0E00 3C00 Rd,Vn[] diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index d4d501e5fd72d3..559df53e712d80 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -1556,6 +1556,10 @@ typedef unsigned char regNumberSmall; // have encoding that restricts what registers that can be used for the indexed element when the element size is H (i.e. 2 bytes). 
#define RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS (RBM_V0|RBM_V1|RBM_V2|RBM_V3|RBM_V4|RBM_V5|RBM_V6|RBM_V7|RBM_V8|RBM_V9|RBM_V10|RBM_V11|RBM_V12|RBM_V13|RBM_V14|RBM_V15) + #define REG_ZERO_INIT_FRAME_REG1 REG_R9 + #define REG_ZERO_INIT_FRAME_REG2 REG_R10 + #define REG_ZERO_INIT_FRAME_SIMD REG_V16 + #else #error Unsupported or unset target architecture #endif diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index e064a954e6087b..7b816cdd9e11d9 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -42,6 +42,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.ARM64_Atomics: return ReadyToRunInstructionSet.Atomics; case InstructionSet.ARM64_Vector64: return null; case InstructionSet.ARM64_Vector128: return null; + case InstructionSet.ARM64_Dczva: return null; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 4270c1e69c10e0..8a8111d5142687 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -28,14 +28,15 @@ public enum InstructionSet ARM64_Atomics=9, ARM64_Vector64=10, ARM64_Vector128=11, - ARM64_ArmBase_Arm64=12, - ARM64_AdvSimd_Arm64=13, - ARM64_Aes_Arm64=14, - ARM64_Crc32_Arm64=15, - ARM64_Dp_Arm64=16, - ARM64_Rdm_Arm64=17, - ARM64_Sha1_Arm64=18, - ARM64_Sha256_Arm64=19, + ARM64_Dczva=12, + ARM64_ArmBase_Arm64=13, + ARM64_AdvSimd_Arm64=14, + ARM64_Aes_Arm64=15, + ARM64_Crc32_Arm64=16, + ARM64_Dp_Arm64=17, + ARM64_Rdm_Arm64=18, + ARM64_Sha1_Arm64=19, + ARM64_Sha256_Arm64=20, X64_X86Base=1, X64_SSE=2, X64_SSE2=3, @@ -551,6 +552,7 @@ public static IEnumerable 
ArchitectureToValidInstructionSets yield return new InstructionSetInfo("lse", "", InstructionSet.ARM64_Atomics, true); yield return new InstructionSetInfo("Vector64", "", InstructionSet.ARM64_Vector64, false); yield return new InstructionSetInfo("Vector128", "", InstructionSet.ARM64_Vector128, false); + yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false); break; case TargetArchitecture.X64: diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index d8fe3984a92515..6e64e7e2b02c0a 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -93,6 +93,7 @@ instructionset ,ARM64 ,Sha256 , ,20 ,Sha256 ,sha2 instructionset ,ARM64 , ,Atomics ,21 ,Atomics ,lse instructionset ,ARM64 , , , ,Vector64 , instructionset ,ARM64 , , , ,Vector128, +instructionset ,ARM64 , , , ,Dczva , instructionset64bit,ARM64 ,ArmBase instructionset64bit,ARM64 ,AdvSimd diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index d9c7fcd1eb3428..78790be1d1a2ca 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -16,6 +16,13 @@ LEAF_ENTRY GetCurrentSP, _TEXT ret lr LEAF_END GetCurrentSP, _TEXT +// DWORD64 __stdcall GetDataCacheZeroIDReg(void) +LEAF_ENTRY GetDataCacheZeroIDReg, _TEXT + mrs x0, dczid_el0 + and x0, x0, 31 + ret lr +LEAF_END GetDataCacheZeroIDReg, _TEXT + //----------------------------------------------------------------------------- // This routine captures the machine state. 
It is used by helper method frame //----------------------------------------------------------------------------- diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 2f9227b1d80df6..304a6592a6de31 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -75,6 +75,13 @@ ret lr LEAF_END +;; DWORD64 __stdcall GetDataCacheZeroIDReg(void); + LEAF_ENTRY GetDataCacheZeroIDReg + mrs x0, dczid_el0 + and x0, x0, 31 + ret lr + LEAF_END + ;;----------------------------------------------------------------------------- ;; This routine captures the machine state. It is used by helper method frame ;;----------------------------------------------------------------------------- diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index c4f00736defb51..f9338f78751609 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1263,6 +1263,10 @@ bool DoesOSSupportAVX() #endif // defined(TARGET_X86) || defined(TARGET_AMD64) +#ifdef TARGET_ARM64 +extern "C" DWORD64 __stdcall GetDataCacheZeroIDReg(); +#endif + void EEJitManager::SetCpuInfo() { LIMITED_METHOD_CONTRACT; @@ -1514,6 +1518,16 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_Crc32); } #endif // HOST_64BIT +#ifndef CROSSGEN_COMPILE + if (GetDataCacheZeroIDReg() == 4) + { + // DCZID_EL0<4> (DZP) indicates whether use of DC ZVA instructions is permitted (0) or prohibited (1). + // DCZID_EL0<3:0> (BS) specifies Log2 of the block size in words. + // + // We set the flag when the instruction is permitted and the block size is 64 bytes. + CPUCompileFlags.Set(InstructionSet_Dczva); + } +#endif #endif // TARGET_ARM64 CPUCompileFlags.Set64BitInstructionSetVariants();