From 2d94f62e6b41c90a89a442315af78bf3c7999f37 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Wed, 12 Nov 2025 11:12:03 -0300 Subject: [PATCH 1/3] Revert "fix(l1,l2): build on ARM Linux (#5285)" This reverts commit f4905c2ad8045b718df5bab4907aea50ac4a293c. --- .../crypto/keccak/keccak1600-armv8-elf.s | 855 ------------------ ...k1600-armv8-macho.s => keccak1600-armv8.s} | 0 crates/common/crypto/keccak/mod.rs | 6 +- 3 files changed, 2 insertions(+), 859 deletions(-) delete mode 100644 crates/common/crypto/keccak/keccak1600-armv8-elf.s rename crates/common/crypto/keccak/{keccak1600-armv8-macho.s => keccak1600-armv8.s} (100%) diff --git a/crates/common/crypto/keccak/keccak1600-armv8-elf.s b/crates/common/crypto/keccak/keccak1600-armv8-elf.s deleted file mode 100644 index e9f2309573c..00000000000 --- a/crates/common/crypto/keccak/keccak1600-armv8-elf.s +++ /dev/null @@ -1,855 +0,0 @@ -// Modified: -// - Ran `cpp` to substitute constants. -// - Commented out ARM assembly annotations (.size, .type) used only for debugging purposes and not understood by -// Rust. -// - Removed dots from all local labels for correct detection in the frontend. -// Reason: `.L` local labels are ELF-specific. -// - Replaced instance of `adr x??,label` by `adrp x??,label` followed by -// `add x??,x??,:lo12:label`. -// -// TODO: this is probably a matter of selecting the right parameter -// for the translator. - -.align 8 // strategic alignment and padding that allows to use - // address value as loop termination condition... 
-.quad 0,0,0,0,0,0,0,0 -// .type iotas,%object -iotas: -.quad 0x0000000000000001 -.quad 0x0000000000008082 -.quad 0x800000000000808a -.quad 0x8000000080008000 -.quad 0x000000000000808b -.quad 0x0000000080000001 -.quad 0x8000000080008081 -.quad 0x8000000000008009 -.quad 0x000000000000008a -.quad 0x0000000000000088 -.quad 0x0000000080008009 -.quad 0x000000008000000a -Liotas12: -.quad 0x000000008000808b -.quad 0x800000000000008b -.quad 0x8000000000008089 -.quad 0x8000000000008003 -.quad 0x8000000000008002 -.quad 0x8000000000000080 -.quad 0x000000000000800a -.quad 0x800000008000000a -.quad 0x8000000080008081 -.quad 0x8000000000008080 -.quad 0x0000000080000001 -.quad 0x8000000080008008 -// .size iotas,.-iotas -// .type KeccakF1600_int,%function -.align 5 -KeccakF1600_int: -.inst 0xd503233f // paciasp - stp x28,x30,[sp,#16] // stack is pre-allocated - b Loop -.align 4 -Loop: - ////////////////////////////////////////// Theta - eor x26,x0,x5 - stp x4,x9,[sp,#0] // offload pair... - eor x27,x1,x6 - eor x28,x2,x7 - eor x30,x3,x8 - eor x4,x4,x9 - eor x26,x26,x10 - eor x27,x27,x11 - eor x28,x28,x12 - eor x30,x30,x13 - eor x4,x4,x14 - eor x26,x26,x15 - eor x27,x27,x16 - eor x28,x28,x17 - eor x30,x30,x25 - eor x4,x4,x19 - eor x26,x26,x20 - eor x28,x28,x22 - eor x27,x27,x21 - eor x30,x30,x23 - eor x4,x4,x24 - - eor x9,x26,x28,ror#63 - - eor x1,x1,x9 - eor x6,x6,x9 - eor x11,x11,x9 - eor x16,x16,x9 - eor x21,x21,x9 - - eor x9,x27,x30,ror#63 - eor x28,x28,x4,ror#63 - eor x30,x30,x26,ror#63 - eor x4,x4,x27,ror#63 - - eor x27, x2,x9 // mov x27,x2 - eor x7,x7,x9 - eor x12,x12,x9 - eor x17,x17,x9 - eor x22,x22,x9 - - eor x0,x0,x4 - eor x5,x5,x4 - eor x10,x10,x4 - eor x15,x15,x4 - eor x20,x20,x4 - ldp x4,x9,[sp,#0] // re-load offloaded data - eor x26, x3,x28 // mov x26,x3 - eor x8,x8,x28 - eor x13,x13,x28 - eor x25,x25,x28 - eor x23,x23,x28 - - eor x28, x4,x30 // mov x28,x4 - eor x9,x9,x30 - eor x14,x14,x30 - eor x19,x19,x30 - eor x24,x24,x30 - - 
////////////////////////////////////////// Rho+Pi - mov x30,x1 - ror x1,x6,#64-44 - //mov x27,x2 - ror x2,x12,#64-43 - //mov x26,x3 - ror x3,x25,#64-21 // ? - //mov x28,x4 - ror x4,x24,#64-14 // ? - - ror x6,x9,#64-20 // ? - ror x12,x13,#64-25 // ? - ror x25,x17,#64-15 - ror x24,x21,#64-2 // ? - - ror x9,x22,#64-61 - ror x13,x19,#64-8 - ror x17,x11,#64-10 - ror x21,x8,#64-55 - - ror x22,x14,#64-39 - ror x19,x23,#64-56 - ror x11,x7,#64-6 // ? - ror x8,x16,#64-45 - - ror x14,x20,#64-18 - ror x23,x15,#64-41 - ror x7,x10,#64-3 - ror x16,x5,#64-36 // ? - - ror x5,x26,#64-28 // ? - ror x10,x30,#64-1 - ror x15,x28,#64-27 // ? - ror x20,x27,#64-62 // ? - - ////////////////////////////////////////// Chi+Iota - bic x26,x2,x1 - bic x27,x3,x2 - bic x28,x0,x4 - bic x30,x1,x0 - eor x0,x0,x26 - bic x26,x4,x3 - eor x1,x1,x27 - ldr x27,[sp,#16] - eor x3,x3,x28 - eor x4,x4,x30 - eor x2,x2,x26 - ldr x30,[x27],#8 // Iota[i++] - - bic x26,x7,x6 - tst x27,#255 // are we done? - str x27,[sp,#16] - bic x27,x8,x7 - bic x28,x5,x9 - eor x0,x0,x30 // A[0][0] ^= Iota - bic x30,x6,x5 - eor x5,x5,x26 - bic x26,x9,x8 - eor x6,x6,x27 - eor x8,x8,x28 - eor x9,x9,x30 - eor x7,x7,x26 - - bic x26,x12,x11 - bic x27,x13,x12 - bic x28,x10,x14 - bic x30,x11,x10 - eor x10,x10,x26 - bic x26,x14,x13 - eor x11,x11,x27 - eor x13,x13,x28 - eor x14,x14,x30 - eor x12,x12,x26 - - bic x26,x17,x16 - bic x27,x25,x17 - bic x28,x15,x19 - bic x30,x16,x15 - eor x15,x15,x26 - bic x26,x19,x25 - eor x16,x16,x27 - eor x25,x25,x28 - eor x19,x19,x30 - eor x17,x17,x26 - - bic x26,x22,x21 - bic x27,x23,x22 - bic x28,x20,x24 - bic x30,x21,x20 - eor x20,x20,x26 - bic x26,x24,x23 - eor x21,x21,x27 - eor x23,x23,x28 - eor x24,x24,x30 - eor x22,x22,x26 - - bne Loop - - ldr x30,[sp,#16+8] -.inst 0xd50323bf // autiasp - ret -// .size KeccakF1600_int,.-KeccakF1600_int - -// .type KeccakF1600,%function -.align 5 -KeccakF1600: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-16*8]! 
- add x29,sp,#0 - stp x19,x20,[sp,#2*8] - stp x21,x22,[sp,#4*8] - stp x23,x24,[sp,#6*8] - stp x25,x26,[sp,#8*8] - stp x27,x28,[sp,#10*8] - sub sp,sp,#16+4*8 - - str x0,[sp,#16+2*8] // offload argument - mov x26,x0 - ldp x0,x1,[x0,#16*0] - ldp x2,x3,[x26,#16*1] - ldp x4,x5,[x26,#16*2] - ldp x6,x7,[x26,#16*3] - ldp x8,x9,[x26,#16*4] - ldp x10,x11,[x26,#16*5] - ldp x12,x13,[x26,#16*6] - ldp x14,x15,[x26,#16*7] - ldp x16,x17,[x26,#16*8] - ldp x25,x19,[x26,#16*9] - ldp x20,x21,[x26,#16*10] - ldp x22,x23,[x26,#16*11] - ldr x24,[x26,#16*12] - - adrp x28,iotas - add x28,x28,:lo12:iotas - bl KeccakF1600_int - - ldr x26,[sp,#16+2*8] - stp x0,x1,[x26,#16*0] - stp x2,x3,[x26,#16*1] - stp x4,x5,[x26,#16*2] - stp x6,x7,[x26,#16*3] - stp x8,x9,[x26,#16*4] - stp x10,x11,[x26,#16*5] - stp x12,x13,[x26,#16*6] - stp x14,x15,[x26,#16*7] - stp x16,x17,[x26,#16*8] - stp x25,x19,[x26,#16*9] - stp x20,x21,[x26,#16*10] - stp x22,x23,[x26,#16*11] - str x24,[x26,#16*12] - - ldp x19,x20,[x29,#2*8] - add sp,sp,#16+4*8 - ldp x21,x22,[x29,#4*8] - ldp x23,x24,[x29,#6*8] - ldp x25,x26,[x29,#8*8] - ldp x27,x28,[x29,#10*8] - ldp x29,x30,[sp],#16*8 -.inst 0xd50323bf // autiasp - ret -// .size KeccakF1600,.-KeccakF1600 - -.globl SHA3_absorb -// .type SHA3_absorb,%function -.align 5 -SHA3_absorb: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-16*8]! 
- add x29,sp,#0 - stp x19,x20,[sp,#2*8] - stp x21,x22,[sp,#4*8] - stp x23,x24,[sp,#6*8] - stp x25,x26,[sp,#8*8] - stp x27,x28,[sp,#10*8] - sub sp,sp,#16+4*8 +16 - - stp x0,x1,[sp,#16+2*8] // offload arguments - stp x2,x3,[sp,#16+4*8] - - mov x26,x0 // uint64_t A[5][5] - mov x27,x1 // const void *inp - mov x28,x2 // size_t len - mov x30,x3 // size_t bsz - ldp x0,x1,[x26,#16*0] - ldp x2,x3,[x26,#16*1] - ldp x4,x5,[x26,#16*2] - ldp x6,x7,[x26,#16*3] - ldp x8,x9,[x26,#16*4] - ldp x10,x11,[x26,#16*5] - ldp x12,x13,[x26,#16*6] - ldp x14,x15,[x26,#16*7] - ldp x16,x17,[x26,#16*8] - ldp x25,x19,[x26,#16*9] - ldp x20,x21,[x26,#16*10] - ldp x22,x23,[x26,#16*11] - ldr x24,[x26,#16*12] - b Loop_absorb - -.align 4 -Loop_absorb: - subs x26,x28,x30 // len - bsz - blo Labsorbed - - str x26,[sp,#16+4*8] // save len - bsz - cmp x30,#104 - ldr x26,[x27,#0] // A[0][0] ^= *inp++ - - - - eor x0,x0,x26 - ldr x26,[x27,#8] // A[0][1] ^= *inp++ - - - - eor x1,x1,x26 - ldr x26,[x27,#16] // A[0][2] ^= *inp++ - - - - eor x2,x2,x26 - ldr x26,[x27,#24] // A[0][3] ^= *inp++ - - - - eor x3,x3,x26 - ldr x26,[x27,#32] // A[0][4] ^= *inp++ - - - - eor x4,x4,x26 - ldr x26,[x27,#40] // A[1][0] ^= *inp++ - - - - eor x5,x5,x26 - ldr x26,[x27,#48] // A[1][1] ^= *inp++ - - - - eor x6,x6,x26 - ldr x26,[x27,#56] // A[1][2] ^= *inp++ - - - - eor x7,x7,x26 - ldr x26,[x27,#64] // A[1][3] ^= *inp++ - - - - eor x8,x8,x26 - blo Lprocess_block - - ldr x26,[x27,#72] // A[1][4] ^= *inp++ - - - - eor x9,x9,x26 - ldr x26,[x27,#80] // A[2][0] ^= *inp++ - - - - eor x10,x10,x26 - ldr x26,[x27,#88] // A[2][1] ^= *inp++ - - - - eor x11,x11,x26 - ldr x26,[x27,#96] // A[2][2] ^= *inp++ - - - - eor x12,x12,x26 - beq Lprocess_block - - cmp x30,#144 - ldr x26,[x27,#104] // A[2][3] ^= *inp++ - - - - eor x13,x13,x26 - ldr x26,[x27,#112] // A[2][4] ^= *inp++ - - - - eor x14,x14,x26 - ldr x26,[x27,#120] // A[3][0] ^= *inp++ - - - - eor x15,x15,x26 - ldr x26,[x27,#128] // A[3][1] ^= *inp++ - - - - eor x16,x16,x26 - blo Lprocess_block 
- - ldr x26,[x27,#136] // A[3][2] ^= *inp++ - - - - eor x17,x17,x26 - beq Lprocess_block - - ldr x26,[x27,#144] // A[3][3] ^= *inp++ - - - - eor x25,x25,x26 - ldr x26,[x27,#152] // A[3][4] ^= *inp++ - - - - eor x19,x19,x26 - ldr x26,[x27,#160] // A[4][0] ^= *inp++ - - - - eor x20,x20,x26 - -Lprocess_block: - add x27,x27,x30 - str x27,[sp,#16+3*8] // save inp - - adrp x28,iotas - add x28,x28,:lo12:iotas - bl KeccakF1600_int - - ldr x27,[sp,#16+3*8] // restore arguments - ldp x28,x30,[sp,#16+4*8] - b Loop_absorb - -.align 4 -Labsorbed: - ldr x27,[sp,#16+2*8] - stp x0,x1,[x27,#16*0] - stp x2,x3,[x27,#16*1] - stp x4,x5,[x27,#16*2] - stp x6,x7,[x27,#16*3] - stp x8,x9,[x27,#16*4] - stp x10,x11,[x27,#16*5] - stp x12,x13,[x27,#16*6] - stp x14,x15,[x27,#16*7] - stp x16,x17,[x27,#16*8] - stp x25,x19,[x27,#16*9] - stp x20,x21,[x27,#16*10] - stp x22,x23,[x27,#16*11] - str x24,[x27,#16*12] - - mov x0,x28 // return value - ldp x19,x20,[x29,#2*8] - add sp,sp,#16+4*8 +16 - ldp x21,x22,[x29,#4*8] - ldp x23,x24,[x29,#6*8] - ldp x25,x26,[x29,#8*8] - ldp x27,x28,[x29,#10*8] - ldp x29,x30,[sp],#16*8 -.inst 0xd50323bf // autiasp - ret -// .size SHA3_absorb,.-SHA3_absorb -.globl SHA3_squeeze -// .type SHA3_squeeze,%function -.align 5 -SHA3_squeeze: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-6*8]! 
- add x29,sp,#0 - stp x19,x20,[sp,#2*8] - stp x21,x22,[sp,#4*8] - - mov x19,x0 // put aside arguments - mov x20,x1 - mov x21,x2 - mov x22,x3 - -Loop_squeeze: - ldr x4,[x0],#8 - cmp x21,#8 - blo Lsqueeze_tail - - - - str x4,[x20],#8 - subs x21,x21,#8 - beq Lsqueeze_done - - subs x3,x3,#8 - bhi Loop_squeeze - - mov x0,x19 - bl KeccakF1600 - mov x0,x19 - mov x3,x22 - b Loop_squeeze - -.align 4 -Lsqueeze_tail: - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq Lsqueeze_done - strb w4,[x20],#1 - -Lsqueeze_done: - ldp x19,x20,[sp,#2*8] - ldp x21,x22,[sp,#4*8] - ldp x29,x30,[sp],#6*8 -.inst 0xd50323bf // autiasp - ret -// .size SHA3_squeeze,.-SHA3_squeeze -// .type KeccakF1600_ce,%function -.align 5 -KeccakF1600_ce: -Loop_ce: - ////////////////////////////////////////////////// Theta -.inst 0xce0f2a99 //eor3 v25.16b,v20.16b,v15.16b,v10.16b -.inst 0xce102eba //eor3 v26.16b,v21.16b,v16.16b,v11.16b -.inst 0xce1132db //eor3 v27.16b,v22.16b,v17.16b,v12.16b -.inst 0xce1236fc //eor3 v28.16b,v23.16b,v18.16b,v13.16b -.inst 0xce133b1d //eor3 v29.16b,v24.16b,v19.16b,v14.16b -.inst 0xce050339 //eor3 v25.16b,v25.16b, v5.16b,v0.16b -.inst 0xce06075a //eor3 v26.16b,v26.16b, v6.16b,v1.16b -.inst 0xce070b7b //eor3 v27.16b,v27.16b, v7.16b,v2.16b -.inst 0xce080f9c //eor3 v28.16b,v28.16b, v8.16b,v3.16b -.inst 0xce0913bd //eor3 v29.16b,v29.16b, v9.16b,v4.16b - -.inst 0xce7b8f3e //rax1 v30.2d,v25.2d,v27.2d // D[1] -.inst 0xce7c8f5f //rax1 v31.2d,v26.2d,v28.2d // D[2] -.inst 0xce7d8f7b //rax1 v27.2d,v27.2d,v29.2d // D[3] -.inst 0xce798f9c //rax1 v28.2d,v28.2d,v25.2d // D[4] -.inst 0xce7a8fbd //rax1 v29.2d,v29.2d,v26.2d // D[0] - - 
////////////////////////////////////////////////// Theta+Rho+Pi -.inst 0xce9efc39 //xar v25.2d, v1.2d,v30.2d,#64-1 // C[0]=A[2][0] - -.inst 0xce9e50c1 //xar v1.2d,v6.2d,v30.2d,#64-44 -.inst 0xce9cb126 //xar v6.2d,v9.2d,v28.2d,#64-20 -.inst 0xce9f0ec9 //xar v9.2d,v22.2d,v31.2d,#64-61 -.inst 0xce9c65d6 //xar v22.2d,v14.2d,v28.2d,#64-39 -.inst 0xce9dba8e //xar v14.2d,v20.2d,v29.2d,#64-18 - -.inst 0xce9f085a //xar v26.2d, v2.2d,v31.2d,#64-62 // C[1]=A[4][0] - -.inst 0xce9f5582 //xar v2.2d,v12.2d,v31.2d,#64-43 -.inst 0xce9b9dac //xar v12.2d,v13.2d,v27.2d,#64-25 -.inst 0xce9ce26d //xar v13.2d,v19.2d,v28.2d,#64-8 -.inst 0xce9b22f3 //xar v19.2d,v23.2d,v27.2d,#64-56 -.inst 0xce9d5df7 //xar v23.2d,v15.2d,v29.2d,#64-41 - -.inst 0xce9c948f //xar v15.2d,v4.2d,v28.2d,#64-27 - -.inst 0xce9ccb1c //xar v28.2d, v24.2d,v28.2d,#64-14 // D[4]=A[0][4] -.inst 0xce9efab8 //xar v24.2d,v21.2d,v30.2d,#64-2 -.inst 0xce9b2508 //xar v8.2d,v8.2d,v27.2d,#64-55 // A[1][3]=A[4][1] -.inst 0xce9e4e04 //xar v4.2d,v16.2d,v30.2d,#64-45 // A[0][4]=A[1][3] -.inst 0xce9d70b0 //xar v16.2d,v5.2d,v29.2d,#64-36 - -.inst 0xce9b9065 //xar v5.2d,v3.2d,v27.2d,#64-28 - - eor v0.16b,v0.16b,v29.16b - -.inst 0xce9bae5b //xar v27.2d, v18.2d,v27.2d,#64-21 // D[3]=A[0][3] -.inst 0xce9fc623 //xar v3.2d,v17.2d,v31.2d,#64-15 // A[0][3]=A[3][3] -.inst 0xce9ed97e //xar v30.2d, v11.2d,v30.2d,#64-10 // D[1]=A[3][2] -.inst 0xce9fe8ff //xar v31.2d, v7.2d,v31.2d,#64-6 // D[2]=A[2][1] -.inst 0xce9df55d //xar v29.2d, v10.2d,v29.2d,#64-3 // D[0]=A[1][2] - - ////////////////////////////////////////////////// Chi+Iota -.inst 0xce362354 //bcax v20.16b,v26.16b, v22.16b,v8.16b // A[1][3]=A[4][1] -.inst 0xce375915 //bcax v21.16b,v8.16b,v23.16b,v22.16b // A[1][3]=A[4][1] -.inst 0xce385ed6 //bcax v22.16b,v22.16b,v24.16b,v23.16b -.inst 0xce3a62f7 //bcax v23.16b,v23.16b,v26.16b, v24.16b -.inst 0xce286b18 //bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1] - - ld1r {v26.2d},[x10],#8 - -.inst 0xce330fd1 //bcax v17.16b,v30.16b, 
v19.16b,v3.16b // A[0][3]=A[3][3] -.inst 0xce2f4c72 //bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3] -.inst 0xce303e73 //bcax v19.16b,v19.16b,v16.16b,v15.16b -.inst 0xce3e41ef //bcax v15.16b,v15.16b,v30.16b, v16.16b -.inst 0xce237a10 //bcax v16.16b,v16.16b,v3.16b,v30.16b // A[0][3]=A[3][3] - -.inst 0xce2c7f2a //bcax v10.16b,v25.16b, v12.16b,v31.16b -.inst 0xce2d33eb //bcax v11.16b,v31.16b, v13.16b,v12.16b -.inst 0xce2e358c //bcax v12.16b,v12.16b,v14.16b,v13.16b -.inst 0xce3939ad //bcax v13.16b,v13.16b,v25.16b, v14.16b -.inst 0xce3f65ce //bcax v14.16b,v14.16b,v31.16b, v25.16b - -.inst 0xce2913a7 //bcax v7.16b,v29.16b, v9.16b,v4.16b // A[0][4]=A[1][3] -.inst 0xce252488 //bcax v8.16b,v4.16b,v5.16b,v9.16b // A[0][4]=A[1][3] -.inst 0xce261529 //bcax v9.16b,v9.16b,v6.16b,v5.16b -.inst 0xce3d18a5 //bcax v5.16b,v5.16b,v29.16b, v6.16b -.inst 0xce2474c6 //bcax v6.16b,v6.16b,v4.16b,v29.16b // A[0][4]=A[1][3] - -.inst 0xce207363 //bcax v3.16b,v27.16b, v0.16b,v28.16b -.inst 0xce210384 //bcax v4.16b,v28.16b, v1.16b,v0.16b -.inst 0xce220400 //bcax v0.16b,v0.16b,v2.16b,v1.16b -.inst 0xce3b0821 //bcax v1.16b,v1.16b,v27.16b, v2.16b -.inst 0xce3c6c42 //bcax v2.16b,v2.16b,v28.16b, v27.16b - - eor v0.16b,v0.16b,v26.16b - - tst x10,#255 - bne Loop_ce - - ret -// .size KeccakF1600_ce,.-KeccakF1600_ce - -// .type KeccakF1600_cext,%function -.align 5 -KeccakF1600_cext: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*8 -64]! 
- add x29,sp,#0 - stp d8,d9,[sp,#2*8 +0] // per ABI requirement - stp d10,d11,[sp,#2*8 +16] - stp d12,d13,[sp,#2*8 +32] - stp d14,d15,[sp,#2*8 +48] - ldp d0,d1,[x0,#8*0] - ldp d2,d3,[x0,#8*2] - ldp d4,d5,[x0,#8*4] - ldp d6,d7,[x0,#8*6] - ldp d8,d9,[x0,#8*8] - ldp d10,d11,[x0,#8*10] - ldp d12,d13,[x0,#8*12] - ldp d14,d15,[x0,#8*14] - ldp d16,d17,[x0,#8*16] - ldp d18,d19,[x0,#8*18] - ldp d20,d21,[x0,#8*20] - ldp d22,d23,[x0,#8*22] - ldr d24,[x0,#8*24] - adrp x10,iotas - add x10,x10,:lo12:iotas - bl KeccakF1600_ce - ldr x30,[sp,#8] - stp d0,d1,[x0,#8*0] - stp d2,d3,[x0,#8*2] - stp d4,d5,[x0,#8*4] - stp d6,d7,[x0,#8*6] - stp d8,d9,[x0,#8*8] - stp d10,d11,[x0,#8*10] - stp d12,d13,[x0,#8*12] - stp d14,d15,[x0,#8*14] - stp d16,d17,[x0,#8*16] - stp d18,d19,[x0,#8*18] - stp d20,d21,[x0,#8*20] - stp d22,d23,[x0,#8*22] - str d24,[x0,#8*24] - - ldp d8,d9,[sp,#2*8 +0] - ldp d10,d11,[sp,#2*8 +16] - ldp d12,d13,[sp,#2*8 +32] - ldp d14,d15,[sp,#2*8 +48] - ldr x29,[sp],#2*8 +64 -.inst 0xd50323bf // autiasp - ret -// .size KeccakF1600_cext,.-KeccakF1600_cext -.globl SHA3_absorb_cext -// .type SHA3_absorb_cext,%function -.align 5 -SHA3_absorb_cext: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*8 -64]! 
- add x29,sp,#0 - stp d8,d9,[sp,#2*8 +0] // per ABI requirement - stp d10,d11,[sp,#2*8 +16] - stp d12,d13,[sp,#2*8 +32] - stp d14,d15,[sp,#2*8 +48] - ldp d0,d1,[x0,#8*0] - ldp d2,d3,[x0,#8*2] - ldp d4,d5,[x0,#8*4] - ldp d6,d7,[x0,#8*6] - ldp d8,d9,[x0,#8*8] - ldp d10,d11,[x0,#8*10] - ldp d12,d13,[x0,#8*12] - ldp d14,d15,[x0,#8*14] - ldp d16,d17,[x0,#8*16] - ldp d18,d19,[x0,#8*18] - ldp d20,d21,[x0,#8*20] - ldp d22,d23,[x0,#8*22] - ldr d24,[x0,#8*24] - b Loop_absorb_ce - -.align 4 -Loop_absorb_ce: - subs x2,x2,x3 // len - bsz - blo Labsorbed_ce - - cmp x3,#104 - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 - eor v0.16b,v0.16b,v27.16b - eor v1.16b,v1.16b,v28.16b - eor v2.16b,v2.16b,v29.16b - eor v3.16b,v3.16b,v30.16b - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 - eor v4.16b,v4.16b,v27.16b - eor v5.16b,v5.16b,v28.16b - eor v6.16b,v6.16b,v29.16b - eor v7.16b,v7.16b,v30.16b - ld1 {v31.8b},[x1],#8 // A[1][4] ^= *inp++ - eor v8.16b,v8.16b,v31.16b - blo Lprocess_block_ce - - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 - eor v9.16b,v9.16b,v27.16b - eor v10.16b,v10.16b,v28.16b - eor v11.16b,v11.16b,v29.16b - eor v12.16b,v12.16b,v30.16b - beq Lprocess_block_ce - - cmp x3,#144 - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 - eor v13.16b,v13.16b,v27.16b - eor v14.16b,v14.16b,v28.16b - eor v15.16b,v15.16b,v29.16b - eor v16.16b,v16.16b,v30.16b - blo Lprocess_block_ce - - ld1 {v31.8b},[x1],#8 // A[3][3] ^= *inp++ - eor v17.16b,v17.16b,v31.16b - beq Lprocess_block_ce - - ld1 {v28.8b,v29.8b,v30.8b},[x1],#24 - eor v18.16b,v18.16b,v28.16b - eor v19.16b,v19.16b,v29.16b - eor v20.16b,v20.16b,v30.16b - -Lprocess_block_ce: - adrp x10,iotas - add x10,x10,:lo12:iotas - bl KeccakF1600_ce - - b Loop_absorb_ce - -.align 4 -Labsorbed_ce: - stp d0,d1,[x0,#8*0] - stp d2,d3,[x0,#8*2] - stp d4,d5,[x0,#8*4] - stp d6,d7,[x0,#8*6] - stp d8,d9,[x0,#8*8] - stp d10,d11,[x0,#8*10] - stp d12,d13,[x0,#8*12] - stp d14,d15,[x0,#8*14] - stp d16,d17,[x0,#8*16] - stp d18,d19,[x0,#8*18] - stp d20,d21,[x0,#8*20] - stp 
d22,d23,[x0,#8*22] - str d24,[x0,#8*24] - add x0,x2,x3 // return value - - ldp d8,d9,[sp,#2*8 +0] - ldp d10,d11,[sp,#2*8 +16] - ldp d12,d13,[sp,#2*8 +32] - ldp d14,d15,[sp,#2*8 +48] - ldp x29,x30,[sp],#2*8 +64 -.inst 0xd50323bf // autiasp - ret -// .size SHA3_absorb_cext,.-SHA3_absorb_cext -.globl SHA3_squeeze_cext -// .type SHA3_squeeze_cext,%function -.align 5 -SHA3_squeeze_cext: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*8]! - add x29,sp,#0 - mov x9,x0 - mov x10,x3 - -Loop_squeeze_ce: - ldr x4,[x9],#8 - cmp x2,#8 - blo Lsqueeze_tail_ce - - - - str x4,[x1],#8 - beq Lsqueeze_done_ce - - sub x2,x2,#8 - subs x10,x10,#8 - bhi Loop_squeeze_ce - - bl KeccakF1600_cext - ldr x30,[sp,#8] - mov x9,x0 - mov x10,x3 - b Loop_squeeze_ce - -.align 4 -Lsqueeze_tail_ce: - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq Lsqueeze_done_ce - strb w4,[x1],#1 - -Lsqueeze_done_ce: - ldr x29,[sp],#2*8 -.inst 0xd50323bf // autiasp - ret -// .size SHA3_squeeze_cext,.-SHA3_squeeze_cext -.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 -.align 2 diff --git a/crates/common/crypto/keccak/keccak1600-armv8-macho.s b/crates/common/crypto/keccak/keccak1600-armv8.s similarity index 100% rename from crates/common/crypto/keccak/keccak1600-armv8-macho.s rename to crates/common/crypto/keccak/keccak1600-armv8.s diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index 79d25eacdd5..f3d5e9c06f8 100644 --- 
a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -1,7 +1,5 @@ -#[cfg(all(target_arch = "aarch64", target_os = "linux"))] -std::arch::global_asm!(include_str!("keccak1600-armv8-elf.s"), options(raw)); -#[cfg(all(target_arch = "aarch64", target_os = "macos"))] -std::arch::global_asm!(include_str!("keccak1600-armv8-macho.s"), options(raw)); +#[cfg(target_arch = "aarch64")] +std::arch::global_asm!(include_str!("keccak1600-armv8.s"), options(raw)); #[cfg(target_arch = "x86_64")] std::arch::global_asm!(include_str!("keccak1600-x86_64.s"), options(att_syntax)); From 16835e07a864d64dce168d8925a812147d767af5 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Wed, 12 Nov 2025 02:25:33 -0300 Subject: [PATCH 2/3] fix(l1,l2): extract ARM symbol handling to Rust Linux and Mac linkers need different syntax to resolve `adr` (actually `adrp+add`) arguments, and name exported symbols differently. This patch makes a few minor changes to the ARM assembly and mod.rs for Keccak to: - Pass a flag indicating which kind of object file will be created; - Use that flag in a macro to use the right symbol resolution syntax; - Pass the exported function symbols to ensure they are correctly found in both OSes. --- .../common/crypto/keccak/keccak1600-armv8.s | 46 +++++++++++-------- crates/common/crypto/keccak/mod.rs | 14 ++++-- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/crates/common/crypto/keccak/keccak1600-armv8.s b/crates/common/crypto/keccak/keccak1600-armv8.s index de354ecaba0..df22d9e3dff 100644 --- a/crates/common/crypto/keccak/keccak1600-armv8.s +++ b/crates/common/crypto/keccak/keccak1600-armv8.s @@ -10,6 +10,16 @@ // TODO: this is probably a matter of selecting the right parameter // for the translator. 
+.macro adr reg, label +.if {elf} +adrp \reg,\label +add \reg,\reg,:lo12:\label +.else +adrp \reg, \label@PAGE +add \reg, \reg, \label@PAGEOFF +.endif +.endm + .align 8 // strategic alignment and padding that allows to use // address value as loop termination condition... .quad 0,0,0,0,0,0,0,0 @@ -241,8 +251,7 @@ KeccakF1600: ldp x22,x23,[x26,#16*11] ldr x24,[x26,#16*12] - adrp x28,iotas@PAGE - add x28,x28,iotas@PAGEOFF + adr x28,iotas bl KeccakF1600_int ldr x26,[sp,#16+2*8] @@ -271,10 +280,10 @@ KeccakF1600: ret // .size KeccakF1600,.-KeccakF1600 -.globl _SHA3_absorb +.globl {SHA3_absorb} // .type SHA3_absorb,%function .align 5 -_SHA3_absorb: +{SHA3_absorb}: .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16*8]! add x29,sp,#0 @@ -433,8 +442,7 @@ Lprocess_block: add x27,x27,x30 str x27,[sp,#16+3*8] // save inp - adrp x28,iotas@PAGE - add x28,x28,iotas@PAGEOFF + adr x28,iotas bl KeccakF1600_int ldr x27,[sp,#16+3*8] // restore arguments @@ -469,10 +477,10 @@ Labsorbed: .inst 0xd50323bf // autiasp ret // .size SHA3_absorb,.-SHA3_absorb -.globl _SHA3_squeeze +.globl {SHA3_squeeze} // .type SHA3_squeeze,%function .align 5 -_SHA3_squeeze: +{SHA3_squeeze}: .inst 0xd503233f // paciasp stp x29,x30,[sp,#-6*8]! 
add x29,sp,#0 @@ -603,7 +611,7 @@ Loop_ce: .inst 0xce3a62f7 //bcax v23.16b,v23.16b,v26.16b, v24.16b .inst 0xce286b18 //bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1] - ld1r {v26.2d},[x10],#8 + ld1r {{v26.2d}},[x10],#8 .inst 0xce330fd1 //bcax v17.16b,v30.16b, v19.16b,v3.16b // A[0][3]=A[3][3] .inst 0xce2f4c72 //bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3] @@ -660,8 +668,7 @@ KeccakF1600_cext: ldp d20,d21,[x0,#8*20] ldp d22,d23,[x0,#8*22] ldr d24,[x0,#8*24] - adrp x10,iotas@PAGE - add x10,x10,iotas@PAGEOFF + adr x10,iotas bl KeccakF1600_ce ldr x30,[sp,#8] stp d0,d1,[x0,#8*0] @@ -718,21 +725,21 @@ Loop_absorb_ce: blo Labsorbed_ce cmp x3,#104 - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 eor v0.16b,v0.16b,v27.16b eor v1.16b,v1.16b,v28.16b eor v2.16b,v2.16b,v29.16b eor v3.16b,v3.16b,v30.16b - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 eor v4.16b,v4.16b,v27.16b eor v5.16b,v5.16b,v28.16b eor v6.16b,v6.16b,v29.16b eor v7.16b,v7.16b,v30.16b - ld1 {v31.8b},[x1],#8 // A[1][4] ^= *inp++ + ld1 {{v31.8b}},[x1],#8 // A[1][4] ^= *inp++ eor v8.16b,v8.16b,v31.16b blo Lprocess_block_ce - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 eor v9.16b,v9.16b,v27.16b eor v10.16b,v10.16b,v28.16b eor v11.16b,v11.16b,v29.16b @@ -740,25 +747,24 @@ Loop_absorb_ce: beq Lprocess_block_ce cmp x3,#144 - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 eor v13.16b,v13.16b,v27.16b eor v14.16b,v14.16b,v28.16b eor v15.16b,v15.16b,v29.16b eor v16.16b,v16.16b,v30.16b blo Lprocess_block_ce - ld1 {v31.8b},[x1],#8 // A[3][3] ^= *inp++ + ld1 {{v31.8b}},[x1],#8 // A[3][3] ^= *inp++ eor v17.16b,v17.16b,v31.16b beq Lprocess_block_ce - ld1 {v28.8b,v29.8b,v30.8b},[x1],#24 + ld1 {{v28.8b,v29.8b,v30.8b}},[x1],#24 eor v18.16b,v18.16b,v28.16b eor v19.16b,v19.16b,v29.16b eor v20.16b,v20.16b,v30.16b Lprocess_block_ce: - adrp 
x10,iotas@PAGE - add x10,x10,iotas@PAGEOFF + adr x10,iotas bl KeccakF1600_ce b Loop_absorb_ce diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index f3d5e9c06f8..6c08e7cd303 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -1,5 +1,5 @@ #[cfg(target_arch = "aarch64")] -std::arch::global_asm!(include_str!("keccak1600-armv8.s"), options(raw)); +std::arch::global_asm!(include_str!("keccak1600-armv8.s"), elf=const cfg!(target_os="linux") as u32, SHA3_absorb=sym SHA3_absorb, SHA3_squeeze=sym SHA3_squeeze); #[cfg(target_arch = "x86_64")] std::arch::global_asm!(include_str!("keccak1600-x86_64.s"), options(att_syntax)); @@ -11,12 +11,16 @@ mod imp { #[derive(Default, Clone, Copy)] #[repr(transparent)] - struct State([u64; 25]); + pub(super) struct State([u64; 25]); unsafe extern "C" { - #[link_name = "SHA3_absorb"] - unsafe fn SHA3_absorb(state: *mut State, buf: *const u8, len: usize, r: usize) -> usize; - unsafe fn SHA3_squeeze(state: *mut State, buf: *mut u8, len: usize, r: usize); + pub(super) unsafe fn SHA3_absorb( + state: *mut State, + buf: *const u8, + len: usize, + r: usize, + ) -> usize; + pub(super) unsafe fn SHA3_squeeze(state: *mut State, buf: *mut u8, len: usize, r: usize); } pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32] { From 719810241a0283ffb9565bd84033bc96290541d9 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Wed, 12 Nov 2025 11:18:59 -0300 Subject: [PATCH 3/3] document the changes --- crates/common/crypto/keccak/keccak1600-armv8.s | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/common/crypto/keccak/keccak1600-armv8.s b/crates/common/crypto/keccak/keccak1600-armv8.s index df22d9e3dff..68d546ba706 100644 --- a/crates/common/crypto/keccak/keccak1600-armv8.s +++ b/crates/common/crypto/keccak/keccak1600-armv8.s @@ -4,11 +4,11 @@ // Rust. // - Removed dots from all local labels for correct detection in the frontend. 
// Reason: `.L` local labels are ELF-specific. -// - Replaced instance of `adr x??,label` by `adrp x??,label@PAGE` followed by -// `add x??,x??,label@PAGEOFF`. -// -// TODO: this is probably a matter of selecting the right parameter -// for the translator. +// - Replaced instances of `adr x??,label` by a macro that expands to `adrp x??,label@PAGE` followed by +// `add x??,x??,label@PAGEOFF` or `adrp x??,label` followed by `add x??,x??,:lo12:label` depending on the target +// OS. +// - Replaced existing instances of curly braces by pairs of them, to avoid confusion with Rust's `global_asm!` template placeholders. +// - Replaced exported symbols by parameters representing their name-mangled versions. .macro adr reg, label .if {elf}