From 75408b28bf6461e6d9e2ef94352620f4470ca60f Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Tue, 4 Nov 2025 20:10:38 -0300 Subject: [PATCH 01/40] wip: integrate cryptogam assembly versions of keccak --- crates/common/crypto/keccak/README.md | 71 ++ .../crypto/keccak/keccak1600-armv8-sha3.s | 842 +++++++++++++++++ .../common/crypto/keccak/keccak1600-armv8.s | 841 +++++++++++++++++ .../common/crypto/keccak/keccak1600-x86_64.s | 536 +++++++++++ crates/common/crypto/keccak/mod.rs | 848 ++++++++++++++++++ crates/common/crypto/lib.rs | 1 + 6 files changed, 3139 insertions(+) create mode 100644 crates/common/crypto/keccak/README.md create mode 100644 crates/common/crypto/keccak/keccak1600-armv8-sha3.s create mode 100644 crates/common/crypto/keccak/keccak1600-armv8.s create mode 100644 crates/common/crypto/keccak/keccak1600-x86_64.s create mode 100644 crates/common/crypto/keccak/mod.rs diff --git a/crates/common/crypto/keccak/README.md b/crates/common/crypto/keccak/README.md new file mode 100644 index 00000000000..5fbd2630205 --- /dev/null +++ b/crates/common/crypto/keccak/README.md @@ -0,0 +1,71 @@ +# Keccak Module + +A thin layer over an intentionally small set of optimized assembly implementations of Keccak for ARMv8 and x86_64. +The code is adapted from the output of the scripts written by the [cryptogams](https://github.com/dot-asm/cryptogams) project. See the [Copyright Notice](#copyright-notice) section for a copy of the licence. You can find the original text at [their repository](https://github.com/dot-asm/cryptogams/blob/680f98c1765a7cb89c193db169ed048599f92186/LICENSE). + +> [!NOTE] +> This library is neither endorsed nor supported by the original _Cryptogams_ team. +> The code has been modified to integrate with Rust in the simplest possible way and to avoid the need for extra toolchains to build the project. + +## Goals + +The goal of this module is to have an efficient implementation of Keccak256 for Ethrex, reusing audited code as much as possible, while keeping complexity as low as possible.
+To achieve low complexity, we explicitly leave out of scope: implementing `Digest`, providing implementations for every CPU variant (we keep a selected subset of those provided by _Cryptogams_), and compile-time translation of source files. +The module exposes a single function: +```rust +pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32]; +``` +There are no feature flags. If building for `x86_64`, it will link an optimized assembly implementation. Because it uses generic `x86_64` code, no fallback is needed. If building for `ARMv8`, it will link an optimized implementation relying on the `sha3` instructions and a fallback one using generic `ARMv8` instructions. The choice between the two is made at runtime, i.e. dynamic dispatch. +For other architectures, it falls back to `tiny_keccak`. This is especially necessary for proving, as the ZKVMs are RISC-V based, but they are not guaranteed to support all of its extensions. We may revisit adding assembly versions for them at a later time. + +## Code Generation + +The implementation is currently rather manual: +- Code is generated by running the scripts in the _Cryptogams_ project (currently at commit `680f98c1765a7cb89c193db169ed048599f92186`), as follows: +```shell +$ cd cryptogams/arm +$ ./keccak-1600-armv8.pl linux64 keccak1600-armv8.s +$ ./keccak-1600-armv8.pl linux64+sha3 keccak1600-armv8-sha3.s +$ cd ../x86_64 +$ ./keccak1600-x86_64.pl linux64 keccak1600-x86_64.s +``` +- Once the code is generated, we manually copy the generated functions into the matching `naked_asm` blocks inside the module. + +## Copyright Notice + +Copyright (c) 2006, CRYPTOGAMS by +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain copyright notices, + this list of conditions and the following disclaimer.
+ + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + * Neither the name of the CRYPTOGAMS nor the names of its + copyright holder and contributors may be used to endorse or + promote products derived from this software without specific + prior written permission. + +ALTERNATIVELY, provided that this notice is retained in full, this +product may be distributed under the terms of the GNU General Public +License (GPL), in which case the provisions of the GPL apply INSTEAD OF +those given above. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/crates/common/crypto/keccak/keccak1600-armv8-sha3.s b/crates/common/crypto/keccak/keccak1600-armv8-sha3.s new file mode 100644 index 00000000000..b6a60da5e92 --- /dev/null +++ b/crates/common/crypto/keccak/keccak1600-armv8-sha3.s @@ -0,0 +1,842 @@ +.arch armv8.2-a+sha3 +.text + +.align 8 // strategic alignment and padding that allows to use + // address value as loop termination condition... 
+.quad 0,0,0,0,0,0,0,0 +.type iotas,%object +iotas: // 24 round constants, one 64-bit word per round; each is XORed into lane A[0][0] at the end of its round +.quad 0x0000000000000001 +.quad 0x0000000000008082 +.quad 0x800000000000808a +.quad 0x8000000080008000 +.quad 0x000000000000808b +.quad 0x0000000080000001 +.quad 0x8000000080008081 +.quad 0x8000000000008009 +.quad 0x000000000000008a +.quad 0x0000000000000088 +.quad 0x0000000080008009 +.quad 0x000000008000000a +.Liotas12: +.quad 0x000000008000808b +.quad 0x800000000000008b +.quad 0x8000000000008089 +.quad 0x8000000000008003 +.quad 0x8000000000008002 +.quad 0x8000000000000080 +.quad 0x000000000000800a +.quad 0x800000008000000a +.quad 0x8000000080008081 +.quad 0x8000000000008080 +.quad 0x0000000080000001 +.quad 0x8000000080008008 +.size iotas,.-iotas +.type KeccakF1600_int,%function +.align 5 +KeccakF1600_int: // in: x28 = iotas pointer, 25 state lanes in x0-x17,x25,x19-x24; x26,x27,x28,x30 are used as temporaries +.inst 0xd503233f // paciasp + stp x28,x30,[sp,#16] // stack is pre-allocated + b .Loop +.align 4 +.Loop: + ////////////////////////////////////////// Theta + eor x26,x0,x5 + stp x4,x9,[sp,#0] // offload pair... + eor x27,x1,x6 + eor x28,x2,x7 + eor x30,x3,x8 + eor x4,x4,x9 + eor x26,x26,x10 + eor x27,x27,x11 + eor x28,x28,x12 + eor x30,x30,x13 + eor x4,x4,x14 + eor x26,x26,x15 + eor x27,x27,x16 + eor x28,x28,x17 + eor x30,x30,x25 + eor x4,x4,x19 + eor x26,x26,x20 + eor x28,x28,x22 + eor x27,x27,x21 + eor x30,x30,x23 + eor x4,x4,x24 + + eor x9,x26,x28,ror#63 + + eor x1,x1,x9 + eor x6,x6,x9 + eor x11,x11,x9 + eor x16,x16,x9 + eor x21,x21,x9 + + eor x9,x27,x30,ror#63 + eor x28,x28,x4,ror#63 + eor x30,x30,x26,ror#63 + eor x4,x4,x27,ror#63 + + eor x27, x2,x9 // mov x27,x2 + eor x7,x7,x9 + eor x12,x12,x9 + eor x17,x17,x9 + eor x22,x22,x9 + + eor x0,x0,x4 + eor x5,x5,x4 + eor x10,x10,x4 + eor x15,x15,x4 + eor x20,x20,x4 + ldp x4,x9,[sp,#0] // re-load offloaded data + eor x26, x3,x28 // mov x26,x3 + eor x8,x8,x28 + eor x13,x13,x28 + eor x25,x25,x28 + eor x23,x23,x28 + + eor x28, x4,x30 // mov x28,x4 + eor x9,x9,x30 + eor x14,x14,x30 + eor x19,x19,x30 + eor x24,x24,x30 + + //////////////////////////////////////////
Rho+Pi + mov x30,x1 + ror x1,x6,#64-44 + //mov x27,x2 + ror x2,x12,#64-43 + //mov x26,x3 + ror x3,x25,#64-21 // ? + //mov x28,x4 + ror x4,x24,#64-14 // ? + + ror x6,x9,#64-20 // ? + ror x12,x13,#64-25 // ? + ror x25,x17,#64-15 + ror x24,x21,#64-2 // ? + + ror x9,x22,#64-61 + ror x13,x19,#64-8 + ror x17,x11,#64-10 + ror x21,x8,#64-55 + + ror x22,x14,#64-39 + ror x19,x23,#64-56 + ror x11,x7,#64-6 // ? + ror x8,x16,#64-45 + + ror x14,x20,#64-18 + ror x23,x15,#64-41 + ror x7,x10,#64-3 + ror x16,x5,#64-36 // ? + + ror x5,x26,#64-28 // ? + ror x10,x30,#64-1 + ror x15,x28,#64-27 // ? + ror x20,x27,#64-62 // ? + + ////////////////////////////////////////// Chi+Iota + bic x26,x2,x1 + bic x27,x3,x2 + bic x28,x0,x4 + bic x30,x1,x0 + eor x0,x0,x26 + bic x26,x4,x3 + eor x1,x1,x27 + ldr x27,[sp,#16] + eor x3,x3,x28 + eor x4,x4,x30 + eor x2,x2,x26 + ldr x30,[x27],#8 // Iota[i++] + + bic x26,x7,x6 + tst x27,#255 // are we done? (low byte of the iotas pointer is zero only just past the last constant; flags stay live until the bne below) + str x27,[sp,#16] + bic x27,x8,x7 + bic x28,x5,x9 + eor x0,x0,x30 // A[0][0] ^= Iota + bic x30,x6,x5 + eor x5,x5,x26 + bic x26,x9,x8 + eor x6,x6,x27 + eor x8,x8,x28 + eor x9,x9,x30 + eor x7,x7,x26 + + bic x26,x12,x11 + bic x27,x13,x12 + bic x28,x10,x14 + bic x30,x11,x10 + eor x10,x10,x26 + bic x26,x14,x13 + eor x11,x11,x27 + eor x13,x13,x28 + eor x14,x14,x30 + eor x12,x12,x26 + + bic x26,x17,x16 + bic x27,x25,x17 + bic x28,x15,x19 + bic x30,x16,x15 + eor x15,x15,x26 + bic x26,x19,x25 + eor x16,x16,x27 + eor x25,x25,x28 + eor x19,x19,x30 + eor x17,x17,x26 + + bic x26,x22,x21 + bic x27,x23,x22 + bic x28,x20,x24 + bic x30,x21,x20 + eor x20,x20,x26 + bic x26,x24,x23 + eor x21,x21,x27 + eor x23,x23,x28 + eor x24,x24,x30 + eor x22,x22,x26 + + bne .Loop + + ldr x30,[sp,#16+__SIZEOF_POINTER__] +.inst 0xd50323bf // autiasp + ret +.size KeccakF1600_int,.-KeccakF1600_int + +.type KeccakF1600,%function +.align 5 +KeccakF1600: // in: x0 = pointer to the 25-lane state, which is permuted in place +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]!
+ add x29,sp,#0 + stp x19,x20,[sp,#2*__SIZEOF_POINTER__] + stp x21,x22,[sp,#4*__SIZEOF_POINTER__] + stp x23,x24,[sp,#6*__SIZEOF_POINTER__] + stp x25,x26,[sp,#8*__SIZEOF_POINTER__] + stp x27,x28,[sp,#10*__SIZEOF_POINTER__] + sub sp,sp,#16+4*__SIZEOF_POINTER__ // scratch that KeccakF1600_int expects pre-allocated, plus the slot for the saved state pointer + + str x0,[sp,#16+2*__SIZEOF_POINTER__] // offload argument + mov x26,x0 + ldp x0,x1,[x0,#16*0] + ldp x2,x3,[x26,#16*1] + ldp x4,x5,[x26,#16*2] + ldp x6,x7,[x26,#16*3] + ldp x8,x9,[x26,#16*4] + ldp x10,x11,[x26,#16*5] + ldp x12,x13,[x26,#16*6] + ldp x14,x15,[x26,#16*7] + ldp x16,x17,[x26,#16*8] + ldp x25,x19,[x26,#16*9] // lane 18 is kept in x25; x18 is not used by this code + ldp x20,x21,[x26,#16*10] + ldp x22,x23,[x26,#16*11] + ldr x24,[x26,#16*12] + + adr x28,iotas // KeccakF1600_int takes the round-constant pointer in x28 + bl KeccakF1600_int + + ldr x26,[sp,#16+2*__SIZEOF_POINTER__] // reload the state pointer saved above + stp x0,x1,[x26,#16*0] + stp x2,x3,[x26,#16*1] + stp x4,x5,[x26,#16*2] + stp x6,x7,[x26,#16*3] + stp x8,x9,[x26,#16*4] + stp x10,x11,[x26,#16*5] + stp x12,x13,[x26,#16*6] + stp x14,x15,[x26,#16*7] + stp x16,x17,[x26,#16*8] + stp x25,x19,[x26,#16*9] + stp x20,x21,[x26,#16*10] + stp x22,x23,[x26,#16*11] + str x24,[x26,#16*12] + + ldp x19,x20,[x29,#2*__SIZEOF_POINTER__] + add sp,sp,#16+4*__SIZEOF_POINTER__ + ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] + ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] + ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] + ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] + ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ +.inst 0xd50323bf // autiasp + ret +.size KeccakF1600,.-KeccakF1600 + +.globl SHA3_absorb +.type SHA3_absorb,%function +.align 5 +SHA3_absorb: // in: x0 = A[5][5] state, x1 = inp, x2 = len, x3 = bsz +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]!
+ add x29,sp,#0 + stp x19,x20,[sp,#2*__SIZEOF_POINTER__] + stp x21,x22,[sp,#4*__SIZEOF_POINTER__] + stp x23,x24,[sp,#6*__SIZEOF_POINTER__] + stp x25,x26,[sp,#8*__SIZEOF_POINTER__] + stp x27,x28,[sp,#10*__SIZEOF_POINTER__] + sub sp,sp,#16+4*__SIZEOF_POINTER__+16 + + stp x0,x1,[sp,#16+2*__SIZEOF_POINTER__] // offload arguments + stp x2,x3,[sp,#16+4*__SIZEOF_POINTER__] + + mov x26,x0 // uint64_t A[5][5] + mov x27,x1 // const void *inp + mov x28,x2 // size_t len + mov x30,x3 // size_t bsz + ldp x0,x1,[x26,#16*0] + ldp x2,x3,[x26,#16*1] + ldp x4,x5,[x26,#16*2] + ldp x6,x7,[x26,#16*3] + ldp x8,x9,[x26,#16*4] + ldp x10,x11,[x26,#16*5] + ldp x12,x13,[x26,#16*6] + ldp x14,x15,[x26,#16*7] + ldp x16,x17,[x26,#16*8] + ldp x25,x19,[x26,#16*9] + ldp x20,x21,[x26,#16*10] + ldp x22,x23,[x26,#16*11] + ldr x24,[x26,#16*12] + b .Loop_absorb + +.align 4 +.Loop_absorb: + subs x26,x28,x30 // len - bsz + blo .Labsorbed + + str x26,[sp,#16+4*__SIZEOF_POINTER__] // save len - bsz + cmp x30,#104 // the loads/XORs below leave the flags alone, so this compare drives the later blo (bsz<104: stop after 72 bytes) and beq (bsz==104) + ldr x26,[x27,#0] // A[0][0] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x0,x0,x26 + ldr x26,[x27,#8] // A[0][1] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x1,x1,x26 + ldr x26,[x27,#16] // A[0][2] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x2,x2,x26 + ldr x26,[x27,#24] // A[0][3] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x3,x3,x26 + ldr x26,[x27,#32] // A[0][4] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x4,x4,x26 + ldr x26,[x27,#40] // A[1][0] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x5,x5,x26 + ldr x26,[x27,#48] // A[1][1] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x6,x6,x26 + ldr x26,[x27,#56] // A[1][2] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x7,x7,x26 + ldr x26,[x27,#64] // A[1][3] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x8,x8,x26 + blo .Lprocess_block + + ldr x26,[x27,#72] // A[1][4] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26
+#endif + eor x9,x9,x26 + ldr x26,[x27,#80] // A[2][0] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x10,x10,x26 + ldr x26,[x27,#88] // A[2][1] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x11,x11,x26 + ldr x26,[x27,#96] // A[2][2] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x12,x12,x26 + beq .Lprocess_block + + cmp x30,#144 // same scheme: blo stops after 136 bytes (bsz<144), beq after 144, otherwise 168 bytes are absorbed + ldr x26,[x27,#104] // A[2][3] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x13,x13,x26 + ldr x26,[x27,#112] // A[2][4] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x14,x14,x26 + ldr x26,[x27,#120] // A[3][0] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x15,x15,x26 + ldr x26,[x27,#128] // A[3][1] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x16,x16,x26 + blo .Lprocess_block + + ldr x26,[x27,#136] // A[3][2] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x17,x17,x26 + beq .Lprocess_block + + ldr x26,[x27,#144] // A[3][3] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x25,x25,x26 + ldr x26,[x27,#152] // A[3][4] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x19,x19,x26 + ldr x26,[x27,#160] // A[4][0] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x20,x20,x26 + +.Lprocess_block: + add x27,x27,x30 + str x27,[sp,#16+3*__SIZEOF_POINTER__] // save inp + + adr x28,iotas + bl KeccakF1600_int + + ldr x27,[sp,#16+3*__SIZEOF_POINTER__] // restore arguments + ldp x28,x30,[sp,#16+4*__SIZEOF_POINTER__] + b .Loop_absorb + +.align 4 +.Labsorbed: + ldr x27,[sp,#16+2*__SIZEOF_POINTER__] + stp x0,x1,[x27,#16*0] + stp x2,x3,[x27,#16*1] + stp x4,x5,[x27,#16*2] + stp x6,x7,[x27,#16*3] + stp x8,x9,[x27,#16*4] + stp x10,x11,[x27,#16*5] + stp x12,x13,[x27,#16*6] + stp x14,x15,[x27,#16*7] + stp x16,x17,[x27,#16*8] + stp x25,x19,[x27,#16*9] + stp x20,x21,[x27,#16*10] + stp x22,x23,[x27,#16*11] + str x24,[x27,#16*12] + + mov x0,x28 // return value: the leftover length (less than bsz) that was not absorbed + ldp x19,x20,[x29,#2*__SIZEOF_POINTER__] + add
sp,sp,#16+4*__SIZEOF_POINTER__+16 + ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] + ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] + ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] + ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] + ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ +.inst 0xd50323bf // autiasp + ret +.size SHA3_absorb,.-SHA3_absorb +.globl SHA3_squeeze +.type SHA3_squeeze,%function +.align 5 +SHA3_squeeze: // in: x0 = state, x1 = output buffer, x2 = bytes to produce, x3 = block size in bytes +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-6*__SIZEOF_POINTER__]! + add x29,sp,#0 + stp x19,x20,[sp,#2*__SIZEOF_POINTER__] + stp x21,x22,[sp,#4*__SIZEOF_POINTER__] + + mov x19,x0 // put aside arguments + mov x20,x1 + mov x21,x2 + mov x22,x3 + +.Loop_squeeze: + ldr x4,[x0],#8 + cmp x21,#8 + blo .Lsqueeze_tail +#ifdef __AARCH64EB__ + rev x4,x4 +#endif + str x4,[x20],#8 + subs x21,x21,#8 + beq .Lsqueeze_done + + subs x3,x3,#8 + bhi .Loop_squeeze + + mov x0,x19 // current block is exhausted: permute the state again + bl KeccakF1600 + mov x0,x19 // rewind the read pointer and the per-block byte counter + mov x3,x22 + b .Loop_squeeze + +.align 4 +.Lsqueeze_tail: // fewer than 8 bytes left: emit the lane in x4 one byte at a time, lowest byte first + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + +.Lsqueeze_done: + ldp x19,x20,[sp,#2*__SIZEOF_POINTER__] + ldp x21,x22,[sp,#4*__SIZEOF_POINTER__] + ldp x29,x30,[sp],#6*__SIZEOF_POINTER__ +.inst 0xd50323bf // autiasp + ret +.size SHA3_squeeze,.-SHA3_squeeze +.type KeccakF1600_ce,%function +.align 5 +KeccakF1600_ce: // in: x10 = iotas pointer, 25 state lanes in the low 64 bits of v0-v24; v25-v31 are temporaries +.Loop_ce: + ////////////////////////////////////////////////// Theta + eor3 v25.16b,v20.16b,v15.16b,v10.16b + eor3 v26.16b,v21.16b,v16.16b,v11.16b + eor3 v27.16b,v22.16b,v17.16b,v12.16b + eor3 v28.16b,v23.16b,v18.16b,v13.16b + eor3 v29.16b,v24.16b,v19.16b,v14.16b + eor3 v25.16b,v25.16b, v5.16b,v0.16b + eor3
v26.16b,v26.16b, v6.16b,v1.16b + eor3 v27.16b,v27.16b, v7.16b,v2.16b + eor3 v28.16b,v28.16b, v8.16b,v3.16b + eor3 v29.16b,v29.16b, v9.16b,v4.16b + + rax1 v30.2d,v25.2d,v27.2d // D[1] + rax1 v31.2d,v26.2d,v28.2d // D[2] + rax1 v27.2d,v27.2d,v29.2d // D[3] + rax1 v28.2d,v28.2d,v25.2d // D[4] + rax1 v29.2d,v29.2d,v26.2d // D[0] + + ////////////////////////////////////////////////// Theta+Rho+Pi + xar v25.2d, v1.2d,v30.2d,#64-1 // C[0]=A[2][0] + + xar v1.2d,v6.2d,v30.2d,#64-44 + xar v6.2d,v9.2d,v28.2d,#64-20 + xar v9.2d,v22.2d,v31.2d,#64-61 + xar v22.2d,v14.2d,v28.2d,#64-39 + xar v14.2d,v20.2d,v29.2d,#64-18 + + xar v26.2d, v2.2d,v31.2d,#64-62 // C[1]=A[4][0] + + xar v2.2d,v12.2d,v31.2d,#64-43 + xar v12.2d,v13.2d,v27.2d,#64-25 + xar v13.2d,v19.2d,v28.2d,#64-8 + xar v19.2d,v23.2d,v27.2d,#64-56 + xar v23.2d,v15.2d,v29.2d,#64-41 + + xar v15.2d,v4.2d,v28.2d,#64-27 + + xar v28.2d, v24.2d,v28.2d,#64-14 // D[4]=A[0][4] + xar v24.2d,v21.2d,v30.2d,#64-2 + xar v8.2d,v8.2d,v27.2d,#64-55 // A[1][3]=A[4][1] + xar v4.2d,v16.2d,v30.2d,#64-45 // A[0][4]=A[1][3] + xar v16.2d,v5.2d,v29.2d,#64-36 + + xar v5.2d,v3.2d,v27.2d,#64-28 + + eor v0.16b,v0.16b,v29.16b // plain eor rather than xar: this lane takes the D[0] XOR without a rotation + + xar v27.2d, v18.2d,v27.2d,#64-21 // D[3]=A[0][3] + xar v3.2d,v17.2d,v31.2d,#64-15 // A[0][3]=A[3][3] + xar v30.2d, v11.2d,v30.2d,#64-10 // D[1]=A[3][2] + xar v31.2d, v7.2d,v31.2d,#64-6 // D[2]=A[2][1] + xar v29.2d, v10.2d,v29.2d,#64-3 // D[0]=A[1][2] + + ////////////////////////////////////////////////// Chi+Iota + bcax v20.16b,v26.16b, v22.16b,v8.16b // A[1][3]=A[4][1] + bcax v21.16b,v8.16b,v23.16b,v22.16b // A[1][3]=A[4][1] + bcax v22.16b,v22.16b,v24.16b,v23.16b + bcax v23.16b,v23.16b,v26.16b, v24.16b + bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1] + + ld1r {v26.2d},[x10],#8 // v26 is free again: load this round's constant into both halves and advance the iotas pointer + + bcax v17.16b,v30.16b, v19.16b,v3.16b // A[0][3]=A[3][3] + bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3] + bcax v19.16b,v19.16b,v16.16b,v15.16b + bcax v15.16b,v15.16b,v30.16b, v16.16b + bcax v16.16b,v16.16b,v3.16b,v30.16b //
A[0][3]=A[3][3] + + bcax v10.16b,v25.16b, v12.16b,v31.16b + bcax v11.16b,v31.16b, v13.16b,v12.16b + bcax v12.16b,v12.16b,v14.16b,v13.16b + bcax v13.16b,v13.16b,v25.16b, v14.16b + bcax v14.16b,v14.16b,v31.16b, v25.16b + + bcax v7.16b,v29.16b, v9.16b,v4.16b // A[0][4]=A[1][3] + bcax v8.16b,v4.16b,v5.16b,v9.16b // A[0][4]=A[1][3] + bcax v9.16b,v9.16b,v6.16b,v5.16b + bcax v5.16b,v5.16b,v29.16b, v6.16b + bcax v6.16b,v6.16b,v4.16b,v29.16b // A[0][4]=A[1][3] + + bcax v3.16b,v27.16b, v0.16b,v28.16b + bcax v4.16b,v28.16b, v1.16b,v0.16b + bcax v0.16b,v0.16b,v2.16b,v1.16b + bcax v1.16b,v1.16b,v27.16b, v2.16b + bcax v2.16b,v2.16b,v28.16b, v27.16b + + eor v0.16b,v0.16b,v26.16b // Iota: XOR the round constant loaded into v26 into lane 0 + + tst x10,#255 // loop until the low byte of the iotas pointer is zero, i.e. just past the last constant + bne .Loop_ce + + ret +.size KeccakF1600_ce,.-KeccakF1600_ce + +.type KeccakF1600_cext,%function +.align 5 +KeccakF1600_cext: // in: x0 = pointer to the 25-lane state; permuted in place through KeccakF1600_ce +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! + add x29,sp,#0 + stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement + stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldp d0,d1,[x0,#8*0] + ldp d2,d3,[x0,#8*2] + ldp d4,d5,[x0,#8*4] + ldp d6,d7,[x0,#8*6] + ldp d8,d9,[x0,#8*8] + ldp d10,d11,[x0,#8*10] + ldp d12,d13,[x0,#8*12] + ldp d14,d15,[x0,#8*14] + ldp d16,d17,[x0,#8*16] + ldp d18,d19,[x0,#8*18] + ldp d20,d21,[x0,#8*20] + ldp d22,d23,[x0,#8*22] + ldr d24,[x0,#8*24] + adr x10,iotas + bl KeccakF1600_ce + ldr x30,[sp,#__SIZEOF_POINTER__] // bl overwrote x30; reload the return address saved in the prologue + stp d0,d1,[x0,#8*0] + stp d2,d3,[x0,#8*2] + stp d4,d5,[x0,#8*4] + stp d6,d7,[x0,#8*6] + stp d8,d9,[x0,#8*8] + stp d10,d11,[x0,#8*10] + stp d12,d13,[x0,#8*12] + stp d14,d15,[x0,#8*14] + stp d16,d17,[x0,#8*16] + stp d18,d19,[x0,#8*18] + stp d20,d21,[x0,#8*20] + stp d22,d23,[x0,#8*22] + str d24,[x0,#8*24] + + ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] + ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldr
x29,[sp],#2*__SIZEOF_POINTER__+64 +.inst 0xd50323bf // autiasp + ret +.size KeccakF1600_cext,.-KeccakF1600_cext +.globl SHA3_absorb_cext +.type SHA3_absorb_cext,%function +.align 5 +SHA3_absorb_cext: // in: x0 = state, x1 = input pointer, x2 = len, x3 = bsz (block size in bytes) +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! + add x29,sp,#0 + stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement + stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldp d0,d1,[x0,#8*0] + ldp d2,d3,[x0,#8*2] + ldp d4,d5,[x0,#8*4] + ldp d6,d7,[x0,#8*6] + ldp d8,d9,[x0,#8*8] + ldp d10,d11,[x0,#8*10] + ldp d12,d13,[x0,#8*12] + ldp d14,d15,[x0,#8*14] + ldp d16,d17,[x0,#8*16] + ldp d18,d19,[x0,#8*18] + ldp d20,d21,[x0,#8*20] + ldp d22,d23,[x0,#8*22] + ldr d24,[x0,#8*24] + b .Loop_absorb_ce + +.align 4 +.Loop_absorb_ce: + subs x2,x2,x3 // len - bsz + blo .Labsorbed_ce + + cmp x3,#104 // the ld1/eor run leaves the flags alone: blo below stops after 72 bytes (bsz<104), beq after 104 + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + eor v0.16b,v0.16b,v27.16b + eor v1.16b,v1.16b,v28.16b + eor v2.16b,v2.16b,v29.16b + eor v3.16b,v3.16b,v30.16b + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + eor v4.16b,v4.16b,v27.16b + eor v5.16b,v5.16b,v28.16b + eor v6.16b,v6.16b,v29.16b + eor v7.16b,v7.16b,v30.16b + ld1 {v31.8b},[x1],#8 // A[1][4] ^= *inp++ + eor v8.16b,v8.16b,v31.16b + blo .Lprocess_block_ce + + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + eor v9.16b,v9.16b,v27.16b + eor v10.16b,v10.16b,v28.16b + eor v11.16b,v11.16b,v29.16b + eor v12.16b,v12.16b,v30.16b + beq .Lprocess_block_ce + + cmp x3,#144 // same scheme: blo stops after 136 bytes (bsz<144), beq after 144, otherwise 168 bytes are absorbed + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + eor v13.16b,v13.16b,v27.16b + eor v14.16b,v14.16b,v28.16b + eor v15.16b,v15.16b,v29.16b + eor v16.16b,v16.16b,v30.16b + blo .Lprocess_block_ce + + ld1 {v31.8b},[x1],#8 // A[3][3] ^= *inp++ + eor v17.16b,v17.16b,v31.16b + beq .Lprocess_block_ce + + ld1 {v28.8b,v29.8b,v30.8b},[x1],#24 + eor v18.16b,v18.16b,v28.16b + eor v19.16b,v19.16b,v29.16b + eor v20.16b,v20.16b,v30.16b + +.Lprocess_block_ce: + adr x10,iotas + bl
KeccakF1600_ce + + b .Loop_absorb_ce + +.align 4 +.Labsorbed_ce: + stp d0,d1,[x0,#8*0] + stp d2,d3,[x0,#8*2] + stp d4,d5,[x0,#8*4] + stp d6,d7,[x0,#8*6] + stp d8,d9,[x0,#8*8] + stp d10,d11,[x0,#8*10] + stp d12,d13,[x0,#8*12] + stp d14,d15,[x0,#8*14] + stp d16,d17,[x0,#8*16] + stp d18,d19,[x0,#8*18] + stp d20,d21,[x0,#8*20] + stp d22,d23,[x0,#8*22] + str d24,[x0,#8*24] + add x0,x2,x3 // return value: the final subs left x2 = len - bsz, so adding bsz back yields the unabsorbed length + + ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] + ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldp x29,x30,[sp],#2*__SIZEOF_POINTER__+64 +.inst 0xd50323bf // autiasp + ret +.size SHA3_absorb_cext,.-SHA3_absorb_cext +.globl SHA3_squeeze_cext +.type SHA3_squeeze_cext,%function +.align 5 +SHA3_squeeze_cext: // in: x0 = state, x1 = output buffer, x2 = bytes to produce, x3 = block size in bytes +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-2*__SIZEOF_POINTER__]! + add x29,sp,#0 + mov x9,x0 // x9 = read cursor into the state, x10 = bytes left in the current block + mov x10,x3 + +.Loop_squeeze_ce: + ldr x4,[x9],#8 + cmp x2,#8 + blo .Lsqueeze_tail_ce +#ifdef __AARCH64EB__ + rev x4,x4 +#endif + str x4,[x1],#8 + beq .Lsqueeze_done_ce + + sub x2,x2,#8 + subs x10,x10,#8 + bhi .Loop_squeeze_ce + + bl KeccakF1600_cext // current block is exhausted: permute the state again + ldr x30,[sp,#__SIZEOF_POINTER__] // bl overwrote x30; reload the saved return address + mov x9,x0 + mov x10,x3 + b .Loop_squeeze_ce + +.align 4 +.Lsqueeze_tail_ce: + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + +.Lsqueeze_done_ce: + ldr x29,[sp],#2*__SIZEOF_POINTER__ +.inst 0xd50323bf // autiasp + ret +.size SHA3_squeeze_cext,.-SHA3_squeeze_cext +.byte
75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 // ASCII: "Keccak-1600 absorb and squeeze for ARMv8, CRYPTOGAMS by @dot-asm", NUL-terminated +.align 2 diff --git a/crates/common/crypto/keccak/keccak1600-armv8.s b/crates/common/crypto/keccak/keccak1600-armv8.s new file mode 100644 index 00000000000..934e6ac9c99 --- /dev/null +++ b/crates/common/crypto/keccak/keccak1600-armv8.s @@ -0,0 +1,841 @@ +.text + +.align 8 // strategic alignment and padding that allows to use + // address value as loop termination condition... +.quad 0,0,0,0,0,0,0,0 +.type iotas,%object +iotas: // 24 round constants, one 64-bit word per round; each is XORed into lane A[0][0] at the end of its round +.quad 0x0000000000000001 +.quad 0x0000000000008082 +.quad 0x800000000000808a +.quad 0x8000000080008000 +.quad 0x000000000000808b +.quad 0x0000000080000001 +.quad 0x8000000080008081 +.quad 0x8000000000008009 +.quad 0x000000000000008a +.quad 0x0000000000000088 +.quad 0x0000000080008009 +.quad 0x000000008000000a +.Liotas12: +.quad 0x000000008000808b +.quad 0x800000000000008b +.quad 0x8000000000008089 +.quad 0x8000000000008003 +.quad 0x8000000000008002 +.quad 0x8000000000000080 +.quad 0x000000000000800a +.quad 0x800000008000000a +.quad 0x8000000080008081 +.quad 0x8000000000008080 +.quad 0x0000000080000001 +.quad 0x8000000080008008 +.size iotas,.-iotas +.type KeccakF1600_int,%function +.align 5 +KeccakF1600_int: // in: x28 = iotas pointer, 25 state lanes in x0-x17,x25,x19-x24; x26,x27,x28,x30 are used as temporaries +.inst 0xd503233f // paciasp + stp x28,x30,[sp,#16] // stack is pre-allocated + b .Loop +.align 4 +.Loop: + ////////////////////////////////////////// Theta + eor x26,x0,x5 + stp x4,x9,[sp,#0] // offload pair...
+ eor x27,x1,x6 + eor x28,x2,x7 + eor x30,x3,x8 + eor x4,x4,x9 + eor x26,x26,x10 + eor x27,x27,x11 + eor x28,x28,x12 + eor x30,x30,x13 + eor x4,x4,x14 + eor x26,x26,x15 + eor x27,x27,x16 + eor x28,x28,x17 + eor x30,x30,x25 + eor x4,x4,x19 + eor x26,x26,x20 + eor x28,x28,x22 + eor x27,x27,x21 + eor x30,x30,x23 + eor x4,x4,x24 + + eor x9,x26,x28,ror#63 + + eor x1,x1,x9 + eor x6,x6,x9 + eor x11,x11,x9 + eor x16,x16,x9 + eor x21,x21,x9 + + eor x9,x27,x30,ror#63 + eor x28,x28,x4,ror#63 + eor x30,x30,x26,ror#63 + eor x4,x4,x27,ror#63 + + eor x27, x2,x9 // mov x27,x2 + eor x7,x7,x9 + eor x12,x12,x9 + eor x17,x17,x9 + eor x22,x22,x9 + + eor x0,x0,x4 + eor x5,x5,x4 + eor x10,x10,x4 + eor x15,x15,x4 + eor x20,x20,x4 + ldp x4,x9,[sp,#0] // re-load offloaded data + eor x26, x3,x28 // mov x26,x3 + eor x8,x8,x28 + eor x13,x13,x28 + eor x25,x25,x28 + eor x23,x23,x28 + + eor x28, x4,x30 // mov x28,x4 + eor x9,x9,x30 + eor x14,x14,x30 + eor x19,x19,x30 + eor x24,x24,x30 + + ////////////////////////////////////////// Rho+Pi + mov x30,x1 + ror x1,x6,#64-44 + //mov x27,x2 + ror x2,x12,#64-43 + //mov x26,x3 + ror x3,x25,#64-21 // ? + //mov x28,x4 + ror x4,x24,#64-14 // ? + + ror x6,x9,#64-20 // ? + ror x12,x13,#64-25 // ? + ror x25,x17,#64-15 + ror x24,x21,#64-2 // ? + + ror x9,x22,#64-61 + ror x13,x19,#64-8 + ror x17,x11,#64-10 + ror x21,x8,#64-55 + + ror x22,x14,#64-39 + ror x19,x23,#64-56 + ror x11,x7,#64-6 // ? + ror x8,x16,#64-45 + + ror x14,x20,#64-18 + ror x23,x15,#64-41 + ror x7,x10,#64-3 + ror x16,x5,#64-36 // ? + + ror x5,x26,#64-28 // ? + ror x10,x30,#64-1 + ror x15,x28,#64-27 // ? + ror x20,x27,#64-62 // ? + + ////////////////////////////////////////// Chi+Iota + bic x26,x2,x1 + bic x27,x3,x2 + bic x28,x0,x4 + bic x30,x1,x0 + eor x0,x0,x26 + bic x26,x4,x3 + eor x1,x1,x27 + ldr x27,[sp,#16] + eor x3,x3,x28 + eor x4,x4,x30 + eor x2,x2,x26 + ldr x30,[x27],#8 // Iota[i++] + + bic x26,x7,x6 + tst x27,#255 // are we done? (low byte of the iotas pointer is zero only just past the last constant; flags stay live until the bne below)
+ str x27,[sp,#16] + bic x27,x8,x7 + bic x28,x5,x9 + eor x0,x0,x30 // A[0][0] ^= Iota + bic x30,x6,x5 + eor x5,x5,x26 + bic x26,x9,x8 + eor x6,x6,x27 + eor x8,x8,x28 + eor x9,x9,x30 + eor x7,x7,x26 + + bic x26,x12,x11 + bic x27,x13,x12 + bic x28,x10,x14 + bic x30,x11,x10 + eor x10,x10,x26 + bic x26,x14,x13 + eor x11,x11,x27 + eor x13,x13,x28 + eor x14,x14,x30 + eor x12,x12,x26 + + bic x26,x17,x16 + bic x27,x25,x17 + bic x28,x15,x19 + bic x30,x16,x15 + eor x15,x15,x26 + bic x26,x19,x25 + eor x16,x16,x27 + eor x25,x25,x28 + eor x19,x19,x30 + eor x17,x17,x26 + + bic x26,x22,x21 + bic x27,x23,x22 + bic x28,x20,x24 + bic x30,x21,x20 + eor x20,x20,x26 + bic x26,x24,x23 + eor x21,x21,x27 + eor x23,x23,x28 + eor x24,x24,x30 + eor x22,x22,x26 + + bne .Loop + + ldr x30,[sp,#16+__SIZEOF_POINTER__] +.inst 0xd50323bf // autiasp + ret +.size KeccakF1600_int,.-KeccakF1600_int + +.type KeccakF1600,%function +.align 5 +KeccakF1600: // in: x0 = pointer to the 25-lane state, which is permuted in place +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]!
+ add x29,sp,#0 + stp x19,x20,[sp,#2*__SIZEOF_POINTER__] + stp x21,x22,[sp,#4*__SIZEOF_POINTER__] + stp x23,x24,[sp,#6*__SIZEOF_POINTER__] + stp x25,x26,[sp,#8*__SIZEOF_POINTER__] + stp x27,x28,[sp,#10*__SIZEOF_POINTER__] + sub sp,sp,#16+4*__SIZEOF_POINTER__ // scratch that KeccakF1600_int expects pre-allocated, plus the slot for the saved state pointer + + str x0,[sp,#16+2*__SIZEOF_POINTER__] // offload argument + mov x26,x0 + ldp x0,x1,[x0,#16*0] + ldp x2,x3,[x26,#16*1] + ldp x4,x5,[x26,#16*2] + ldp x6,x7,[x26,#16*3] + ldp x8,x9,[x26,#16*4] + ldp x10,x11,[x26,#16*5] + ldp x12,x13,[x26,#16*6] + ldp x14,x15,[x26,#16*7] + ldp x16,x17,[x26,#16*8] + ldp x25,x19,[x26,#16*9] // lane 18 is kept in x25; x18 is not used by this code + ldp x20,x21,[x26,#16*10] + ldp x22,x23,[x26,#16*11] + ldr x24,[x26,#16*12] + + adr x28,iotas // KeccakF1600_int takes the round-constant pointer in x28 + bl KeccakF1600_int + + ldr x26,[sp,#16+2*__SIZEOF_POINTER__] // reload the state pointer saved above + stp x0,x1,[x26,#16*0] + stp x2,x3,[x26,#16*1] + stp x4,x5,[x26,#16*2] + stp x6,x7,[x26,#16*3] + stp x8,x9,[x26,#16*4] + stp x10,x11,[x26,#16*5] + stp x12,x13,[x26,#16*6] + stp x14,x15,[x26,#16*7] + stp x16,x17,[x26,#16*8] + stp x25,x19,[x26,#16*9] + stp x20,x21,[x26,#16*10] + stp x22,x23,[x26,#16*11] + str x24,[x26,#16*12] + + ldp x19,x20,[x29,#2*__SIZEOF_POINTER__] + add sp,sp,#16+4*__SIZEOF_POINTER__ + ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] + ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] + ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] + ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] + ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ +.inst 0xd50323bf // autiasp + ret +.size KeccakF1600,.-KeccakF1600 + +.globl SHA3_absorb +.type SHA3_absorb,%function +.align 5 +SHA3_absorb: // in: x0 = A[5][5] state, x1 = inp, x2 = len, x3 = bsz +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]!
+ add x29,sp,#0 + stp x19,x20,[sp,#2*__SIZEOF_POINTER__] + stp x21,x22,[sp,#4*__SIZEOF_POINTER__] + stp x23,x24,[sp,#6*__SIZEOF_POINTER__] + stp x25,x26,[sp,#8*__SIZEOF_POINTER__] + stp x27,x28,[sp,#10*__SIZEOF_POINTER__] + sub sp,sp,#16+4*__SIZEOF_POINTER__+16 + + stp x0,x1,[sp,#16+2*__SIZEOF_POINTER__] // offload arguments + stp x2,x3,[sp,#16+4*__SIZEOF_POINTER__] + + mov x26,x0 // uint64_t A[5][5] + mov x27,x1 // const void *inp + mov x28,x2 // size_t len + mov x30,x3 // size_t bsz + ldp x0,x1,[x26,#16*0] + ldp x2,x3,[x26,#16*1] + ldp x4,x5,[x26,#16*2] + ldp x6,x7,[x26,#16*3] + ldp x8,x9,[x26,#16*4] + ldp x10,x11,[x26,#16*5] + ldp x12,x13,[x26,#16*6] + ldp x14,x15,[x26,#16*7] + ldp x16,x17,[x26,#16*8] + ldp x25,x19,[x26,#16*9] + ldp x20,x21,[x26,#16*10] + ldp x22,x23,[x26,#16*11] + ldr x24,[x26,#16*12] + b .Loop_absorb + +.align 4 +.Loop_absorb: + subs x26,x28,x30 // len - bsz + blo .Labsorbed + + str x26,[sp,#16+4*__SIZEOF_POINTER__] // save len - bsz + cmp x30,#104 // the loads/XORs below leave the flags alone, so this compare drives the later blo (bsz<104: stop after 72 bytes) and beq (bsz==104) + ldr x26,[x27,#0] // A[0][0] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x0,x0,x26 + ldr x26,[x27,#8] // A[0][1] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x1,x1,x26 + ldr x26,[x27,#16] // A[0][2] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x2,x2,x26 + ldr x26,[x27,#24] // A[0][3] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x3,x3,x26 + ldr x26,[x27,#32] // A[0][4] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x4,x4,x26 + ldr x26,[x27,#40] // A[1][0] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x5,x5,x26 + ldr x26,[x27,#48] // A[1][1] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x6,x6,x26 + ldr x26,[x27,#56] // A[1][2] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x7,x7,x26 + ldr x26,[x27,#64] // A[1][3] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x8,x8,x26 + blo .Lprocess_block + + ldr x26,[x27,#72] // A[1][4] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26
+#endif + eor x9,x9,x26 + ldr x26,[x27,#80] // A[2][0] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x10,x10,x26 + ldr x26,[x27,#88] // A[2][1] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x11,x11,x26 + ldr x26,[x27,#96] // A[2][2] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x12,x12,x26 + beq .Lprocess_block + + cmp x30,#144 // same scheme: blo stops after 136 bytes (bsz<144), beq after 144, otherwise 168 bytes are absorbed + ldr x26,[x27,#104] // A[2][3] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x13,x13,x26 + ldr x26,[x27,#112] // A[2][4] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x14,x14,x26 + ldr x26,[x27,#120] // A[3][0] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x15,x15,x26 + ldr x26,[x27,#128] // A[3][1] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x16,x16,x26 + blo .Lprocess_block + + ldr x26,[x27,#136] // A[3][2] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x17,x17,x26 + beq .Lprocess_block + + ldr x26,[x27,#144] // A[3][3] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x25,x25,x26 + ldr x26,[x27,#152] // A[3][4] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x19,x19,x26 + ldr x26,[x27,#160] // A[4][0] ^= *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x20,x20,x26 + +.Lprocess_block: + add x27,x27,x30 + str x27,[sp,#16+3*__SIZEOF_POINTER__] // save inp + + adr x28,iotas + bl KeccakF1600_int + + ldr x27,[sp,#16+3*__SIZEOF_POINTER__] // restore arguments + ldp x28,x30,[sp,#16+4*__SIZEOF_POINTER__] + b .Loop_absorb + +.align 4 +.Labsorbed: + ldr x27,[sp,#16+2*__SIZEOF_POINTER__] + stp x0,x1,[x27,#16*0] + stp x2,x3,[x27,#16*1] + stp x4,x5,[x27,#16*2] + stp x6,x7,[x27,#16*3] + stp x8,x9,[x27,#16*4] + stp x10,x11,[x27,#16*5] + stp x12,x13,[x27,#16*6] + stp x14,x15,[x27,#16*7] + stp x16,x17,[x27,#16*8] + stp x25,x19,[x27,#16*9] + stp x20,x21,[x27,#16*10] + stp x22,x23,[x27,#16*11] + str x24,[x27,#16*12] + + mov x0,x28 // return value: the leftover length (less than bsz) that was not absorbed + ldp x19,x20,[x29,#2*__SIZEOF_POINTER__] + add
sp,sp,#16+4*__SIZEOF_POINTER__+16 + ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] + ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] + ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] + ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] + ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ +.inst 0xd50323bf // autiasp + ret +.size SHA3_absorb,.-SHA3_absorb +.globl SHA3_squeeze +.type SHA3_squeeze,%function +.align 5 +SHA3_squeeze: // SHA3_squeeze(x0 = state, x1 = out, x2 = len, x3 = bsz): copies len bytes of the state to out, calling KeccakF1600 whenever bsz bytes have been emitted and more are still needed +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-6*__SIZEOF_POINTER__]! + add x29,sp,#0 + stp x19,x20,[sp,#2*__SIZEOF_POINTER__] + stp x21,x22,[sp,#4*__SIZEOF_POINTER__] + + mov x19,x0 // put aside arguments + mov x20,x1 + mov x21,x2 + mov x22,x3 + +.Loop_squeeze: + ldr x4,[x0],#8 + cmp x21,#8 + blo .Lsqueeze_tail +#ifdef __AARCH64EB__ + rev x4,x4 +#endif + str x4,[x20],#8 + subs x21,x21,#8 + beq .Lsqueeze_done + + subs x3,x3,#8 + bhi .Loop_squeeze // lanes remain in the current block + + mov x0,x19 + bl KeccakF1600 + mov x0,x19 // restart from the first lane of the freshly permuted state + mov x3,x22 + b .Loop_squeeze + +.align 4 +.Lsqueeze_tail: // fewer than 8 bytes requested: store the low bytes of the current lane one at a time + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + +.Lsqueeze_done: + ldp x19,x20,[sp,#2*__SIZEOF_POINTER__] + ldp x21,x22,[sp,#4*__SIZEOF_POINTER__] + ldp x29,x30,[sp],#6*__SIZEOF_POINTER__ +.inst 0xd50323bf // autiasp + ret +.size SHA3_squeeze,.-SHA3_squeeze +.type KeccakF1600_ce,%function +.align 5 +KeccakF1600_ce: // permutation over the lanes held in v0-v24; x10 walks the round constants and the loop ends once its low 8 bits are zero +.Loop_ce: + ////////////////////////////////////////////////// Theta +.inst 0xce0f2a99 //eor3 v25.16b,v20.16b,v15.16b,v10.16b +.inst 0xce102eba //eor3 v26.16b,v21.16b,v16.16b,v11.16b +.inst 0xce1132db //eor3 v27.16b,v22.16b,v17.16b,v12.16b +.inst 0xce1236fc //eor3 v28.16b,v23.16b,v18.16b,v13.16b +.inst 0xce133b1d //eor3 
v29.16b,v24.16b,v19.16b,v14.16b +.inst 0xce050339 //eor3 v25.16b,v25.16b, v5.16b,v0.16b +.inst 0xce06075a //eor3 v26.16b,v26.16b, v6.16b,v1.16b +.inst 0xce070b7b //eor3 v27.16b,v27.16b, v7.16b,v2.16b +.inst 0xce080f9c //eor3 v28.16b,v28.16b, v8.16b,v3.16b +.inst 0xce0913bd //eor3 v29.16b,v29.16b, v9.16b,v4.16b + +.inst 0xce7b8f3e //rax1 v30.2d,v25.2d,v27.2d // D[1] +.inst 0xce7c8f5f //rax1 v31.2d,v26.2d,v28.2d // D[2] +.inst 0xce7d8f7b //rax1 v27.2d,v27.2d,v29.2d // D[3] +.inst 0xce798f9c //rax1 v28.2d,v28.2d,v25.2d // D[4] +.inst 0xce7a8fbd //rax1 v29.2d,v29.2d,v26.2d // D[0] + + ////////////////////////////////////////////////// Theta+Rho+Pi +.inst 0xce9efc39 //xar v25.2d, v1.2d,v30.2d,#64-1 // C[0]=A[2][0] + +.inst 0xce9e50c1 //xar v1.2d,v6.2d,v30.2d,#64-44 +.inst 0xce9cb126 //xar v6.2d,v9.2d,v28.2d,#64-20 +.inst 0xce9f0ec9 //xar v9.2d,v22.2d,v31.2d,#64-61 +.inst 0xce9c65d6 //xar v22.2d,v14.2d,v28.2d,#64-39 +.inst 0xce9dba8e //xar v14.2d,v20.2d,v29.2d,#64-18 + +.inst 0xce9f085a //xar v26.2d, v2.2d,v31.2d,#64-62 // C[1]=A[4][0] + +.inst 0xce9f5582 //xar v2.2d,v12.2d,v31.2d,#64-43 +.inst 0xce9b9dac //xar v12.2d,v13.2d,v27.2d,#64-25 +.inst 0xce9ce26d //xar v13.2d,v19.2d,v28.2d,#64-8 +.inst 0xce9b22f3 //xar v19.2d,v23.2d,v27.2d,#64-56 +.inst 0xce9d5df7 //xar v23.2d,v15.2d,v29.2d,#64-41 + +.inst 0xce9c948f //xar v15.2d,v4.2d,v28.2d,#64-27 + +.inst 0xce9ccb1c //xar v28.2d, v24.2d,v28.2d,#64-14 // D[4]=A[0][4] +.inst 0xce9efab8 //xar v24.2d,v21.2d,v30.2d,#64-2 +.inst 0xce9b2508 //xar v8.2d,v8.2d,v27.2d,#64-55 // A[1][3]=A[4][1] +.inst 0xce9e4e04 //xar v4.2d,v16.2d,v30.2d,#64-45 // A[0][4]=A[1][3] +.inst 0xce9d70b0 //xar v16.2d,v5.2d,v29.2d,#64-36 + +.inst 0xce9b9065 //xar v5.2d,v3.2d,v27.2d,#64-28 + + eor v0.16b,v0.16b,v29.16b // lane 0 takes D[0] through a plain eor (no xar, so no rotation) + +.inst 0xce9bae5b //xar v27.2d, v18.2d,v27.2d,#64-21 // D[3]=A[0][3] +.inst 0xce9fc623 //xar v3.2d,v17.2d,v31.2d,#64-15 // A[0][3]=A[3][3] +.inst 0xce9ed97e //xar v30.2d, v11.2d,v30.2d,#64-10 // D[1]=A[3][2] +.inst 0xce9fe8ff //xar v31.2d, 
v7.2d,v31.2d,#64-6 // D[2]=A[2][1] +.inst 0xce9df55d //xar v29.2d, v10.2d,v29.2d,#64-3 // D[0]=A[1][2] + + ////////////////////////////////////////////////// Chi+Iota +.inst 0xce362354 //bcax v20.16b,v26.16b, v22.16b,v8.16b // A[1][3]=A[4][1] +.inst 0xce375915 //bcax v21.16b,v8.16b,v23.16b,v22.16b // A[1][3]=A[4][1] +.inst 0xce385ed6 //bcax v22.16b,v22.16b,v24.16b,v23.16b +.inst 0xce3a62f7 //bcax v23.16b,v23.16b,v26.16b, v24.16b +.inst 0xce286b18 //bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1] + + ld1r {v26.2d},[x10],#8 // load this round's constant into both halves of v26 and advance x10 + +.inst 0xce330fd1 //bcax v17.16b,v30.16b, v19.16b,v3.16b // A[0][3]=A[3][3] +.inst 0xce2f4c72 //bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3] +.inst 0xce303e73 //bcax v19.16b,v19.16b,v16.16b,v15.16b +.inst 0xce3e41ef //bcax v15.16b,v15.16b,v30.16b, v16.16b +.inst 0xce237a10 //bcax v16.16b,v16.16b,v3.16b,v30.16b // A[0][3]=A[3][3] + +.inst 0xce2c7f2a //bcax v10.16b,v25.16b, v12.16b,v31.16b +.inst 0xce2d33eb //bcax v11.16b,v31.16b, v13.16b,v12.16b +.inst 0xce2e358c //bcax v12.16b,v12.16b,v14.16b,v13.16b +.inst 0xce3939ad //bcax v13.16b,v13.16b,v25.16b, v14.16b +.inst 0xce3f65ce //bcax v14.16b,v14.16b,v31.16b, v25.16b + +.inst 0xce2913a7 //bcax v7.16b,v29.16b, v9.16b,v4.16b // A[0][4]=A[1][3] +.inst 0xce252488 //bcax v8.16b,v4.16b,v5.16b,v9.16b // A[0][4]=A[1][3] +.inst 0xce261529 //bcax v9.16b,v9.16b,v6.16b,v5.16b +.inst 0xce3d18a5 //bcax v5.16b,v5.16b,v29.16b, v6.16b +.inst 0xce2474c6 //bcax v6.16b,v6.16b,v4.16b,v29.16b // A[0][4]=A[1][3] + +.inst 0xce207363 //bcax v3.16b,v27.16b, v0.16b,v28.16b +.inst 0xce210384 //bcax v4.16b,v28.16b, v1.16b,v0.16b +.inst 0xce220400 //bcax v0.16b,v0.16b,v2.16b,v1.16b +.inst 0xce3b0821 //bcax v1.16b,v1.16b,v27.16b, v2.16b +.inst 0xce3c6c42 //bcax v2.16b,v2.16b,v28.16b, v27.16b + + eor v0.16b,v0.16b,v26.16b // Iota: fold the round constant into lane 0 + + tst x10,#255 // finished once x10 has advanced onto a 256-byte boundary + bne .Loop_ce + + ret +.size KeccakF1600_ce,.-KeccakF1600_ce + +.type KeccakF1600_cext,%function +.align 5 +KeccakF1600_cext: // KeccakF1600_cext(x0 = state): loads the 25 lanes into d0-d24, runs KeccakF1600_ce and stores them back, saving and restoring d8-d15 around it +.inst 0xd503233f // paciasp + stp 
x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! + add x29,sp,#0 + stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement + stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldp d0,d1,[x0,#8*0] + ldp d2,d3,[x0,#8*2] + ldp d4,d5,[x0,#8*4] + ldp d6,d7,[x0,#8*6] + ldp d8,d9,[x0,#8*8] + ldp d10,d11,[x0,#8*10] + ldp d12,d13,[x0,#8*12] + ldp d14,d15,[x0,#8*14] + ldp d16,d17,[x0,#8*16] + ldp d18,d19,[x0,#8*18] + ldp d20,d21,[x0,#8*20] + ldp d22,d23,[x0,#8*22] + ldr d24,[x0,#8*24] + adr x10,iotas + bl KeccakF1600_ce + ldr x30,[sp,#__SIZEOF_POINTER__] // bl overwrote x30; reload the saved return address + stp d0,d1,[x0,#8*0] + stp d2,d3,[x0,#8*2] + stp d4,d5,[x0,#8*4] + stp d6,d7,[x0,#8*6] + stp d8,d9,[x0,#8*8] + stp d10,d11,[x0,#8*10] + stp d12,d13,[x0,#8*12] + stp d14,d15,[x0,#8*14] + stp d16,d17,[x0,#8*16] + stp d18,d19,[x0,#8*18] + stp d20,d21,[x0,#8*20] + stp d22,d23,[x0,#8*22] + str d24,[x0,#8*24] + + ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] + ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldr x29,[sp],#2*__SIZEOF_POINTER__+64 // x30 was already reloaded above, so only x29 is popped here +.inst 0xd50323bf // autiasp + ret +.size KeccakF1600_cext,.-KeccakF1600_cext +.globl SHA3_absorb_cext +.type SHA3_absorb_cext,%function +.align 5 +SHA3_absorb_cext: // SHA3_absorb_cext(x0 = A, x1 = inp, x2 = len, x3 = bsz): absorbs whole bsz-byte blocks with the lanes kept in d0-d24, running KeccakF1600_ce after each block +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! 
+ add x29,sp,#0 + stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement + stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldp d0,d1,[x0,#8*0] + ldp d2,d3,[x0,#8*2] + ldp d4,d5,[x0,#8*4] + ldp d6,d7,[x0,#8*6] + ldp d8,d9,[x0,#8*8] + ldp d10,d11,[x0,#8*10] + ldp d12,d13,[x0,#8*12] + ldp d14,d15,[x0,#8*14] + ldp d16,d17,[x0,#8*16] + ldp d18,d19,[x0,#8*18] + ldp d20,d21,[x0,#8*20] + ldp d22,d23,[x0,#8*22] + ldr d24,[x0,#8*24] + b .Loop_absorb_ce + +.align 4 +.Loop_absorb_ce: + subs x2,x2,x3 // len - bsz + blo .Labsorbed_ce // less than one full block left + + cmp x3,#104 // flags are consumed by the blo/beq below; the ld1/eor in between do not alter them + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + eor v0.16b,v0.16b,v27.16b + eor v1.16b,v1.16b,v28.16b + eor v2.16b,v2.16b,v29.16b + eor v3.16b,v3.16b,v30.16b + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + eor v4.16b,v4.16b,v27.16b + eor v5.16b,v5.16b,v28.16b + eor v6.16b,v6.16b,v29.16b + eor v7.16b,v7.16b,v30.16b + ld1 {v31.8b},[x1],#8 // A[1][3] ^= *inp++ + eor v8.16b,v8.16b,v31.16b + blo .Lprocess_block_ce // bsz < 104: 9 lanes (72 bytes) absorbed + + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + eor v9.16b,v9.16b,v27.16b + eor v10.16b,v10.16b,v28.16b + eor v11.16b,v11.16b,v29.16b + eor v12.16b,v12.16b,v30.16b + beq .Lprocess_block_ce // bsz == 104: 13 lanes absorbed + + cmp x3,#144 + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + eor v13.16b,v13.16b,v27.16b + eor v14.16b,v14.16b,v28.16b + eor v15.16b,v15.16b,v29.16b + eor v16.16b,v16.16b,v30.16b + blo .Lprocess_block_ce // bsz < 144: 17 lanes (136 bytes) absorbed + + ld1 {v31.8b},[x1],#8 // A[3][2] ^= *inp++ + eor v17.16b,v17.16b,v31.16b + beq .Lprocess_block_ce // bsz == 144: 18 lanes absorbed + + ld1 {v28.8b,v29.8b,v30.8b},[x1],#24 + eor v18.16b,v18.16b,v28.16b + eor v19.16b,v19.16b,v29.16b + eor v20.16b,v20.16b,v30.16b // fall-through: 21 lanes (168 bytes) absorbed for any larger bsz + +.Lprocess_block_ce: + adr x10,iotas + bl KeccakF1600_ce + + b .Loop_absorb_ce + +.align 4 +.Labsorbed_ce: + stp d0,d1,[x0,#8*0] + stp d2,d3,[x0,#8*2] + stp d4,d5,[x0,#8*4] + stp d6,d7,[x0,#8*6] + stp d8,d9,[x0,#8*8] + stp d10,d11,[x0,#8*10] + stp d12,d13,[x0,#8*12] + stp d14,d15,[x0,#8*14] + stp d16,d17,[x0,#8*16] + stp 
d18,d19,[x0,#8*18] + stp d20,d21,[x0,#8*20] + stp d22,d23,[x0,#8*22] + str d24,[x0,#8*24] + add x0,x2,x3 // return value: undo the final subs to get the leftover byte count + + ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] + ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldp x29,x30,[sp],#2*__SIZEOF_POINTER__+64 +.inst 0xd50323bf // autiasp + ret +.size SHA3_absorb_cext,.-SHA3_absorb_cext +.globl SHA3_squeeze_cext +.type SHA3_squeeze_cext,%function +.align 5 +SHA3_squeeze_cext: // SHA3_squeeze_cext(x0 = state, x1 = out, x2 = len, x3 = bsz): copies len bytes of the state to out, calling KeccakF1600_cext whenever bsz bytes have been emitted and more are still needed +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-2*__SIZEOF_POINTER__]! + add x29,sp,#0 + mov x9,x0 + mov x10,x3 + +.Loop_squeeze_ce: + ldr x4,[x9],#8 + cmp x2,#8 + blo .Lsqueeze_tail_ce +#ifdef __AARCH64EB__ + rev x4,x4 +#endif + str x4,[x1],#8 + beq .Lsqueeze_done_ce // flags still come from the cmp above: exactly 8 bytes were left + + sub x2,x2,#8 + subs x10,x10,#8 + bhi .Loop_squeeze_ce + + bl KeccakF1600_cext + ldr x30,[sp,#__SIZEOF_POINTER__] // bl overwrote x30; reload the saved return address + mov x9,x0 // restart from the first lane of the freshly permuted state + mov x10,x3 + b .Loop_squeeze_ce + +.align 4 +.Lsqueeze_tail_ce: // fewer than 8 bytes requested: store the low bytes of the current lane one at a time + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + +.Lsqueeze_done_ce: + ldr x29,[sp],#2*__SIZEOF_POINTER__ // only x29 is popped; x30 is reloaded after each bl above +.inst 0xd50323bf // autiasp + ret +.size SHA3_squeeze_cext,.-SHA3_squeeze_cext +.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 +.align 2 diff --git a/crates/common/crypto/keccak/keccak1600-x86_64.s b/crates/common/crypto/keccak/keccak1600-x86_64.s new file mode 100644 index 00000000000..d7652991330 --- /dev/null +++ 
b/crates/common/crypto/keccak/keccak1600-x86_64.s @@ -0,0 +1,536 @@ +.text + +.type __KeccakF1600,@function +.align 32 +__KeccakF1600: /* internal permutation: %rdi = state+100, %rsi = scratch buffer+100, %r15 = round-constant pointer; the callers complement six lanes (their notq sequences) before and after the call */ +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa /* endbr64 */ + + movq 60(%rdi),%rax + movq 68(%rdi),%rbx + movq 76(%rdi),%rcx + movq 84(%rdi),%rdx + movq 92(%rdi),%rbp + jmp .Loop + +.align 32 +.Loop: + movq -100(%rdi),%r8 + movq -52(%rdi),%r9 + movq -4(%rdi),%r10 + movq 44(%rdi),%r11 + + xorq -84(%rdi),%rcx + xorq -76(%rdi),%rdx + xorq %r8,%rax + xorq -92(%rdi),%rbx + xorq -44(%rdi),%rcx + xorq -60(%rdi),%rax + movq %rbp,%r12 + xorq -68(%rdi),%rbp + + xorq %r10,%rcx + xorq -20(%rdi),%rax + xorq -36(%rdi),%rdx + xorq %r9,%rbx + xorq -28(%rdi),%rbp + + xorq 36(%rdi),%rcx + xorq 20(%rdi),%rax + xorq 4(%rdi),%rdx + xorq -12(%rdi),%rbx + xorq 12(%rdi),%rbp + + movq %rcx,%r13 + rolq $1,%rcx + xorq %rax,%rcx + xorq %r11,%rdx + + rolq $1,%rax + xorq %rdx,%rax + xorq 28(%rdi),%rbx + + rolq $1,%rdx + xorq %rbx,%rdx + xorq 52(%rdi),%rbp + + rolq $1,%rbx + xorq %rbp,%rbx + + rolq $1,%rbp + xorq %r13,%rbp + xorq %rcx,%r9 + xorq %rdx,%r10 + rolq $44,%r9 + xorq %rbp,%r11 + xorq %rax,%r12 + rolq $43,%r10 + xorq %rbx,%r8 + movq %r9,%r13 + rolq $21,%r11 + orq %r10,%r9 + xorq %r8,%r9 + rolq $14,%r12 + + xorq (%r15),%r9 /* fold in this round's constant */ + leaq 8(%r15),%r15 + + movq %r12,%r14 + andq %r11,%r12 + movq %r9,-100(%rsi) + xorq %r10,%r12 + notq %r10 + movq %r12,-84(%rsi) + + orq %r11,%r10 + movq 76(%rdi),%r12 + xorq %r13,%r10 + movq %r10,-92(%rsi) + + andq %r8,%r13 + movq -28(%rdi),%r9 + xorq %r14,%r13 + movq -20(%rdi),%r10 + movq %r13,-68(%rsi) + + orq %r8,%r14 + movq -76(%rdi),%r8 + xorq %r11,%r14 + movq 28(%rdi),%r11 + movq %r14,-76(%rsi) + + + xorq %rbp,%r8 + xorq %rdx,%r12 + rolq $28,%r8 + xorq %rcx,%r11 + xorq %rax,%r9 + rolq $61,%r12 + rolq $45,%r11 + xorq %rbx,%r10 + rolq $20,%r9 + movq %r8,%r13 + orq %r12,%r8 + rolq $3,%r10 + + xorq %r11,%r8 + movq %r8,-36(%rsi) + + movq %r9,%r14 + andq %r13,%r9 + movq -92(%rdi),%r8 + xorq %r12,%r9 + notq %r12 + movq %r9,-28(%rsi) + + orq %r11,%r12 + 
movq -44(%rdi),%r9 + xorq %r10,%r12 + movq %r12,-44(%rsi) + + andq %r10,%r11 + movq 60(%rdi),%r12 + xorq %r14,%r11 + movq %r11,-52(%rsi) + + orq %r10,%r14 + movq 4(%rdi),%r10 + xorq %r13,%r14 + movq 52(%rdi),%r11 + movq %r14,-60(%rsi) + + + xorq %rbp,%r10 + xorq %rax,%r11 + rolq $25,%r10 + xorq %rdx,%r9 + rolq $8,%r11 + xorq %rbx,%r12 + rolq $6,%r9 + xorq %rcx,%r8 + rolq $18,%r12 + movq %r10,%r13 + andq %r11,%r10 + rolq $1,%r8 + + notq %r11 + xorq %r9,%r10 + movq %r10,-12(%rsi) + + movq %r12,%r14 + andq %r11,%r12 + movq -12(%rdi),%r10 + xorq %r13,%r12 + movq %r12,-4(%rsi) + + orq %r9,%r13 + movq 84(%rdi),%r12 + xorq %r8,%r13 + movq %r13,-20(%rsi) + + andq %r8,%r9 + xorq %r14,%r9 + movq %r9,12(%rsi) + + orq %r8,%r14 + movq -60(%rdi),%r9 + xorq %r11,%r14 + movq 36(%rdi),%r11 + movq %r14,4(%rsi) + + + movq -68(%rdi),%r8 + + xorq %rcx,%r10 + xorq %rdx,%r11 + rolq $10,%r10 + xorq %rbx,%r9 + rolq $15,%r11 + xorq %rbp,%r12 + rolq $36,%r9 + xorq %rax,%r8 + rolq $56,%r12 + movq %r10,%r13 + orq %r11,%r10 + rolq $27,%r8 + + notq %r11 + xorq %r9,%r10 + movq %r10,28(%rsi) + + movq %r12,%r14 + orq %r11,%r12 + xorq %r13,%r12 + movq %r12,36(%rsi) + + andq %r9,%r13 + xorq %r8,%r13 + movq %r13,20(%rsi) + + orq %r8,%r9 + xorq %r14,%r9 + movq %r9,52(%rsi) + + andq %r14,%r8 + xorq %r11,%r8 + movq %r8,44(%rsi) + + + xorq -84(%rdi),%rdx + xorq -36(%rdi),%rbp + rolq $62,%rdx + xorq 68(%rdi),%rcx + rolq $55,%rbp + xorq 12(%rdi),%rax + rolq $2,%rcx + xorq 20(%rdi),%rbx + xchgq %rsi,%rdi /* swap source and destination buffers for the next round */ + rolq $39,%rax + rolq $41,%rbx + movq %rdx,%r13 + andq %rbp,%rdx + notq %rbp + xorq %rcx,%rdx + movq %rdx,92(%rdi) + + movq %rax,%r14 + andq %rbp,%rax + xorq %r13,%rax + movq %rax,60(%rdi) + + orq %rcx,%r13 + xorq %rbx,%r13 + movq %r13,84(%rdi) + + andq %rbx,%rcx + xorq %r14,%rcx + movq %rcx,76(%rdi) + + orq %r14,%rbx + xorq %rbp,%rbx + movq %rbx,68(%rdi) + + movq %rdx,%rbp + movq %r13,%rdx + + testq $255,%r15 /* keep looping until the low byte of %r15 is zero */ + jnz .Loop + + leaq -192(%r15),%r15 /* rewind %r15 past the 24 eight-byte constants just consumed */ + .byte 0xf3,0xc3 /* rep ret */ +.cfi_endproc +.size 
__KeccakF1600,.-__KeccakF1600 + +.globl KeccakF1600 +.type KeccakF1600,@function +.align 32 +KeccakF1600: /* KeccakF1600(%rdi = state): complements six lanes, runs __KeccakF1600 with a 200-byte scratch buffer on the stack, then complements the same lanes back */ +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + leaq 100(%rdi),%rdi /* bias the pointer by 100 so the 25 lanes are addressed at offsets -100..92 */ + subq $200,%rsp /* 200-byte scratch state */ +.cfi_adjust_cfa_offset 200 + + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + + leaq iotas(%rip),%r15 + leaq 100(%rsp),%rsi + + call __KeccakF1600 + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + leaq -100(%rdi),%rdi /* undo the +100 bias so %rdi is the caller's pointer again */ + + leaq 248(%rsp),%r11 /* 200 bytes of scratch + six 8-byte pushes */ +.cfi_def_cfa %r11,8 + movq -48(%r11),%r15 + movq -40(%r11),%r14 + movq -32(%r11),%r13 + movq -24(%r11),%r12 + movq -16(%r11),%rbp + movq -8(%r11),%rbx + leaq (%r11),%rsp +.cfi_restore %r12 +.cfi_restore %r13 +.cfi_restore %r14 +.cfi_restore %r15 +.cfi_restore %rbp +.cfi_restore %rbx + .byte 0xf3,0xc3 +.cfi_endproc +.size KeccakF1600,.-KeccakF1600 +.globl SHA3_absorb +.type SHA3_absorb,@function +.align 32 +SHA3_absorb: /* SHA3_absorb(%rdi = state, %rsi = inp, %rdx = len, %rcx = bsz): XORs whole bsz-byte blocks of inp into the state, permuting after each one; returns the leftover length in %rax */ +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + leaq 100(%rdi),%rdi + subq $232,%rsp /* 200-byte scratch state plus 32 bytes holding the saved inp, len and bsz */ +.cfi_adjust_cfa_offset 232 + + + movq %rsi,%r9 + leaq 100(%rsp),%rsi + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + leaq iotas(%rip),%r15 + + movq 
%rcx,216-100(%rsi) /* keep bsz on the stack; %rcx is reused below as the lane counter */ + +.Loop_absorb: + cmpq %rcx,%rdx + jc .Ldone_absorb /* len < bsz: no full block left */ + + shrq $3,%rcx /* bsz in 8-byte lanes */ + leaq -100(%rdi),%r8 + +.Lblock_absorb: + movq (%r9),%rax + leaq 8(%r9),%r9 + xorq (%r8),%rax + leaq 8(%r8),%r8 + subq $8,%rdx + movq %rax,-8(%r8) + subq $1,%rcx + jnz .Lblock_absorb + + movq %r9,200-100(%rsi) + movq %rdx,208-100(%rsi) + call __KeccakF1600 + movq 200-100(%rsi),%r9 + movq 208-100(%rsi),%rdx + movq 216-100(%rsi),%rcx + jmp .Loop_absorb + +.align 32 +.Ldone_absorb: + movq %rdx,%rax /* return the leftover length */ + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + + leaq 280(%rsp),%r11 /* 232 bytes of locals + six 8-byte pushes */ +.cfi_def_cfa %r11,8 + movq -48(%r11),%r15 + movq -40(%r11),%r14 + movq -32(%r11),%r13 + movq -24(%r11),%r12 + movq -16(%r11),%rbp + movq -8(%r11),%rbx + leaq (%r11),%rsp +.cfi_restore %r12 +.cfi_restore %r13 +.cfi_restore %r14 +.cfi_restore %r15 +.cfi_restore %rbp +.cfi_restore %rbx + .byte 0xf3,0xc3 +.cfi_endproc +.size SHA3_absorb,.-SHA3_absorb +.globl SHA3_squeeze +.type SHA3_squeeze,@function +.align 32 +SHA3_squeeze: /* SHA3_squeeze(%rdi = state, %rsi = out, %rdx = len, %rcx = bsz): copies len bytes of the state to out, calling KeccakF1600 whenever bsz bytes have been emitted and more are still needed */ +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-32 + subq $32,%rsp +.cfi_adjust_cfa_offset 32 + + + shrq $3,%rcx /* bsz in 8-byte lanes */ + movq %rdi,%r8 + movq %rsi,%r12 + movq %rdx,%r13 + movq %rcx,%r14 + jmp .Loop_squeeze + +.align 32 +.Loop_squeeze: + cmpq $8,%r13 + jb .Ltail_squeeze + + movq (%r8),%rax + leaq 8(%r8),%r8 + movq %rax,(%r12) + leaq 8(%r12),%r12 + subq $8,%r13 + jz .Ldone_squeeze + + subq $1,%rcx + jnz .Loop_squeeze + + movq %rdi,%rcx + call KeccakF1600 + movq %rdi,%r8 /* restart from the first lane of the freshly permuted state */ + movq %r14,%rcx + jmp .Loop_squeeze + +.Ltail_squeeze: + movq %r8,%rsi + movq %r12,%rdi + movq %r13,%rcx +.byte 0xf3,0xa4 /* rep movsb: copy the remaining %rcx (fewer than 8) bytes */ + +.Ldone_squeeze: + movq 32(%rsp),%r14 + movq 40(%rsp),%r13 + movq 48(%rsp),%r12 + addq $56,%rsp +.cfi_adjust_cfa_offset -56 +.cfi_restore %r12 +.cfi_restore %r13 +.cfi_restore %r14 + 
.byte 0xf3,0xc3 +.cfi_endproc +.size SHA3_squeeze,.-SHA3_squeeze +.align 256 +.quad 0,0,0,0,0,0,0,0 +.type iotas,@object +iotas: /* 24 round constants; the .align 256 plus the 64 bytes of zero padding above put the end of this 192-byte table on a 256-byte boundary, which is what the testq $255,%r15 loop exit checks for */ +.quad 0x0000000000000001 +.quad 0x0000000000008082 +.quad 0x800000000000808a +.quad 0x8000000080008000 +.quad 0x000000000000808b +.quad 0x0000000080000001 +.quad 0x8000000080008081 +.quad 0x8000000000008009 +.quad 0x000000000000008a +.quad 0x0000000000000088 +.quad 0x0000000080008009 +.quad 0x000000008000000a +.quad 0x000000008000808b +.quad 0x800000000000008b +.quad 0x8000000000008089 +.quad 0x8000000000008003 +.quad 0x8000000000008002 +.quad 0x8000000000000080 +.quad 0x000000000000800a +.quad 0x800000008000000a +.quad 0x8000000080008081 +.quad 0x8000000000008080 +.quad 0x0000000080000001 +.quad 0x8000000080008008 +.size iotas,.-iotas +.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 + +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs new file mode 100644 index 00000000000..fd7fa5acda9 --- /dev/null +++ b/crates/common/crypto/keccak/mod.rs @@ -0,0 +1,848 @@ +#[unsafe(naked)] +extern "C" fn keccak1600() { + core::arch::naked_asm!( + r#" + .text + + .align 8 // strategic alignment and padding that allows to use + // address value as loop termination condition... 
+ .quad 0,0,0,0,0,0,0,0 + .type iotas,%object + iotas: + .quad 0x0000000000000001 + .quad 0x0000000000008082 + .quad 0x800000000000808a + .quad 0x8000000080008000 + .quad 0x000000000000808b + .quad 0x0000000080000001 + .quad 0x8000000080008081 + .quad 0x8000000000008009 + .quad 0x000000000000008a + .quad 0x0000000000000088 + .quad 0x0000000080008009 + .quad 0x000000008000000a + .Liotas12: + .quad 0x000000008000808b + .quad 0x800000000000008b + .quad 0x8000000000008089 + .quad 0x8000000000008003 + .quad 0x8000000000008002 + .quad 0x8000000000000080 + .quad 0x000000000000800a + .quad 0x800000008000000a + .quad 0x8000000080008081 + .quad 0x8000000000008080 + .quad 0x0000000080000001 + .quad 0x8000000080008008 + .size iotas,.-iotas + .type KeccakF1600_int,%function + .align 5 + KeccakF1600_int: + .inst 0xd503233f // paciasp + stp x28,x30,[sp,#16] // stack is pre-allocated + b .Loop + .align 4 + .Loop: + ////////////////////////////////////////// Theta + eor x26,x0,x5 + stp x4,x9,[sp,#0] // offload pair... 
+ eor x27,x1,x6 + eor x28,x2,x7 + eor x30,x3,x8 + eor x4,x4,x9 + eor x26,x26,x10 + eor x27,x27,x11 + eor x28,x28,x12 + eor x30,x30,x13 + eor x4,x4,x14 + eor x26,x26,x15 + eor x27,x27,x16 + eor x28,x28,x17 + eor x30,x30,x25 + eor x4,x4,x19 + eor x26,x26,x20 + eor x28,x28,x22 + eor x27,x27,x21 + eor x30,x30,x23 + eor x4,x4,x24 + + eor x9,x26,x28,ror#63 + + eor x1,x1,x9 + eor x6,x6,x9 + eor x11,x11,x9 + eor x16,x16,x9 + eor x21,x21,x9 + + eor x9,x27,x30,ror#63 + eor x28,x28,x4,ror#63 + eor x30,x30,x26,ror#63 + eor x4,x4,x27,ror#63 + + eor x27, x2,x9 // mov x27,x2 + eor x7,x7,x9 + eor x12,x12,x9 + eor x17,x17,x9 + eor x22,x22,x9 + + eor x0,x0,x4 + eor x5,x5,x4 + eor x10,x10,x4 + eor x15,x15,x4 + eor x20,x20,x4 + ldp x4,x9,[sp,#0] // re-load offloaded data + eor x26, x3,x28 // mov x26,x3 + eor x8,x8,x28 + eor x13,x13,x28 + eor x25,x25,x28 + eor x23,x23,x28 + + eor x28, x4,x30 // mov x28,x4 + eor x9,x9,x30 + eor x14,x14,x30 + eor x19,x19,x30 + eor x24,x24,x30 + + ////////////////////////////////////////// Rho+Pi + mov x30,x1 + ror x1,x6,#64-44 + //mov x27,x2 + ror x2,x12,#64-43 + //mov x26,x3 + ror x3,x25,#64-21 // ? + //mov x28,x4 + ror x4,x24,#64-14 // ? + + ror x6,x9,#64-20 // ? + ror x12,x13,#64-25 // ? + ror x25,x17,#64-15 + ror x24,x21,#64-2 // ? + + ror x9,x22,#64-61 + ror x13,x19,#64-8 + ror x17,x11,#64-10 + ror x21,x8,#64-55 + + ror x22,x14,#64-39 + ror x19,x23,#64-56 + ror x11,x7,#64-6 // ? + ror x8,x16,#64-45 + + ror x14,x20,#64-18 + ror x23,x15,#64-41 + ror x7,x10,#64-3 + ror x16,x5,#64-36 // ? + + ror x5,x26,#64-28 // ? + ror x10,x30,#64-1 + ror x15,x28,#64-27 // ? + ror x20,x27,#64-62 // ? + + ////////////////////////////////////////// Chi+Iota + bic x26,x2,x1 + bic x27,x3,x2 + bic x28,x0,x4 + bic x30,x1,x0 + eor x0,x0,x26 + bic x26,x4,x3 + eor x1,x1,x27 + ldr x27,[sp,#16] + eor x3,x3,x28 + eor x4,x4,x30 + eor x2,x2,x26 + ldr x30,[x27],#8 // Iota[i++] + + bic x26,x7,x6 + tst x27,#255 // are we done? 
+ str x27,[sp,#16] + bic x27,x8,x7 + bic x28,x5,x9 + eor x0,x0,x30 // A[0][0] ^= Iota + bic x30,x6,x5 + eor x5,x5,x26 + bic x26,x9,x8 + eor x6,x6,x27 + eor x8,x8,x28 + eor x9,x9,x30 + eor x7,x7,x26 + + bic x26,x12,x11 + bic x27,x13,x12 + bic x28,x10,x14 + bic x30,x11,x10 + eor x10,x10,x26 + bic x26,x14,x13 + eor x11,x11,x27 + eor x13,x13,x28 + eor x14,x14,x30 + eor x12,x12,x26 + + bic x26,x17,x16 + bic x27,x25,x17 + bic x28,x15,x19 + bic x30,x16,x15 + eor x15,x15,x26 + bic x26,x19,x25 + eor x16,x16,x27 + eor x25,x25,x28 + eor x19,x19,x30 + eor x17,x17,x26 + + bic x26,x22,x21 + bic x27,x23,x22 + bic x28,x20,x24 + bic x30,x21,x20 + eor x20,x20,x26 + bic x26,x24,x23 + eor x21,x21,x27 + eor x23,x23,x28 + eor x24,x24,x30 + eor x22,x22,x26 + + bne .Loop + + ldr x30,[sp,#16+__SIZEOF_POINTER__] + .inst 0xd50323bf // autiasp + ret + .size KeccakF1600_int,.-KeccakF1600_int + + .type KeccakF1600,%function + .align 5 + KeccakF1600: + .inst 0xd503233f // paciasp + stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#2*__SIZEOF_POINTER__] + stp x21,x22,[sp,#4*__SIZEOF_POINTER__] + stp x23,x24,[sp,#6*__SIZEOF_POINTER__] + stp x25,x26,[sp,#8*__SIZEOF_POINTER__] + stp x27,x28,[sp,#10*__SIZEOF_POINTER__] + sub sp,sp,#16+4*__SIZEOF_POINTER__ + + str x0,[sp,#16+2*__SIZEOF_POINTER__] // offload argument + mov x26,x0 + ldp x0,x1,[x0,#16*0] + ldp x2,x3,[x26,#16*1] + ldp x4,x5,[x26,#16*2] + ldp x6,x7,[x26,#16*3] + ldp x8,x9,[x26,#16*4] + ldp x10,x11,[x26,#16*5] + ldp x12,x13,[x26,#16*6] + ldp x14,x15,[x26,#16*7] + ldp x16,x17,[x26,#16*8] + ldp x25,x19,[x26,#16*9] + ldp x20,x21,[x26,#16*10] + ldp x22,x23,[x26,#16*11] + ldr x24,[x26,#16*12] + + adr x28,iotas + bl KeccakF1600_int + + ldr x26,[sp,#16+2*__SIZEOF_POINTER__] + stp x0,x1,[x26,#16*0] + stp x2,x3,[x26,#16*1] + stp x4,x5,[x26,#16*2] + stp x6,x7,[x26,#16*3] + stp x8,x9,[x26,#16*4] + stp x10,x11,[x26,#16*5] + stp x12,x13,[x26,#16*6] + stp x14,x15,[x26,#16*7] + stp x16,x17,[x26,#16*8] + stp x25,x19,[x26,#16*9] + stp x20,x21,[x26,#16*10] + stp x22,x23,[x26,#16*11] + str x24,[x26,#16*12] + + ldp x19,x20,[x29,#2*__SIZEOF_POINTER__] + add sp,sp,#16+4*__SIZEOF_POINTER__ + ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] + ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] + ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] + ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] + ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ + .inst 0xd50323bf // autiasp + ret + .size KeccakF1600,.-KeccakF1600 + + .globl SHA3_absorb + .type SHA3_absorb,%function + .align 5 + SHA3_absorb: + .inst 0xd503233f // paciasp + stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#2*__SIZEOF_POINTER__] + stp x21,x22,[sp,#4*__SIZEOF_POINTER__] + stp x23,x24,[sp,#6*__SIZEOF_POINTER__] + stp x25,x26,[sp,#8*__SIZEOF_POINTER__] + stp x27,x28,[sp,#10*__SIZEOF_POINTER__] + sub sp,sp,#16+4*__SIZEOF_POINTER__+16 + + stp x0,x1,[sp,#16+2*__SIZEOF_POINTER__] // offload arguments + stp x2,x3,[sp,#16+4*__SIZEOF_POINTER__] + + mov x26,x0 // uint64_t A[5][5] + mov x27,x1 // const void *inp + mov x28,x2 // size_t len + mov x30,x3 // size_t bsz + ldp x0,x1,[x26,#16*0] + ldp x2,x3,[x26,#16*1] + ldp x4,x5,[x26,#16*2] + ldp x6,x7,[x26,#16*3] + ldp x8,x9,[x26,#16*4] + ldp x10,x11,[x26,#16*5] + ldp x12,x13,[x26,#16*6] + ldp x14,x15,[x26,#16*7] + ldp x16,x17,[x26,#16*8] + ldp x25,x19,[x26,#16*9] + ldp x20,x21,[x26,#16*10] + ldp x22,x23,[x26,#16*11] + ldr x24,[x26,#16*12] + b .Loop_absorb + + .align 4 + .Loop_absorb: + subs x26,x28,x30 // len - bsz + blo .Labsorbed + + str x26,[sp,#16+4*__SIZEOF_POINTER__] // save len - bsz + cmp x30,#104 + ldr x26,[x27,#0] // A[0][0] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x0,x0,x26 + ldr x26,[x27,#8] // A[0][1] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x1,x1,x26 + ldr x26,[x27,#16] // A[0][2] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x2,x2,x26 + ldr x26,[x27,#24] // A[0][3] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x3,x3,x26 + ldr x26,[x27,#32] // A[0][4] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x4,x4,x26 + ldr x26,[x27,#40] // A[1][0] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x5,x5,x26 + ldr x26,[x27,#48] // A[1][1] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x6,x6,x26 + ldr x26,[x27,#56] // A[1][2] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x7,x7,x26 + ldr x26,[x27,#64] // A[1][3] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x8,x8,x26 + blo .Lprocess_block + + ldr x26,[x27,#72] // A[1][4] ^= *inp++ + #ifdef __AARCH64EB__ 
+ rev x26,x26 + #endif + eor x9,x9,x26 + ldr x26,[x27,#80] // A[2][0] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x10,x10,x26 + ldr x26,[x27,#88] // A[2][1] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x11,x11,x26 + ldr x26,[x27,#96] // A[2][2] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x12,x12,x26 + beq .Lprocess_block + + cmp x30,#144 + ldr x26,[x27,#104] // A[2][3] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x13,x13,x26 + ldr x26,[x27,#112] // A[2][4] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x14,x14,x26 + ldr x26,[x27,#120] // A[3][0] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x15,x15,x26 + ldr x26,[x27,#128] // A[3][1] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x16,x16,x26 + blo .Lprocess_block + + ldr x26,[x27,#136] // A[3][2] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x17,x17,x26 + beq .Lprocess_block + + ldr x26,[x27,#144] // A[3][3] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x25,x25,x26 + ldr x26,[x27,#152] // A[3][4] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x19,x19,x26 + ldr x26,[x27,#160] // A[4][0] ^= *inp++ + #ifdef __AARCH64EB__ + rev x26,x26 + #endif + eor x20,x20,x26 + + .Lprocess_block: + add x27,x27,x30 + str x27,[sp,#16+3*__SIZEOF_POINTER__] // save inp + + adr x28,iotas + bl KeccakF1600_int + + ldr x27,[sp,#16+3*__SIZEOF_POINTER__] // restore arguments + ldp x28,x30,[sp,#16+4*__SIZEOF_POINTER__] + b .Loop_absorb + + .align 4 + .Labsorbed: + ldr x27,[sp,#16+2*__SIZEOF_POINTER__] + stp x0,x1,[x27,#16*0] + stp x2,x3,[x27,#16*1] + stp x4,x5,[x27,#16*2] + stp x6,x7,[x27,#16*3] + stp x8,x9,[x27,#16*4] + stp x10,x11,[x27,#16*5] + stp x12,x13,[x27,#16*6] + stp x14,x15,[x27,#16*7] + stp x16,x17,[x27,#16*8] + stp x25,x19,[x27,#16*9] + stp x20,x21,[x27,#16*10] + stp x22,x23,[x27,#16*11] + str x24,[x27,#16*12] + + mov x0,x28 // return value + ldp 
x19,x20,[x29,#2*__SIZEOF_POINTER__] + add sp,sp,#16+4*__SIZEOF_POINTER__+16 + ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] + ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] + ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] + ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] + ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ + .inst 0xd50323bf // autiasp + ret + .size SHA3_absorb,.-SHA3_absorb + .globl SHA3_squeeze + .type SHA3_squeeze,%function + .align 5 + SHA3_squeeze: + .inst 0xd503233f // paciasp + stp x29,x30,[sp,#-6*__SIZEOF_POINTER__]! + add x29,sp,#0 + stp x19,x20,[sp,#2*__SIZEOF_POINTER__] + stp x21,x22,[sp,#4*__SIZEOF_POINTER__] + + mov x19,x0 // put aside arguments + mov x20,x1 + mov x21,x2 + mov x22,x3 + + .Loop_squeeze: + ldr x4,[x0],#8 + cmp x21,#8 + blo .Lsqueeze_tail + #ifdef __AARCH64EB__ + rev x4,x4 + #endif + str x4,[x20],#8 + subs x21,x21,#8 + beq .Lsqueeze_done + + subs x3,x3,#8 + bhi .Loop_squeeze + + mov x0,x19 + bl KeccakF1600 + mov x0,x19 + mov x3,x22 + b .Loop_squeeze + + .align 4 + .Lsqueeze_tail: + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + + .Lsqueeze_done: + ldp x19,x20,[sp,#2*__SIZEOF_POINTER__] + ldp x21,x22,[sp,#4*__SIZEOF_POINTER__] + ldp x29,x30,[sp],#6*__SIZEOF_POINTER__ + .inst 0xd50323bf // autiasp + ret + .size SHA3_squeeze,.-SHA3_squeeze + .type KeccakF1600_ce,%function + .align 5 + KeccakF1600_ce: + .Loop_ce: + ////////////////////////////////////////////////// Theta + .inst 0xce0f2a99 //eor3 v25.16b,v20.16b,v15.16b,v10.16b + .inst 0xce102eba //eor3 v26.16b,v21.16b,v16.16b,v11.16b + .inst 0xce1132db //eor3 v27.16b,v22.16b,v17.16b,v12.16b + .inst 0xce1236fc 
//eor3 v28.16b,v23.16b,v18.16b,v13.16b + .inst 0xce133b1d //eor3 v29.16b,v24.16b,v19.16b,v14.16b + .inst 0xce050339 //eor3 v25.16b,v25.16b, v5.16b,v0.16b + .inst 0xce06075a //eor3 v26.16b,v26.16b, v6.16b,v1.16b + .inst 0xce070b7b //eor3 v27.16b,v27.16b, v7.16b,v2.16b + .inst 0xce080f9c //eor3 v28.16b,v28.16b, v8.16b,v3.16b + .inst 0xce0913bd //eor3 v29.16b,v29.16b, v9.16b,v4.16b + + .inst 0xce7b8f3e //rax1 v30.2d,v25.2d,v27.2d // D[1] + .inst 0xce7c8f5f //rax1 v31.2d,v26.2d,v28.2d // D[2] + .inst 0xce7d8f7b //rax1 v27.2d,v27.2d,v29.2d // D[3] + .inst 0xce798f9c //rax1 v28.2d,v28.2d,v25.2d // D[4] + .inst 0xce7a8fbd //rax1 v29.2d,v29.2d,v26.2d // D[0] + + ////////////////////////////////////////////////// Theta+Rho+Pi + .inst 0xce9efc39 //xar v25.2d, v1.2d,v30.2d,#64-1 // C[0]=A[2][0] + + .inst 0xce9e50c1 //xar v1.2d,v6.2d,v30.2d,#64-44 + .inst 0xce9cb126 //xar v6.2d,v9.2d,v28.2d,#64-20 + .inst 0xce9f0ec9 //xar v9.2d,v22.2d,v31.2d,#64-61 + .inst 0xce9c65d6 //xar v22.2d,v14.2d,v28.2d,#64-39 + .inst 0xce9dba8e //xar v14.2d,v20.2d,v29.2d,#64-18 + + .inst 0xce9f085a //xar v26.2d, v2.2d,v31.2d,#64-62 // C[1]=A[4][0] + + .inst 0xce9f5582 //xar v2.2d,v12.2d,v31.2d,#64-43 + .inst 0xce9b9dac //xar v12.2d,v13.2d,v27.2d,#64-25 + .inst 0xce9ce26d //xar v13.2d,v19.2d,v28.2d,#64-8 + .inst 0xce9b22f3 //xar v19.2d,v23.2d,v27.2d,#64-56 + .inst 0xce9d5df7 //xar v23.2d,v15.2d,v29.2d,#64-41 + + .inst 0xce9c948f //xar v15.2d,v4.2d,v28.2d,#64-27 + + .inst 0xce9ccb1c //xar v28.2d, v24.2d,v28.2d,#64-14 // D[4]=A[0][4] + .inst 0xce9efab8 //xar v24.2d,v21.2d,v30.2d,#64-2 + .inst 0xce9b2508 //xar v8.2d,v8.2d,v27.2d,#64-55 // A[1][3]=A[4][1] + .inst 0xce9e4e04 //xar v4.2d,v16.2d,v30.2d,#64-45 // A[0][4]=A[1][3] + .inst 0xce9d70b0 //xar v16.2d,v5.2d,v29.2d,#64-36 + + .inst 0xce9b9065 //xar v5.2d,v3.2d,v27.2d,#64-28 + + eor v0.16b,v0.16b,v29.16b + + .inst 0xce9bae5b //xar v27.2d, v18.2d,v27.2d,#64-21 // D[3]=A[0][3] + .inst 0xce9fc623 //xar v3.2d,v17.2d,v31.2d,#64-15 // A[0][3]=A[3][3] + .inst 
0xce9ed97e //xar v30.2d, v11.2d,v30.2d,#64-10 // D[1]=A[3][2] + .inst 0xce9fe8ff //xar v31.2d, v7.2d,v31.2d,#64-6 // D[2]=A[2][1] + .inst 0xce9df55d //xar v29.2d, v10.2d,v29.2d,#64-3 // D[0]=A[1][2] + + ////////////////////////////////////////////////// Chi+Iota + .inst 0xce362354 //bcax v20.16b,v26.16b, v22.16b,v8.16b // A[1][3]=A[4][1] + .inst 0xce375915 //bcax v21.16b,v8.16b,v23.16b,v22.16b // A[1][3]=A[4][1] + .inst 0xce385ed6 //bcax v22.16b,v22.16b,v24.16b,v23.16b + .inst 0xce3a62f7 //bcax v23.16b,v23.16b,v26.16b, v24.16b + .inst 0xce286b18 //bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1] + + ld1r {{v26.2d}},[x10],#8 + + .inst 0xce330fd1 //bcax v17.16b,v30.16b, v19.16b,v3.16b // A[0][3]=A[3][3] + .inst 0xce2f4c72 //bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3] + .inst 0xce303e73 //bcax v19.16b,v19.16b,v16.16b,v15.16b + .inst 0xce3e41ef //bcax v15.16b,v15.16b,v30.16b, v16.16b + .inst 0xce237a10 //bcax v16.16b,v16.16b,v3.16b,v30.16b // A[0][3]=A[3][3] + + .inst 0xce2c7f2a //bcax v10.16b,v25.16b, v12.16b,v31.16b + .inst 0xce2d33eb //bcax v11.16b,v31.16b, v13.16b,v12.16b + .inst 0xce2e358c //bcax v12.16b,v12.16b,v14.16b,v13.16b + .inst 0xce3939ad //bcax v13.16b,v13.16b,v25.16b, v14.16b + .inst 0xce3f65ce //bcax v14.16b,v14.16b,v31.16b, v25.16b + + .inst 0xce2913a7 //bcax v7.16b,v29.16b, v9.16b,v4.16b // A[0][4]=A[1][3] + .inst 0xce252488 //bcax v8.16b,v4.16b,v5.16b,v9.16b // A[0][4]=A[1][3] + .inst 0xce261529 //bcax v9.16b,v9.16b,v6.16b,v5.16b + .inst 0xce3d18a5 //bcax v5.16b,v5.16b,v29.16b, v6.16b + .inst 0xce2474c6 //bcax v6.16b,v6.16b,v4.16b,v29.16b // A[0][4]=A[1][3] + + .inst 0xce207363 //bcax v3.16b,v27.16b, v0.16b,v28.16b + .inst 0xce210384 //bcax v4.16b,v28.16b, v1.16b,v0.16b + .inst 0xce220400 //bcax v0.16b,v0.16b,v2.16b,v1.16b + .inst 0xce3b0821 //bcax v1.16b,v1.16b,v27.16b, v2.16b + .inst 0xce3c6c42 //bcax v2.16b,v2.16b,v28.16b, v27.16b + + eor v0.16b,v0.16b,v26.16b + + tst x10,#255 + bne .Loop_ce + + ret + .size 
KeccakF1600_ce,.-KeccakF1600_ce + + .type KeccakF1600_cext,%function + .align 5 + KeccakF1600_cext: + .inst 0xd503233f // paciasp + stp x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! + add x29,sp,#0 + stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement + stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldp d0,d1,[x0,#8*0] + ldp d2,d3,[x0,#8*2] + ldp d4,d5,[x0,#8*4] + ldp d6,d7,[x0,#8*6] + ldp d8,d9,[x0,#8*8] + ldp d10,d11,[x0,#8*10] + ldp d12,d13,[x0,#8*12] + ldp d14,d15,[x0,#8*14] + ldp d16,d17,[x0,#8*16] + ldp d18,d19,[x0,#8*18] + ldp d20,d21,[x0,#8*20] + ldp d22,d23,[x0,#8*22] + ldr d24,[x0,#8*24] + adr x10,iotas + bl KeccakF1600_ce + ldr x30,[sp,#__SIZEOF_POINTER__] + stp d0,d1,[x0,#8*0] + stp d2,d3,[x0,#8*2] + stp d4,d5,[x0,#8*4] + stp d6,d7,[x0,#8*6] + stp d8,d9,[x0,#8*8] + stp d10,d11,[x0,#8*10] + stp d12,d13,[x0,#8*12] + stp d14,d15,[x0,#8*14] + stp d16,d17,[x0,#8*16] + stp d18,d19,[x0,#8*18] + stp d20,d21,[x0,#8*20] + stp d22,d23,[x0,#8*22] + str d24,[x0,#8*24] + + ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] + ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldr x29,[sp],#2*__SIZEOF_POINTER__+64 + .inst 0xd50323bf // autiasp + ret + .size KeccakF1600_cext,.-KeccakF1600_cext + .globl SHA3_absorb_cext + .type SHA3_absorb_cext,%function + .align 5 + SHA3_absorb_cext: + .inst 0xd503233f // paciasp + stp x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! 
+ add x29,sp,#0 + stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement + stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldp d0,d1,[x0,#8*0] + ldp d2,d3,[x0,#8*2] + ldp d4,d5,[x0,#8*4] + ldp d6,d7,[x0,#8*6] + ldp d8,d9,[x0,#8*8] + ldp d10,d11,[x0,#8*10] + ldp d12,d13,[x0,#8*12] + ldp d14,d15,[x0,#8*14] + ldp d16,d17,[x0,#8*16] + ldp d18,d19,[x0,#8*18] + ldp d20,d21,[x0,#8*20] + ldp d22,d23,[x0,#8*22] + ldr d24,[x0,#8*24] + b .Loop_absorb_ce + + .align 4 + .Loop_absorb_ce: + subs x2,x2,x3 // len - bsz + blo .Labsorbed_ce + + cmp x3,#104 + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 + eor v0.16b,v0.16b,v27.16b + eor v1.16b,v1.16b,v28.16b + eor v2.16b,v2.16b,v29.16b + eor v3.16b,v3.16b,v30.16b + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 + eor v4.16b,v4.16b,v27.16b + eor v5.16b,v5.16b,v28.16b + eor v6.16b,v6.16b,v29.16b + eor v7.16b,v7.16b,v30.16b + ld1 {{v31.8b}},[x1],#8 // A[1][4] ^= *inp++ + eor v8.16b,v8.16b,v31.16b + blo .Lprocess_block_ce + + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 + eor v9.16b,v9.16b,v27.16b + eor v10.16b,v10.16b,v28.16b + eor v11.16b,v11.16b,v29.16b + eor v12.16b,v12.16b,v30.16b + beq .Lprocess_block_ce + + cmp x3,#144 + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 + eor v13.16b,v13.16b,v27.16b + eor v14.16b,v14.16b,v28.16b + eor v15.16b,v15.16b,v29.16b + eor v16.16b,v16.16b,v30.16b + blo .Lprocess_block_ce + + ld1 {{v31.8b}},[x1],#8 // A[3][3] ^= *inp++ + eor v17.16b,v17.16b,v31.16b + beq .Lprocess_block_ce + + ld1 {{v28.8b,v29.8b,v30.8b}},[x1],#24 + eor v18.16b,v18.16b,v28.16b + eor v19.16b,v19.16b,v29.16b + eor v20.16b,v20.16b,v30.16b + + .Lprocess_block_ce: + adr x10,iotas + bl KeccakF1600_ce + + b .Loop_absorb_ce + + .align 4 + .Labsorbed_ce: + stp d0,d1,[x0,#8*0] + stp d2,d3,[x0,#8*2] + stp d4,d5,[x0,#8*4] + stp d6,d7,[x0,#8*6] + stp d8,d9,[x0,#8*8] + stp d10,d11,[x0,#8*10] + stp d12,d13,[x0,#8*12] + stp d14,d15,[x0,#8*14] + stp 
d16,d17,[x0,#8*16] + stp d18,d19,[x0,#8*18] + stp d20,d21,[x0,#8*20] + stp d22,d23,[x0,#8*22] + str d24,[x0,#8*24] + add x0,x2,x3 // return value + + ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] + ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] + ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] + ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + ldp x29,x30,[sp],#2*__SIZEOF_POINTER__+64 + .inst 0xd50323bf // autiasp + ret + .size SHA3_absorb_cext,.-SHA3_absorb_cext + .globl SHA3_squeeze_cext + .type SHA3_squeeze_cext,%function + .align 5 + SHA3_squeeze_cext: + .inst 0xd503233f // paciasp + stp x29,x30,[sp,#-2*__SIZEOF_POINTER__]! + add x29,sp,#0 + mov x9,x0 + mov x10,x3 + + .Loop_squeeze_ce: + ldr x4,[x9],#8 + cmp x2,#8 + blo .Lsqueeze_tail_ce + #ifdef __AARCH64EB__ + rev x4,x4 + #endif + str x4,[x1],#8 + beq .Lsqueeze_done_ce + + sub x2,x2,#8 + subs x10,x10,#8 + bhi .Loop_squeeze_ce + + bl KeccakF1600_cext + ldr x30,[sp,#__SIZEOF_POINTER__] + mov x9,x0 + mov x10,x3 + b .Loop_squeeze_ce + + .align 4 + .Lsqueeze_tail_ce: + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + + .Lsqueeze_done_ce: + ldr x29,[sp],#2*__SIZEOF_POINTER__ + .inst 0xd50323bf // autiasp + ret + .size SHA3_squeeze_cext,.-SHA3_squeeze_cext + .byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 + .align 2 + "# + ) +} diff --git a/crates/common/crypto/lib.rs b/crates/common/crypto/lib.rs index 352e6ab466e..4e78534e431 100644 --- 
a/crates/common/crypto/lib.rs +++ b/crates/common/crypto/lib.rs @@ -1,2 +1,3 @@ pub mod blake2f; +pub mod keccak; pub mod kzg; From a73024ff63aeeff3c217bbbd30eed370f30d203d Mon Sep 17 00:00:00 2001 From: Esteve Soler Arderiu Date: Wed, 5 Nov 2025 14:50:04 +0100 Subject: [PATCH 02/40] Rewrite `x86_64` implementation using Intel syntax. Refactor keccak module. --- crates/common/crypto/keccak/aarch64.rs | 1 + .../common/crypto/keccak/keccak1600-x86_64.s | 1074 +++++++++-------- crates/common/crypto/keccak/mod.rs | 957 ++------------- crates/common/crypto/keccak/x86_64.rs | 12 + 4 files changed, 667 insertions(+), 1377 deletions(-) create mode 100644 crates/common/crypto/keccak/aarch64.rs create mode 100644 crates/common/crypto/keccak/x86_64.rs diff --git a/crates/common/crypto/keccak/aarch64.rs b/crates/common/crypto/keccak/aarch64.rs new file mode 100644 index 00000000000..fa5a67f162b --- /dev/null +++ b/crates/common/crypto/keccak/aarch64.rs @@ -0,0 +1 @@ +#![cfg(target_arch = "aarch64")] diff --git a/crates/common/crypto/keccak/keccak1600-x86_64.s b/crates/common/crypto/keccak/keccak1600-x86_64.s index d7652991330..3bba6a936ef 100644 --- a/crates/common/crypto/keccak/keccak1600-x86_64.s +++ b/crates/common/crypto/keccak/keccak1600-x86_64.s @@ -1,536 +1,548 @@ -.text - -.type __KeccakF1600,@function -.align 32 + .type __KeccakF1600, @function + .align 0x20 __KeccakF1600: -.cfi_startproc - .byte 0xf3,0x0f,0x1e,0xfa - - movq 60(%rdi),%rax - movq 68(%rdi),%rbx - movq 76(%rdi),%rcx - movq 84(%rdi),%rdx - movq 92(%rdi),%rbp - jmp .Loop - -.align 32 -.Loop: - movq -100(%rdi),%r8 - movq -52(%rdi),%r9 - movq -4(%rdi),%r10 - movq 44(%rdi),%r11 - - xorq -84(%rdi),%rcx - xorq -76(%rdi),%rdx - xorq %r8,%rax - xorq -92(%rdi),%rbx - xorq -44(%rdi),%rcx - xorq -60(%rdi),%rax - movq %rbp,%r12 - xorq -68(%rdi),%rbp - - xorq %r10,%rcx - xorq -20(%rdi),%rax - xorq -36(%rdi),%rdx - xorq %r9,%rbx - xorq -28(%rdi),%rbp - - xorq 36(%rdi),%rcx - xorq 20(%rdi),%rax - xorq 4(%rdi),%rdx - 
xorq -12(%rdi),%rbx - xorq 12(%rdi),%rbp - - movq %rcx,%r13 - rolq $1,%rcx - xorq %rax,%rcx - xorq %r11,%rdx - - rolq $1,%rax - xorq %rdx,%rax - xorq 28(%rdi),%rbx - - rolq $1,%rdx - xorq %rbx,%rdx - xorq 52(%rdi),%rbp - - rolq $1,%rbx - xorq %rbp,%rbx - - rolq $1,%rbp - xorq %r13,%rbp - xorq %rcx,%r9 - xorq %rdx,%r10 - rolq $44,%r9 - xorq %rbp,%r11 - xorq %rax,%r12 - rolq $43,%r10 - xorq %rbx,%r8 - movq %r9,%r13 - rolq $21,%r11 - orq %r10,%r9 - xorq %r8,%r9 - rolq $14,%r12 - - xorq (%r15),%r9 - leaq 8(%r15),%r15 - - movq %r12,%r14 - andq %r11,%r12 - movq %r9,-100(%rsi) - xorq %r10,%r12 - notq %r10 - movq %r12,-84(%rsi) - - orq %r11,%r10 - movq 76(%rdi),%r12 - xorq %r13,%r10 - movq %r10,-92(%rsi) - - andq %r8,%r13 - movq -28(%rdi),%r9 - xorq %r14,%r13 - movq -20(%rdi),%r10 - movq %r13,-68(%rsi) - - orq %r8,%r14 - movq -76(%rdi),%r8 - xorq %r11,%r14 - movq 28(%rdi),%r11 - movq %r14,-76(%rsi) - - - xorq %rbp,%r8 - xorq %rdx,%r12 - rolq $28,%r8 - xorq %rcx,%r11 - xorq %rax,%r9 - rolq $61,%r12 - rolq $45,%r11 - xorq %rbx,%r10 - rolq $20,%r9 - movq %r8,%r13 - orq %r12,%r8 - rolq $3,%r10 - - xorq %r11,%r8 - movq %r8,-36(%rsi) - - movq %r9,%r14 - andq %r13,%r9 - movq -92(%rdi),%r8 - xorq %r12,%r9 - notq %r12 - movq %r9,-28(%rsi) - - orq %r11,%r12 - movq -44(%rdi),%r9 - xorq %r10,%r12 - movq %r12,-44(%rsi) - - andq %r10,%r11 - movq 60(%rdi),%r12 - xorq %r14,%r11 - movq %r11,-52(%rsi) - - orq %r10,%r14 - movq 4(%rdi),%r10 - xorq %r13,%r14 - movq 52(%rdi),%r11 - movq %r14,-60(%rsi) - - - xorq %rbp,%r10 - xorq %rax,%r11 - rolq $25,%r10 - xorq %rdx,%r9 - rolq $8,%r11 - xorq %rbx,%r12 - rolq $6,%r9 - xorq %rcx,%r8 - rolq $18,%r12 - movq %r10,%r13 - andq %r11,%r10 - rolq $1,%r8 - - notq %r11 - xorq %r9,%r10 - movq %r10,-12(%rsi) - - movq %r12,%r14 - andq %r11,%r12 - movq -12(%rdi),%r10 - xorq %r13,%r12 - movq %r12,-4(%rsi) - - orq %r9,%r13 - movq 84(%rdi),%r12 - xorq %r8,%r13 - movq %r13,-20(%rsi) - - andq %r8,%r9 - xorq %r14,%r9 - movq %r9,12(%rsi) - - orq %r8,%r14 - movq 
-60(%rdi),%r9 - xorq %r11,%r14 - movq 36(%rdi),%r11 - movq %r14,4(%rsi) - - - movq -68(%rdi),%r8 - - xorq %rcx,%r10 - xorq %rdx,%r11 - rolq $10,%r10 - xorq %rbx,%r9 - rolq $15,%r11 - xorq %rbp,%r12 - rolq $36,%r9 - xorq %rax,%r8 - rolq $56,%r12 - movq %r10,%r13 - orq %r11,%r10 - rolq $27,%r8 - - notq %r11 - xorq %r9,%r10 - movq %r10,28(%rsi) - - movq %r12,%r14 - orq %r11,%r12 - xorq %r13,%r12 - movq %r12,36(%rsi) - - andq %r9,%r13 - xorq %r8,%r13 - movq %r13,20(%rsi) - - orq %r8,%r9 - xorq %r14,%r9 - movq %r9,52(%rsi) - - andq %r14,%r8 - xorq %r11,%r8 - movq %r8,44(%rsi) - - - xorq -84(%rdi),%rdx - xorq -36(%rdi),%rbp - rolq $62,%rdx - xorq 68(%rdi),%rcx - rolq $55,%rbp - xorq 12(%rdi),%rax - rolq $2,%rcx - xorq 20(%rdi),%rbx - xchgq %rsi,%rdi - rolq $39,%rax - rolq $41,%rbx - movq %rdx,%r13 - andq %rbp,%rdx - notq %rbp - xorq %rcx,%rdx - movq %rdx,92(%rdi) - - movq %rax,%r14 - andq %rbp,%rax - xorq %r13,%rax - movq %rax,60(%rdi) - - orq %rcx,%r13 - xorq %rbx,%r13 - movq %r13,84(%rdi) - - andq %rbx,%rcx - xorq %r14,%rcx - movq %rcx,76(%rdi) - - orq %r14,%rbx - xorq %rbp,%rbx - movq %rbx,68(%rdi) - - movq %rdx,%rbp - movq %r13,%rdx - - testq $255,%r15 - jnz .Loop - - leaq -192(%r15),%r15 - .byte 0xf3,0xc3 -.cfi_endproc -.size __KeccakF1600,.-__KeccakF1600 - -.globl KeccakF1600 -.type KeccakF1600,@function -.align 32 + .cfi_startproc + endbr64 + + mov rax, [rdi + 0x3C] + mov rbx, [rdi + 0x44] + mov rcx, [rdi + 0x4C] + mov rdx, [rdi + 0x54] + mov rbp, [rdi + 0x5C] + jmp .Loop + + .align 0x20 + .Loop: + mov r8, [rdi - 0x64] + mov r9, [rdi - 0x34] + mov r10, [rdi - 0x04] + mov r11, [rdi + 0x2C] + + xor rcx, [rdi - 0x54] + xor rdx, [rdi - 0x4C] + xor rax, r8 + xor rbx, [rdi - 0x5C] + xor rcx, [rdi - 0x2C] + xor rax, [rdi - 0x3C] + mov r12, rbp + xor rbp, [rdi - 0x44] + + xor rcx, r10 + xor rax, [rdi - 0x14] + xor rdx, [rdi - 0x24] + xor rbx, r9 + xor rbp, [rdi - 0x1C] + + xor rcx, [rdi + 0x24] + xor rax, [rdi + 0x14] + xor rdx, [rdi + 0x04] + xor rbx, [rdi - 0x0C] + xor 
rbp, [rdi + 0x0C] + + mov r13, rcx + rol rcx, 0x01 + xor rcx, rax + xor rdx, r11 + + rol rax, 0x01 + xor rax, rdx + xor rbx, [rdi + 0x1C] + + rol rdx, 0x01 + xor rdx, rbx + xor rbp, [rdi + 0x34] + + rol rbx, 0x01 + xor rbx, rbp + + rol rbp, 0x01 + xor rbp, r13 + xor r9, rcx + xor r10, rdx + rol r9, 0x2C + xor r11, rbp + xor r12, rax + rol r10, 0x2B + xor r8, rbx + mov r13, r9 + rol r11, 0x15 + or r9, r10 + xor r9, r8 + rol r12, 0x0E + + xor r9, [r15] + lea r15, [r15 + 0x08] + + mov r14, r12 + and r12, r11 + mov [rsi - 0x64], r9 + xor r12, r10 + not r10 + mov [rsi - 0x54], r12 + + or r10, r11 + mov r12, [rdi + 0x4C] + xor r10, r13 + mov [rsi - 0x5C], r10 + + and r13, r8 + mov r9, [rdi - 0x1C] + xor r13, r14 + mov r10, [rdi - 0x14] + mov [rsi - 0x44], r13 + + or r14, r8 + mov r8, [rdi - 0x4C] + xor r14, r11 + mov r11, [rdi + 0x1C] + mov [rsi - 0x4C], r14 + + + xor r8, rbp + xor r12, rdx + rol r8, 0x1C + xor r11, rcx + xor r9, rax + rol r12, 0x3D + rol r11, 0x2D + xor r10, rbx + rol r9, 0x14 + mov r13, r8 + or r8, r12 + rol r10, 0x03 + + xor r8, r11 + mov [rsi - 0x24], r8 + + mov r14, r9 + and r9, r13 + mov r8, [rdi - 0x5C] + xor r9, r12 + not r12 + mov [rsi - 0x1C], r9 + + or r12, r11 + mov r9, [rdi - 0x2C] + xor r12, r10 + mov [rsi - 0x2C], r12 + + and r11, r10 + mov r12, [rdi + 0x3C] + xor r11, r14 + mov [rsi - 0x34], r11 + + or r14, r10 + mov r10, [rdi + 0x04] + xor r14, r13 + mov r11, [rdi + 0x34] + mov [rsi - 0x3C], r14 + + + xor r10, rbp + xor r11, rax + rol r10, 0x19 + xor r9, rdx + rol r11, 0x08 + xor r12, rbx + rol r9, 0x06 + xor r8, rcx + rol r12, 0x12 + mov r13, r10 + and r10, r11 + rol r8, 0x01 + + not r11 + xor r10, r9 + mov [rsi - 0x0C], r10 + + mov r14, r12 + and r12, r11 + mov r10, [rdi - 0x0C] + xor r12, r13 + mov [rsi - 0x04], r12 + + or r13, r9 + mov r12, [rdi + 0x54] + xor r13, r8 + mov [rsi - 0x14], r13 + + and r9, r8 + xor r9, r14 + mov [rsi + 0x0C], r9 + + or r14, r8 + mov r9, [rdi - 0x3C] + xor r14, r11 + mov r11, [rdi + 0x24] + mov [rsi + 
0x04], r14 + + + mov r8, [rdi - 0x44] + + xor r10, rcx + xor r11, rdx + rol r10, 0x0A + xor r9, rbx + rol r11, 0x0F + xor r12, rbp + rol r9, 0x24 + xor r8, rax + rol r12, 0x38 + mov r13, r10 + or r10, r11 + rol r8, 0x1B + + not r11 + xor r10, r9 + mov [rsi + 0x1C], r10 + + mov r14, r12 + or r12, r11 + xor r12, r13 + mov [rsi + 0x24], r12 + + and r13, r9 + xor r13, r8 + mov [rsi + 0x14], r13 + + or r9, r8 + xor r9, r14 + mov [rsi + 0x34], r9 + + and r8, r14 + xor r8, r11 + mov [rsi + 0x2C], r8 + + + xor rdx, [rdi - 0x54] + xor rbp, [rdi - 0x24] + rol rdx, 0x3E + xor rcx, [rdi + 0x44] + rol rbp, 0x37 + xor rax, [rdi + 0x0C] + rol rcx, 0x02 + xor rbx, [rdi + 0x14] + xchg rdi, rsi + rol rax, 0x27 + rol rbx, 0x29 + mov r13, rdx + and rdx, rbp + not rbp + xor rdx, rcx + mov [rdi + 0x5C], rdx + + mov r14, rax + and rax, rbp + xor rax, r13 + mov [rdi + 0x3C], rax + + or r13, rcx + xor r13, rbx + mov [rdi + 0x54], r13 + + and rcx, rbx + xor rcx, r14 + mov [rdi + 0x4C], rcx + + or rbx, r14 + xor rbx, rbp + mov [rdi + 0x44], rbx + + mov rbp, rdx + mov rdx, r13 + + test r15, 0xFF + jnz .Loop + + lea r15, [r15 - 0xC0] + .byte 0xF3, 0xC3 + .cfi_endproc + .size __KeccakF1600, . 
- __KeccakF1600 + + .global KeccakF1600 + .type KeccakF1600, @function + .align 0x20 KeccakF1600: -.cfi_startproc - .byte 0xf3,0x0f,0x1e,0xfa - - - pushq %rbx -.cfi_adjust_cfa_offset 8 -.cfi_offset %rbx,-16 - pushq %rbp -.cfi_adjust_cfa_offset 8 -.cfi_offset %rbp,-24 - pushq %r12 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r12,-32 - pushq %r13 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r13,-40 - pushq %r14 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r14,-48 - pushq %r15 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r15,-56 - - leaq 100(%rdi),%rdi - subq $200,%rsp -.cfi_adjust_cfa_offset 200 - - - notq -92(%rdi) - notq -84(%rdi) - notq -36(%rdi) - notq -4(%rdi) - notq 36(%rdi) - notq 60(%rdi) - - leaq iotas(%rip),%r15 - leaq 100(%rsp),%rsi - - call __KeccakF1600 - - notq -92(%rdi) - notq -84(%rdi) - notq -36(%rdi) - notq -4(%rdi) - notq 36(%rdi) - notq 60(%rdi) - leaq -100(%rdi),%rdi - - leaq 248(%rsp),%r11 -.cfi_def_cfa %r11,8 - movq -48(%r11),%r15 - movq -40(%r11),%r14 - movq -32(%r11),%r13 - movq -24(%r11),%r12 - movq -16(%r11),%rbp - movq -8(%r11),%rbx - leaq (%r11),%rsp -.cfi_restore %r12 -.cfi_restore %r13 -.cfi_restore %r14 -.cfi_restore %r15 -.cfi_restore %rbp -.cfi_restore %rbx - .byte 0xf3,0xc3 -.cfi_endproc -.size KeccakF1600,.-KeccakF1600 -.globl SHA3_absorb -.type SHA3_absorb,@function -.align 32 + .cfi_startproc + endbr64 + + + push rbx + .cfi_adjust_cfa_offset 0x08 + .cfi_offset rbx, -0x10 + push rbp + .cfi_adjust_cfa_offset 0x08 + .cfi_offset rbp, -0x18 + push r12 + .cfi_adjust_cfa_offset 0x08 + .cfi_offset r12, -0x20 + push r13 + .cfi_adjust_cfa_offset 0x08 + .cfi_offset r13, -0x28 + push r14 + .cfi_adjust_cfa_offset 0x08 + .cfi_offset r14, -0x30 + push r15 + .cfi_adjust_cfa_offset 0x08 + .cfi_offset r15, -0x38 + + lea rdi, [rdi + 0x64] + sub rsp, 0xC8 + .cfi_adjust_cfa_offset 0xC8 + + + not QWORD PTR [rdi - 0x5C] + not QWORD PTR [rdi - 0x54] + not QWORD PTR [rdi - 0x24] + not QWORD PTR [rdi - 0x04] + not QWORD PTR [rdi + 0x24] + not QWORD PTR [rdi + 0x3C] + + lea 
r15, [rip + iotas] + lea rsi, [rsp + 0x64] + + call __KeccakF1600 + + not QWORD PTR [rdi - 0x5C] + not QWORD PTR [rdi - 0x54] + not QWORD PTR [rdi - 0x24] + not QWORD PTR [rdi - 0x04] + not QWORD PTR [rdi + 0x24] + not QWORD PTR [rdi + 0x3C] + lea rdi, [rdi - 0x64] + + lea r11, [rsp + 0xF8] + .cfi_def_cfa r11, 0x08 + mov r15, [r11 - 0x30] + mov r14, [r11 - 0x28] + mov r13, [r11 - 0x20] + mov r12, [r11 - 0x18] + mov rbp, [r11 - 0x10] + mov rbx, [r11 - 0x08] + lea rsp, [r11] + .cfi_restore r12 + .cfi_restore r13 + .cfi_restore r14 + .cfi_restore r15 + .cfi_restore rbp + .cfi_restore rbx + .byte 0xF3, 0xC3 + .cfi_endproc + .size KeccakF1600, . - KeccakF1600 + + .global SHA3_absorb + .type SHA3_absorb, @function + .align 0x20 SHA3_absorb: -.cfi_startproc - .byte 0xf3,0x0f,0x1e,0xfa - - - pushq %rbx -.cfi_adjust_cfa_offset 8 -.cfi_offset %rbx,-16 - pushq %rbp -.cfi_adjust_cfa_offset 8 -.cfi_offset %rbp,-24 - pushq %r12 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r12,-32 - pushq %r13 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r13,-40 - pushq %r14 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r14,-48 - pushq %r15 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r15,-56 - - leaq 100(%rdi),%rdi - subq $232,%rsp -.cfi_adjust_cfa_offset 232 - - - movq %rsi,%r9 - leaq 100(%rsp),%rsi - - notq -92(%rdi) - notq -84(%rdi) - notq -36(%rdi) - notq -4(%rdi) - notq 36(%rdi) - notq 60(%rdi) - leaq iotas(%rip),%r15 - - movq %rcx,216-100(%rsi) - -.Loop_absorb: - cmpq %rcx,%rdx - jc .Ldone_absorb - - shrq $3,%rcx - leaq -100(%rdi),%r8 - -.Lblock_absorb: - movq (%r9),%rax - leaq 8(%r9),%r9 - xorq (%r8),%rax - leaq 8(%r8),%r8 - subq $8,%rdx - movq %rax,-8(%r8) - subq $1,%rcx - jnz .Lblock_absorb - - movq %r9,200-100(%rsi) - movq %rdx,208-100(%rsi) - call __KeccakF1600 - movq 200-100(%rsi),%r9 - movq 208-100(%rsi),%rdx - movq 216-100(%rsi),%rcx - jmp .Loop_absorb - -.align 32 -.Ldone_absorb: - movq %rdx,%rax - - notq -92(%rdi) - notq -84(%rdi) - notq -36(%rdi) - notq -4(%rdi) - notq 36(%rdi) - notq 60(%rdi) - - 
leaq 280(%rsp),%r11 -.cfi_def_cfa %r11,8 - movq -48(%r11),%r15 - movq -40(%r11),%r14 - movq -32(%r11),%r13 - movq -24(%r11),%r12 - movq -16(%r11),%rbp - movq -8(%r11),%rbx - leaq (%r11),%rsp -.cfi_restore %r12 -.cfi_restore %r13 -.cfi_restore %r14 -.cfi_restore %r15 -.cfi_restore %rbp -.cfi_restore %rbx - .byte 0xf3,0xc3 -.cfi_endproc -.size SHA3_absorb,.-SHA3_absorb -.globl SHA3_squeeze -.type SHA3_squeeze,@function -.align 32 + .cfi_startproc + endbr64 + + + push rbx + .cfi_adjust_cfa_offset 0x08 + .cfi_offset rbx, -0x10 + push rbp + .cfi_adjust_cfa_offset 0x08 + .cfi_offset rbp, -0x18 + push r12 + .cfi_adjust_cfa_offset 0x08 + .cfi_offset r12, -0x20 + push r13 + .cfi_adjust_cfa_offset 0x08 + .cfi_offset r13, -0x28 + push r14 + .cfi_adjust_cfa_offset 0x08 + .cfi_offset r14, -0x30 + push r15 + .cfi_adjust_cfa_offset 0x08 + .cfi_offset r15, -0x38 + + lea rdi, [rdi + 0x64] + sub rsp, 0xE8 + .cfi_adjust_cfa_offset 0xE8 + + + mov r9, rsi + lea rsi, [rsp + 0x64] + + not QWORD PTR [rdi - 0x5C] + not QWORD PTR [rdi - 0x54] + not QWORD PTR [rdi - 0x24] + not QWORD PTR [rdi - 0x04] + not QWORD PTR [rdi + 0x24] + not QWORD PTR [rdi + 0x3C] + lea r15, [rip + iotas] + + mov [rsi + 0x74], rcx + + .Loop_absorb: + cmp rdx, rcx + jc .Ldone_absorb + + shr rcx, 0x03 + lea r8, [rdi - 0x64] + + .Lblock_absorb: + mov rax, [r9] + lea r9, [r9 + 0x08] + xor rax, [r8] + lea r8, [r8 + 0x08] + sub rdx, 0x08 + mov [r8 - 0x08], rax + sub rcx, 0x01 + jnz .Lblock_absorb + + mov [rsi + 0x64], r9 + mov [rsi + 0x6C], rdx + call __KeccakF1600 + mov r9, [rsi + 0x64] + mov rdx, [rsi + 0x6C] + mov rcx, [rsi + 0x74] + jmp .Loop_absorb + + .align 0x20 + .Ldone_absorb: + mov rax, rdx + + not QWORD PTR [rdi - 0x5C] + not QWORD PTR [rdi - 0x54] + not QWORD PTR [rdi - 0x24] + not QWORD PTR [rdi - 0x04] + not QWORD PTR [rdi + 0x24] + not QWORD PTR [rdi + 0x3C] + + lea r11, [rsp + 0x0118] + .cfi_def_cfa r11, 0x08 + mov r15, [r11 - 0x30] + mov r14, [r11 - 0x28] + mov r13, [r11 - 0x20] + mov r12, [r11 - 0x18] + 
mov rbp, [r11 - 0x10] + mov rbx, [r11 - 0x08] + lea rsp, [r11] + .cfi_restore r12 + .cfi_restore r13 + .cfi_restore r14 + .cfi_restore r15 + .cfi_restore rbp + .cfi_restore rbx + .byte 0xF3, 0xC3 + .cfi_endproc + .size SHA3_absorb, . - SHA3_absorb + + .global SHA3_squeeze + .type SHA3_squeeze, @function + .align 0x20 SHA3_squeeze: -.cfi_startproc - .byte 0xf3,0x0f,0x1e,0xfa - - - pushq %r12 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r12,-16 - pushq %r13 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r13,-24 - pushq %r14 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r14,-32 - subq $32,%rsp -.cfi_adjust_cfa_offset 32 - - - shrq $3,%rcx - movq %rdi,%r8 - movq %rsi,%r12 - movq %rdx,%r13 - movq %rcx,%r14 - jmp .Loop_squeeze - -.align 32 -.Loop_squeeze: - cmpq $8,%r13 - jb .Ltail_squeeze - - movq (%r8),%rax - leaq 8(%r8),%r8 - movq %rax,(%r12) - leaq 8(%r12),%r12 - subq $8,%r13 - jz .Ldone_squeeze - - subq $1,%rcx - jnz .Loop_squeeze - - movq %rdi,%rcx - call KeccakF1600 - movq %rdi,%r8 - movq %r14,%rcx - jmp .Loop_squeeze - -.Ltail_squeeze: - movq %r8,%rsi - movq %r12,%rdi - movq %r13,%rcx -.byte 0xf3,0xa4 - -.Ldone_squeeze: - movq 32(%rsp),%r14 - movq 40(%rsp),%r13 - movq 48(%rsp),%r12 - addq $56,%rsp -.cfi_adjust_cfa_offset -56 -.cfi_restore %r12 -.cfi_restore %r13 -.cfi_restore %r14 - .byte 0xf3,0xc3 -.cfi_endproc -.size SHA3_squeeze,.-SHA3_squeeze -.align 256 -.quad 0,0,0,0,0,0,0,0 -.type iotas,@object + .cfi_startproc + endbr64 + + + push r12 + .cfi_adjust_cfa_offset 0x08 + .cfi_offset r12, -0x10 + push r13 + .cfi_adjust_cfa_offset 0x08 + .cfi_offset r13, -0x18 + push r14 + .cfi_adjust_cfa_offset 0x08 + .cfi_offset r14, -0x20 + sub rsp, 0x20 + .cfi_adjust_cfa_offset 0x20 + + + shr rcx, 0x03 + mov r8, rdi + mov r12, rsi + mov r13, rdx + mov r14, rcx + jmp .Loop_squeeze + + .align 0x20 + .Loop_squeeze: + cmp r13, 0x08 + jb .Ltail_squeeze + + mov rax, [r8] + lea r8, [r8 + 0x08] + mov [r12], rax + lea r12, [r12 + 0x08] + sub r13, 0x08 + jz .Ldone_squeeze + + sub rcx, 0x01 + jnz 
.Loop_squeeze + + mov rcx, rdi + call KeccakF1600 + mov r8, rdi + mov rcx, r14 + jmp .Loop_squeeze + + .Ltail_squeeze: + mov rsi, r8 + mov rdi, r12 + mov rcx, r13 + .byte 0xF3, 0xA4 + + .Ldone_squeeze: + mov r14, [rsp + 0x20] + mov r13, [rsp + 0x28] + mov r12, [rsp + 0x30] + add rsp, 0x38 + .cfi_adjust_cfa_offset -0x38 + .cfi_restore r12 + .cfi_restore r13 + .cfi_restore r14 + .byte 0xF3, 0xC3 + .cfi_endproc + .size SHA3_squeeze, . - SHA3_squeeze + + + .align 0x0100 + .quad 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + .type iotas, @object iotas: -.quad 0x0000000000000001 -.quad 0x0000000000008082 -.quad 0x800000000000808a -.quad 0x8000000080008000 -.quad 0x000000000000808b -.quad 0x0000000080000001 -.quad 0x8000000080008081 -.quad 0x8000000000008009 -.quad 0x000000000000008a -.quad 0x0000000000000088 -.quad 0x0000000080008009 -.quad 0x000000008000000a -.quad 0x000000008000808b -.quad 0x800000000000008b -.quad 0x8000000000008089 -.quad 0x8000000000008003 -.quad 0x8000000000008002 -.quad 0x8000000000000080 -.quad 0x000000000000800a -.quad 0x800000008000000a -.quad 0x8000000080008081 -.quad 0x8000000000008080 -.quad 0x0000000080000001 -.quad 0x8000000080008008 -.size iotas,.-iotas -.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 - -.section .note.gnu.property,"a",@note - .long 4,2f-1f,5 - .byte 0x47,0x4E,0x55,0 -1: .long 0xc0000002,4,3 -.align 8 -2: + .quad 0x0000000000000001 + .quad 0x0000000000008082 + .quad 0x800000000000808A + .quad 0x8000000080008000 + .quad 0x000000000000808B + .quad 0x0000000080000001 + .quad 0x8000000080008081 + .quad 0x8000000000008009 + .quad 0x000000000000008A + .quad 0x0000000000000088 + .quad 0x0000000080008009 + .quad 0x000000008000000A + .quad 0x000000008000808B + .quad 0x800000000000008B + .quad 0x8000000000008089 
+ .quad 0x8000000000008003 + .quad 0x8000000000008002 + .quad 0x8000000000000080 + .quad 0x000000000000800A + .quad 0x800000008000000A + .quad 0x8000000080008081 + .quad 0x8000000000008080 + .quad 0x0000000080000001 + .quad 0x8000000080008008 + .size iotas, . - iotas + .byte 0x4B, 0x65, 0x63, 0x63, 0x61, 0x6B, 0x2D, 0x31 + .byte 0x36, 0x30, 0x30, 0x20, 0x61, 0x62, 0x73, 0x6F + .byte 0x72, 0x62, 0x20, 0x61, 0x6E, 0x64, 0x20, 0x73 + .byte 0x71, 0x75, 0x65, 0x65, 0x7A, 0x65, 0x20, 0x66 + .byte 0x6F, 0x72, 0x20, 0x78, 0x38, 0x36, 0x5F, 0x36 + .byte 0x34, 0x2C, 0x20, 0x43, 0x52, 0x59, 0x50, 0x54 + .byte 0x4F, 0x47, 0x41, 0x4D, 0x53, 0x20, 0x62, 0x79 + .byte 0x20, 0x3C, 0x61, 0x70, 0x70, 0x72, 0x6F, 0x40 + .byte 0x6F, 0x70, 0x65, 0x6E, 0x73, 0x73, 0x6C, 0x2E + .byte 0x6F, 0x72, 0x67, 0x3E, 0x00 + + .section .note.gnu.property, "a", @note + .long 4, 2f-1f, 5 + .byte 0x47, 0x4E, 0x55, 0x00 + 1: + .long 0xC0000002, 0x04, 0x03 + .align 0x08 + 2: diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index fd7fa5acda9..5cba0abcb0d 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -1,848 +1,113 @@ -#[unsafe(naked)] -extern "C" fn keccak1600() { - core::arch::naked_asm!( - r#" - .text - - .align 8 // strategic alignment and padding that allows to use - // address value as loop termination condition... 
- .quad 0,0,0,0,0,0,0,0 - .type iotas,%object - iotas: - .quad 0x0000000000000001 - .quad 0x0000000000008082 - .quad 0x800000000000808a - .quad 0x8000000080008000 - .quad 0x000000000000808b - .quad 0x0000000080000001 - .quad 0x8000000080008081 - .quad 0x8000000000008009 - .quad 0x000000000000008a - .quad 0x0000000000000088 - .quad 0x0000000080008009 - .quad 0x000000008000000a - .Liotas12: - .quad 0x000000008000808b - .quad 0x800000000000008b - .quad 0x8000000000008089 - .quad 0x8000000000008003 - .quad 0x8000000000008002 - .quad 0x8000000000000080 - .quad 0x000000000000800a - .quad 0x800000008000000a - .quad 0x8000000080008081 - .quad 0x8000000000008080 - .quad 0x0000000080000001 - .quad 0x8000000080008008 - .size iotas,.-iotas - .type KeccakF1600_int,%function - .align 5 - KeccakF1600_int: - .inst 0xd503233f // paciasp - stp x28,x30,[sp,#16] // stack is pre-allocated - b .Loop - .align 4 - .Loop: - ////////////////////////////////////////// Theta - eor x26,x0,x5 - stp x4,x9,[sp,#0] // offload pair... 
- eor x27,x1,x6 - eor x28,x2,x7 - eor x30,x3,x8 - eor x4,x4,x9 - eor x26,x26,x10 - eor x27,x27,x11 - eor x28,x28,x12 - eor x30,x30,x13 - eor x4,x4,x14 - eor x26,x26,x15 - eor x27,x27,x16 - eor x28,x28,x17 - eor x30,x30,x25 - eor x4,x4,x19 - eor x26,x26,x20 - eor x28,x28,x22 - eor x27,x27,x21 - eor x30,x30,x23 - eor x4,x4,x24 - - eor x9,x26,x28,ror#63 - - eor x1,x1,x9 - eor x6,x6,x9 - eor x11,x11,x9 - eor x16,x16,x9 - eor x21,x21,x9 - - eor x9,x27,x30,ror#63 - eor x28,x28,x4,ror#63 - eor x30,x30,x26,ror#63 - eor x4,x4,x27,ror#63 - - eor x27, x2,x9 // mov x27,x2 - eor x7,x7,x9 - eor x12,x12,x9 - eor x17,x17,x9 - eor x22,x22,x9 - - eor x0,x0,x4 - eor x5,x5,x4 - eor x10,x10,x4 - eor x15,x15,x4 - eor x20,x20,x4 - ldp x4,x9,[sp,#0] // re-load offloaded data - eor x26, x3,x28 // mov x26,x3 - eor x8,x8,x28 - eor x13,x13,x28 - eor x25,x25,x28 - eor x23,x23,x28 - - eor x28, x4,x30 // mov x28,x4 - eor x9,x9,x30 - eor x14,x14,x30 - eor x19,x19,x30 - eor x24,x24,x30 - - ////////////////////////////////////////// Rho+Pi - mov x30,x1 - ror x1,x6,#64-44 - //mov x27,x2 - ror x2,x12,#64-43 - //mov x26,x3 - ror x3,x25,#64-21 // ? - //mov x28,x4 - ror x4,x24,#64-14 // ? - - ror x6,x9,#64-20 // ? - ror x12,x13,#64-25 // ? - ror x25,x17,#64-15 - ror x24,x21,#64-2 // ? - - ror x9,x22,#64-61 - ror x13,x19,#64-8 - ror x17,x11,#64-10 - ror x21,x8,#64-55 - - ror x22,x14,#64-39 - ror x19,x23,#64-56 - ror x11,x7,#64-6 // ? - ror x8,x16,#64-45 - - ror x14,x20,#64-18 - ror x23,x15,#64-41 - ror x7,x10,#64-3 - ror x16,x5,#64-36 // ? - - ror x5,x26,#64-28 // ? - ror x10,x30,#64-1 - ror x15,x28,#64-27 // ? - ror x20,x27,#64-62 // ? - - ////////////////////////////////////////// Chi+Iota - bic x26,x2,x1 - bic x27,x3,x2 - bic x28,x0,x4 - bic x30,x1,x0 - eor x0,x0,x26 - bic x26,x4,x3 - eor x1,x1,x27 - ldr x27,[sp,#16] - eor x3,x3,x28 - eor x4,x4,x30 - eor x2,x2,x26 - ldr x30,[x27],#8 // Iota[i++] - - bic x26,x7,x6 - tst x27,#255 // are we done? 
- str x27,[sp,#16] - bic x27,x8,x7 - bic x28,x5,x9 - eor x0,x0,x30 // A[0][0] ^= Iota - bic x30,x6,x5 - eor x5,x5,x26 - bic x26,x9,x8 - eor x6,x6,x27 - eor x8,x8,x28 - eor x9,x9,x30 - eor x7,x7,x26 - - bic x26,x12,x11 - bic x27,x13,x12 - bic x28,x10,x14 - bic x30,x11,x10 - eor x10,x10,x26 - bic x26,x14,x13 - eor x11,x11,x27 - eor x13,x13,x28 - eor x14,x14,x30 - eor x12,x12,x26 - - bic x26,x17,x16 - bic x27,x25,x17 - bic x28,x15,x19 - bic x30,x16,x15 - eor x15,x15,x26 - bic x26,x19,x25 - eor x16,x16,x27 - eor x25,x25,x28 - eor x19,x19,x30 - eor x17,x17,x26 - - bic x26,x22,x21 - bic x27,x23,x22 - bic x28,x20,x24 - bic x30,x21,x20 - eor x20,x20,x26 - bic x26,x24,x23 - eor x21,x21,x27 - eor x23,x23,x28 - eor x24,x24,x30 - eor x22,x22,x26 - - bne .Loop - - ldr x30,[sp,#16+__SIZEOF_POINTER__] - .inst 0xd50323bf // autiasp - ret - .size KeccakF1600_int,.-KeccakF1600_int - - .type KeccakF1600,%function - .align 5 - KeccakF1600: - .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]! 
- add x29,sp,#0 - stp x19,x20,[sp,#2*__SIZEOF_POINTER__] - stp x21,x22,[sp,#4*__SIZEOF_POINTER__] - stp x23,x24,[sp,#6*__SIZEOF_POINTER__] - stp x25,x26,[sp,#8*__SIZEOF_POINTER__] - stp x27,x28,[sp,#10*__SIZEOF_POINTER__] - sub sp,sp,#16+4*__SIZEOF_POINTER__ - - str x0,[sp,#16+2*__SIZEOF_POINTER__] // offload argument - mov x26,x0 - ldp x0,x1,[x0,#16*0] - ldp x2,x3,[x26,#16*1] - ldp x4,x5,[x26,#16*2] - ldp x6,x7,[x26,#16*3] - ldp x8,x9,[x26,#16*4] - ldp x10,x11,[x26,#16*5] - ldp x12,x13,[x26,#16*6] - ldp x14,x15,[x26,#16*7] - ldp x16,x17,[x26,#16*8] - ldp x25,x19,[x26,#16*9] - ldp x20,x21,[x26,#16*10] - ldp x22,x23,[x26,#16*11] - ldr x24,[x26,#16*12] - - adr x28,iotas - bl KeccakF1600_int - - ldr x26,[sp,#16+2*__SIZEOF_POINTER__] - stp x0,x1,[x26,#16*0] - stp x2,x3,[x26,#16*1] - stp x4,x5,[x26,#16*2] - stp x6,x7,[x26,#16*3] - stp x8,x9,[x26,#16*4] - stp x10,x11,[x26,#16*5] - stp x12,x13,[x26,#16*6] - stp x14,x15,[x26,#16*7] - stp x16,x17,[x26,#16*8] - stp x25,x19,[x26,#16*9] - stp x20,x21,[x26,#16*10] - stp x22,x23,[x26,#16*11] - str x24,[x26,#16*12] - - ldp x19,x20,[x29,#2*__SIZEOF_POINTER__] - add sp,sp,#16+4*__SIZEOF_POINTER__ - ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] - ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] - ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] - ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] - ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ - .inst 0xd50323bf // autiasp - ret - .size KeccakF1600,.-KeccakF1600 - - .globl SHA3_absorb - .type SHA3_absorb,%function - .align 5 - SHA3_absorb: - .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]! 
- add x29,sp,#0 - stp x19,x20,[sp,#2*__SIZEOF_POINTER__] - stp x21,x22,[sp,#4*__SIZEOF_POINTER__] - stp x23,x24,[sp,#6*__SIZEOF_POINTER__] - stp x25,x26,[sp,#8*__SIZEOF_POINTER__] - stp x27,x28,[sp,#10*__SIZEOF_POINTER__] - sub sp,sp,#16+4*__SIZEOF_POINTER__+16 - - stp x0,x1,[sp,#16+2*__SIZEOF_POINTER__] // offload arguments - stp x2,x3,[sp,#16+4*__SIZEOF_POINTER__] - - mov x26,x0 // uint64_t A[5][5] - mov x27,x1 // const void *inp - mov x28,x2 // size_t len - mov x30,x3 // size_t bsz - ldp x0,x1,[x26,#16*0] - ldp x2,x3,[x26,#16*1] - ldp x4,x5,[x26,#16*2] - ldp x6,x7,[x26,#16*3] - ldp x8,x9,[x26,#16*4] - ldp x10,x11,[x26,#16*5] - ldp x12,x13,[x26,#16*6] - ldp x14,x15,[x26,#16*7] - ldp x16,x17,[x26,#16*8] - ldp x25,x19,[x26,#16*9] - ldp x20,x21,[x26,#16*10] - ldp x22,x23,[x26,#16*11] - ldr x24,[x26,#16*12] - b .Loop_absorb - - .align 4 - .Loop_absorb: - subs x26,x28,x30 // len - bsz - blo .Labsorbed - - str x26,[sp,#16+4*__SIZEOF_POINTER__] // save len - bsz - cmp x30,#104 - ldr x26,[x27,#0] // A[0][0] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x0,x0,x26 - ldr x26,[x27,#8] // A[0][1] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x1,x1,x26 - ldr x26,[x27,#16] // A[0][2] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x2,x2,x26 - ldr x26,[x27,#24] // A[0][3] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x3,x3,x26 - ldr x26,[x27,#32] // A[0][4] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x4,x4,x26 - ldr x26,[x27,#40] // A[1][0] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x5,x5,x26 - ldr x26,[x27,#48] // A[1][1] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x6,x6,x26 - ldr x26,[x27,#56] // A[1][2] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x7,x7,x26 - ldr x26,[x27,#64] // A[1][3] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x8,x8,x26 - blo .Lprocess_block - - ldr x26,[x27,#72] // A[1][4] ^= *inp++ - #ifdef __AARCH64EB__ 
- rev x26,x26 - #endif - eor x9,x9,x26 - ldr x26,[x27,#80] // A[2][0] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x10,x10,x26 - ldr x26,[x27,#88] // A[2][1] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x11,x11,x26 - ldr x26,[x27,#96] // A[2][2] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x12,x12,x26 - beq .Lprocess_block - - cmp x30,#144 - ldr x26,[x27,#104] // A[2][3] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x13,x13,x26 - ldr x26,[x27,#112] // A[2][4] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x14,x14,x26 - ldr x26,[x27,#120] // A[3][0] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x15,x15,x26 - ldr x26,[x27,#128] // A[3][1] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x16,x16,x26 - blo .Lprocess_block - - ldr x26,[x27,#136] // A[3][2] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x17,x17,x26 - beq .Lprocess_block - - ldr x26,[x27,#144] // A[3][3] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x25,x25,x26 - ldr x26,[x27,#152] // A[3][4] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x19,x19,x26 - ldr x26,[x27,#160] // A[4][0] ^= *inp++ - #ifdef __AARCH64EB__ - rev x26,x26 - #endif - eor x20,x20,x26 - - .Lprocess_block: - add x27,x27,x30 - str x27,[sp,#16+3*__SIZEOF_POINTER__] // save inp - - adr x28,iotas - bl KeccakF1600_int - - ldr x27,[sp,#16+3*__SIZEOF_POINTER__] // restore arguments - ldp x28,x30,[sp,#16+4*__SIZEOF_POINTER__] - b .Loop_absorb - - .align 4 - .Labsorbed: - ldr x27,[sp,#16+2*__SIZEOF_POINTER__] - stp x0,x1,[x27,#16*0] - stp x2,x3,[x27,#16*1] - stp x4,x5,[x27,#16*2] - stp x6,x7,[x27,#16*3] - stp x8,x9,[x27,#16*4] - stp x10,x11,[x27,#16*5] - stp x12,x13,[x27,#16*6] - stp x14,x15,[x27,#16*7] - stp x16,x17,[x27,#16*8] - stp x25,x19,[x27,#16*9] - stp x20,x21,[x27,#16*10] - stp x22,x23,[x27,#16*11] - str x24,[x27,#16*12] - - mov x0,x28 // return value - ldp 
x19,x20,[x29,#2*__SIZEOF_POINTER__] - add sp,sp,#16+4*__SIZEOF_POINTER__+16 - ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] - ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] - ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] - ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] - ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ - .inst 0xd50323bf // autiasp - ret - .size SHA3_absorb,.-SHA3_absorb - .globl SHA3_squeeze - .type SHA3_squeeze,%function - .align 5 - SHA3_squeeze: - .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-6*__SIZEOF_POINTER__]! - add x29,sp,#0 - stp x19,x20,[sp,#2*__SIZEOF_POINTER__] - stp x21,x22,[sp,#4*__SIZEOF_POINTER__] - - mov x19,x0 // put aside arguments - mov x20,x1 - mov x21,x2 - mov x22,x3 - - .Loop_squeeze: - ldr x4,[x0],#8 - cmp x21,#8 - blo .Lsqueeze_tail - #ifdef __AARCH64EB__ - rev x4,x4 - #endif - str x4,[x20],#8 - subs x21,x21,#8 - beq .Lsqueeze_done - - subs x3,x3,#8 - bhi .Loop_squeeze - - mov x0,x19 - bl KeccakF1600 - mov x0,x19 - mov x3,x22 - b .Loop_squeeze - - .align 4 - .Lsqueeze_tail: - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - - .Lsqueeze_done: - ldp x19,x20,[sp,#2*__SIZEOF_POINTER__] - ldp x21,x22,[sp,#4*__SIZEOF_POINTER__] - ldp x29,x30,[sp],#6*__SIZEOF_POINTER__ - .inst 0xd50323bf // autiasp - ret - .size SHA3_squeeze,.-SHA3_squeeze - .type KeccakF1600_ce,%function - .align 5 - KeccakF1600_ce: - .Loop_ce: - ////////////////////////////////////////////////// Theta - .inst 0xce0f2a99 //eor3 v25.16b,v20.16b,v15.16b,v10.16b - .inst 0xce102eba //eor3 v26.16b,v21.16b,v16.16b,v11.16b - .inst 0xce1132db //eor3 v27.16b,v22.16b,v17.16b,v12.16b - .inst 0xce1236fc 
//eor3 v28.16b,v23.16b,v18.16b,v13.16b - .inst 0xce133b1d //eor3 v29.16b,v24.16b,v19.16b,v14.16b - .inst 0xce050339 //eor3 v25.16b,v25.16b, v5.16b,v0.16b - .inst 0xce06075a //eor3 v26.16b,v26.16b, v6.16b,v1.16b - .inst 0xce070b7b //eor3 v27.16b,v27.16b, v7.16b,v2.16b - .inst 0xce080f9c //eor3 v28.16b,v28.16b, v8.16b,v3.16b - .inst 0xce0913bd //eor3 v29.16b,v29.16b, v9.16b,v4.16b - - .inst 0xce7b8f3e //rax1 v30.2d,v25.2d,v27.2d // D[1] - .inst 0xce7c8f5f //rax1 v31.2d,v26.2d,v28.2d // D[2] - .inst 0xce7d8f7b //rax1 v27.2d,v27.2d,v29.2d // D[3] - .inst 0xce798f9c //rax1 v28.2d,v28.2d,v25.2d // D[4] - .inst 0xce7a8fbd //rax1 v29.2d,v29.2d,v26.2d // D[0] - - ////////////////////////////////////////////////// Theta+Rho+Pi - .inst 0xce9efc39 //xar v25.2d, v1.2d,v30.2d,#64-1 // C[0]=A[2][0] - - .inst 0xce9e50c1 //xar v1.2d,v6.2d,v30.2d,#64-44 - .inst 0xce9cb126 //xar v6.2d,v9.2d,v28.2d,#64-20 - .inst 0xce9f0ec9 //xar v9.2d,v22.2d,v31.2d,#64-61 - .inst 0xce9c65d6 //xar v22.2d,v14.2d,v28.2d,#64-39 - .inst 0xce9dba8e //xar v14.2d,v20.2d,v29.2d,#64-18 - - .inst 0xce9f085a //xar v26.2d, v2.2d,v31.2d,#64-62 // C[1]=A[4][0] - - .inst 0xce9f5582 //xar v2.2d,v12.2d,v31.2d,#64-43 - .inst 0xce9b9dac //xar v12.2d,v13.2d,v27.2d,#64-25 - .inst 0xce9ce26d //xar v13.2d,v19.2d,v28.2d,#64-8 - .inst 0xce9b22f3 //xar v19.2d,v23.2d,v27.2d,#64-56 - .inst 0xce9d5df7 //xar v23.2d,v15.2d,v29.2d,#64-41 - - .inst 0xce9c948f //xar v15.2d,v4.2d,v28.2d,#64-27 - - .inst 0xce9ccb1c //xar v28.2d, v24.2d,v28.2d,#64-14 // D[4]=A[0][4] - .inst 0xce9efab8 //xar v24.2d,v21.2d,v30.2d,#64-2 - .inst 0xce9b2508 //xar v8.2d,v8.2d,v27.2d,#64-55 // A[1][3]=A[4][1] - .inst 0xce9e4e04 //xar v4.2d,v16.2d,v30.2d,#64-45 // A[0][4]=A[1][3] - .inst 0xce9d70b0 //xar v16.2d,v5.2d,v29.2d,#64-36 - - .inst 0xce9b9065 //xar v5.2d,v3.2d,v27.2d,#64-28 - - eor v0.16b,v0.16b,v29.16b - - .inst 0xce9bae5b //xar v27.2d, v18.2d,v27.2d,#64-21 // D[3]=A[0][3] - .inst 0xce9fc623 //xar v3.2d,v17.2d,v31.2d,#64-15 // A[0][3]=A[3][3] - .inst 
0xce9ed97e //xar v30.2d, v11.2d,v30.2d,#64-10 // D[1]=A[3][2] - .inst 0xce9fe8ff //xar v31.2d, v7.2d,v31.2d,#64-6 // D[2]=A[2][1] - .inst 0xce9df55d //xar v29.2d, v10.2d,v29.2d,#64-3 // D[0]=A[1][2] - - ////////////////////////////////////////////////// Chi+Iota - .inst 0xce362354 //bcax v20.16b,v26.16b, v22.16b,v8.16b // A[1][3]=A[4][1] - .inst 0xce375915 //bcax v21.16b,v8.16b,v23.16b,v22.16b // A[1][3]=A[4][1] - .inst 0xce385ed6 //bcax v22.16b,v22.16b,v24.16b,v23.16b - .inst 0xce3a62f7 //bcax v23.16b,v23.16b,v26.16b, v24.16b - .inst 0xce286b18 //bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1] - - ld1r {{v26.2d}},[x10],#8 - - .inst 0xce330fd1 //bcax v17.16b,v30.16b, v19.16b,v3.16b // A[0][3]=A[3][3] - .inst 0xce2f4c72 //bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3] - .inst 0xce303e73 //bcax v19.16b,v19.16b,v16.16b,v15.16b - .inst 0xce3e41ef //bcax v15.16b,v15.16b,v30.16b, v16.16b - .inst 0xce237a10 //bcax v16.16b,v16.16b,v3.16b,v30.16b // A[0][3]=A[3][3] - - .inst 0xce2c7f2a //bcax v10.16b,v25.16b, v12.16b,v31.16b - .inst 0xce2d33eb //bcax v11.16b,v31.16b, v13.16b,v12.16b - .inst 0xce2e358c //bcax v12.16b,v12.16b,v14.16b,v13.16b - .inst 0xce3939ad //bcax v13.16b,v13.16b,v25.16b, v14.16b - .inst 0xce3f65ce //bcax v14.16b,v14.16b,v31.16b, v25.16b - - .inst 0xce2913a7 //bcax v7.16b,v29.16b, v9.16b,v4.16b // A[0][4]=A[1][3] - .inst 0xce252488 //bcax v8.16b,v4.16b,v5.16b,v9.16b // A[0][4]=A[1][3] - .inst 0xce261529 //bcax v9.16b,v9.16b,v6.16b,v5.16b - .inst 0xce3d18a5 //bcax v5.16b,v5.16b,v29.16b, v6.16b - .inst 0xce2474c6 //bcax v6.16b,v6.16b,v4.16b,v29.16b // A[0][4]=A[1][3] - - .inst 0xce207363 //bcax v3.16b,v27.16b, v0.16b,v28.16b - .inst 0xce210384 //bcax v4.16b,v28.16b, v1.16b,v0.16b - .inst 0xce220400 //bcax v0.16b,v0.16b,v2.16b,v1.16b - .inst 0xce3b0821 //bcax v1.16b,v1.16b,v27.16b, v2.16b - .inst 0xce3c6c42 //bcax v2.16b,v2.16b,v28.16b, v27.16b - - eor v0.16b,v0.16b,v26.16b - - tst x10,#255 - bne .Loop_ce - - ret - .size 
KeccakF1600_ce,.-KeccakF1600_ce - - .type KeccakF1600_cext,%function - .align 5 - KeccakF1600_cext: - .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! - add x29,sp,#0 - stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement - stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] - ldp d0,d1,[x0,#8*0] - ldp d2,d3,[x0,#8*2] - ldp d4,d5,[x0,#8*4] - ldp d6,d7,[x0,#8*6] - ldp d8,d9,[x0,#8*8] - ldp d10,d11,[x0,#8*10] - ldp d12,d13,[x0,#8*12] - ldp d14,d15,[x0,#8*14] - ldp d16,d17,[x0,#8*16] - ldp d18,d19,[x0,#8*18] - ldp d20,d21,[x0,#8*20] - ldp d22,d23,[x0,#8*22] - ldr d24,[x0,#8*24] - adr x10,iotas - bl KeccakF1600_ce - ldr x30,[sp,#__SIZEOF_POINTER__] - stp d0,d1,[x0,#8*0] - stp d2,d3,[x0,#8*2] - stp d4,d5,[x0,#8*4] - stp d6,d7,[x0,#8*6] - stp d8,d9,[x0,#8*8] - stp d10,d11,[x0,#8*10] - stp d12,d13,[x0,#8*12] - stp d14,d15,[x0,#8*14] - stp d16,d17,[x0,#8*16] - stp d18,d19,[x0,#8*18] - stp d20,d21,[x0,#8*20] - stp d22,d23,[x0,#8*22] - str d24,[x0,#8*24] - - ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] - ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] - ldr x29,[sp],#2*__SIZEOF_POINTER__+64 - .inst 0xd50323bf // autiasp - ret - .size KeccakF1600_cext,.-KeccakF1600_cext - .globl SHA3_absorb_cext - .type SHA3_absorb_cext,%function - .align 5 - SHA3_absorb_cext: - .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! 
- add x29,sp,#0 - stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement - stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] - ldp d0,d1,[x0,#8*0] - ldp d2,d3,[x0,#8*2] - ldp d4,d5,[x0,#8*4] - ldp d6,d7,[x0,#8*6] - ldp d8,d9,[x0,#8*8] - ldp d10,d11,[x0,#8*10] - ldp d12,d13,[x0,#8*12] - ldp d14,d15,[x0,#8*14] - ldp d16,d17,[x0,#8*16] - ldp d18,d19,[x0,#8*18] - ldp d20,d21,[x0,#8*20] - ldp d22,d23,[x0,#8*22] - ldr d24,[x0,#8*24] - b .Loop_absorb_ce - - .align 4 - .Loop_absorb_ce: - subs x2,x2,x3 // len - bsz - blo .Labsorbed_ce - - cmp x3,#104 - ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 - eor v0.16b,v0.16b,v27.16b - eor v1.16b,v1.16b,v28.16b - eor v2.16b,v2.16b,v29.16b - eor v3.16b,v3.16b,v30.16b - ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 - eor v4.16b,v4.16b,v27.16b - eor v5.16b,v5.16b,v28.16b - eor v6.16b,v6.16b,v29.16b - eor v7.16b,v7.16b,v30.16b - ld1 {{v31.8b}},[x1],#8 // A[1][4] ^= *inp++ - eor v8.16b,v8.16b,v31.16b - blo .Lprocess_block_ce - - ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 - eor v9.16b,v9.16b,v27.16b - eor v10.16b,v10.16b,v28.16b - eor v11.16b,v11.16b,v29.16b - eor v12.16b,v12.16b,v30.16b - beq .Lprocess_block_ce - - cmp x3,#144 - ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 - eor v13.16b,v13.16b,v27.16b - eor v14.16b,v14.16b,v28.16b - eor v15.16b,v15.16b,v29.16b - eor v16.16b,v16.16b,v30.16b - blo .Lprocess_block_ce - - ld1 {{v31.8b}},[x1],#8 // A[3][3] ^= *inp++ - eor v17.16b,v17.16b,v31.16b - beq .Lprocess_block_ce - - ld1 {{v28.8b,v29.8b,v30.8b}},[x1],#24 - eor v18.16b,v18.16b,v28.16b - eor v19.16b,v19.16b,v29.16b - eor v20.16b,v20.16b,v30.16b - - .Lprocess_block_ce: - adr x10,iotas - bl KeccakF1600_ce - - b .Loop_absorb_ce - - .align 4 - .Labsorbed_ce: - stp d0,d1,[x0,#8*0] - stp d2,d3,[x0,#8*2] - stp d4,d5,[x0,#8*4] - stp d6,d7,[x0,#8*6] - stp d8,d9,[x0,#8*8] - stp d10,d11,[x0,#8*10] - stp d12,d13,[x0,#8*12] - stp d14,d15,[x0,#8*14] - stp 
d16,d17,[x0,#8*16] - stp d18,d19,[x0,#8*18] - stp d20,d21,[x0,#8*20] - stp d22,d23,[x0,#8*22] - str d24,[x0,#8*24] - add x0,x2,x3 // return value - - ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] - ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] - ldp x29,x30,[sp],#2*__SIZEOF_POINTER__+64 - .inst 0xd50323bf // autiasp - ret - .size SHA3_absorb_cext,.-SHA3_absorb_cext - .globl SHA3_squeeze_cext - .type SHA3_squeeze_cext,%function - .align 5 - SHA3_squeeze_cext: - .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*__SIZEOF_POINTER__]! - add x29,sp,#0 - mov x9,x0 - mov x10,x3 - - .Loop_squeeze_ce: - ldr x4,[x9],#8 - cmp x2,#8 - blo .Lsqueeze_tail_ce - #ifdef __AARCH64EB__ - rev x4,x4 - #endif - str x4,[x1],#8 - beq .Lsqueeze_done_ce - - sub x2,x2,#8 - subs x10,x10,#8 - bhi .Loop_squeeze_ce - - bl KeccakF1600_cext - ldr x30,[sp,#__SIZEOF_POINTER__] - mov x9,x0 - mov x10,x3 - b .Loop_squeeze_ce - - .align 4 - .Lsqueeze_tail_ce: - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 +use std::ptr; + +#[cfg(target_arch = "aarch64")] +use self::aarch64::{SHA3_absorb, SHA3_squeeze}; +#[cfg(target_arch = "x86_64")] +use self::x86_64::{SHA3_absorb, SHA3_squeeze}; + +mod aarch64; +mod x86_64; + +const BLOCK_SIZE: usize = 136; + +#[derive(Default)] +#[repr(transparent)] +struct State([u64; 25]); + +pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32] { + let mut state = State::default(); + let mut tail_buf = [0; BLOCK_SIZE]; + let mut hash_buf = [0; 32]; + + let data = data.as_ref(); + if !data.is_empty() { + 
let tail_len = unsafe { SHA3_absorb(&mut state, data.as_ptr(), data.len(), BLOCK_SIZE) }; + if tail_len != 0 { + let tail_ptr = unsafe { data.as_ptr().add(data.len() - tail_len) }; + unsafe { ptr::copy_nonoverlapping(tail_ptr, tail_buf.as_mut_ptr(), tail_len) }; + } + + *unsafe { tail_buf.get_unchecked_mut(tail_len) } = 0x01; + if tail_len == BLOCK_SIZE - 1 { + tail_buf[BLOCK_SIZE - 1] = 0x01; + unsafe { SHA3_absorb(&mut state, tail_buf.as_ptr(), BLOCK_SIZE, BLOCK_SIZE) }; + unsafe { ptr::write_bytes(tail_buf.as_mut_ptr(), 0x00, tail_buf.len()) }; + tail_buf[0] = 0x01; + } + } else { + tail_buf[0] = 0x01; + } + + tail_buf[BLOCK_SIZE - 1] = 0x80; + unsafe { SHA3_absorb(&mut state, tail_buf.as_mut_ptr(), BLOCK_SIZE, BLOCK_SIZE) }; + + unsafe { + SHA3_squeeze( + &mut state, + hash_buf.as_mut_ptr(), + hash_buf.len(), + BLOCK_SIZE, + ); + } + + hash_buf +} - .Lsqueeze_done_ce: - ldr x29,[sp],#2*__SIZEOF_POINTER__ - .inst 0xd50323bf // autiasp - ret - .size SHA3_squeeze_cext,.-SHA3_squeeze_cext - .byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 - .align 2 - "# - ) +#[cfg(test)] +mod test { + use super::*; + use std::array; + + #[test] + fn keccak_empty() { + assert_eq!( + keccak_hash(b"") + .into_iter() + .map(|x| format!("{x:02x}")) + .collect::(), + "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470", + ); + } + + #[test] + fn keccak_half_block() { + let buf: [u8; BLOCK_SIZE >> 1] = + array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); + + assert_eq!( + keccak_hash(buf) + .into_iter() + .map(|x| format!("{x:02x}")) + .collect::(), + "337bf14237b641240bd3204e9991c8b96a5349613735ade90a5c2b8806355c11", + ); + } + + #[test] + fn keccak_full_block() { + let buf: [u8; BLOCK_SIZE] = + array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); + + assert_eq!( + 
keccak_hash(buf) + .into_iter() + .map(|x| format!("{x:02x}")) + .collect::(), + "3f7424fa94a2f8c5a733b86dac312d85685f9af3dea919694cc6a8abfc075460", + ); + } + + #[test] + fn keccak_almost_full_block() { + let buf: [u8; BLOCK_SIZE - 1] = + array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); + + assert_eq!( + keccak_hash(buf) + .into_iter() + .map(|x| format!("{x:02x}")) + .collect::(), + "3e4916729e2522af4937548f5848a5b49067eec910a0a6a890b0c71dde08854e", + ); + } } diff --git a/crates/common/crypto/keccak/x86_64.rs b/crates/common/crypto/keccak/x86_64.rs new file mode 100644 index 00000000000..c680ab0371b --- /dev/null +++ b/crates/common/crypto/keccak/x86_64.rs @@ -0,0 +1,12 @@ +#![cfg(target_arch = "x86_64")] + +use super::State; +use std::arch::global_asm; + +global_asm!(include_str!("keccak1600-x86_64.s")); + +unsafe extern "C" { + // unsafe fn KeccakF1600(); + pub unsafe fn SHA3_absorb(state: *mut State, buf: *const u8, len: usize, r: usize) -> usize; + pub unsafe fn SHA3_squeeze(state: *mut State, buf: *mut u8, len: usize, r: usize); +} From 5ee9f132779f657071f6c8ff52e1973032533500 Mon Sep 17 00:00:00 2001 From: Esteve Soler Arderiu Date: Wed, 5 Nov 2025 16:31:20 +0100 Subject: [PATCH 03/40] Fix keccak bug on almost full block. 
--- crates/common/crypto/keccak/mod.rs | 42 ++++++++++++++---------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index 5cba0abcb0d..4264e35d26b 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -1,5 +1,3 @@ -use std::ptr; - #[cfg(target_arch = "aarch64")] use self::aarch64::{SHA3_absorb, SHA3_squeeze}; #[cfg(target_arch = "x86_64")] @@ -19,29 +17,29 @@ pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32] { let mut tail_buf = [0; BLOCK_SIZE]; let mut hash_buf = [0; 32]; - let data = data.as_ref(); - if !data.is_empty() { - let tail_len = unsafe { SHA3_absorb(&mut state, data.as_ptr(), data.len(), BLOCK_SIZE) }; - if tail_len != 0 { - let tail_ptr = unsafe { data.as_ptr().add(data.len() - tail_len) }; - unsafe { ptr::copy_nonoverlapping(tail_ptr, tail_buf.as_mut_ptr(), tail_len) }; - } - - *unsafe { tail_buf.get_unchecked_mut(tail_len) } = 0x01; - if tail_len == BLOCK_SIZE - 1 { - tail_buf[BLOCK_SIZE - 1] = 0x01; - unsafe { SHA3_absorb(&mut state, tail_buf.as_ptr(), BLOCK_SIZE, BLOCK_SIZE) }; - unsafe { ptr::write_bytes(tail_buf.as_mut_ptr(), 0x00, tail_buf.len()) }; - tail_buf[0] = 0x01; - } - } else { - tail_buf[0] = 0x01; + let tail_len; + match data.as_ref() { + [] => tail_len = 0, + data if data.len() < BLOCK_SIZE => unsafe { + tail_len = data.len(); + tail_buf.get_unchecked_mut(..tail_len).copy_from_slice(data); + }, + data => unsafe { + tail_len = SHA3_absorb(&mut state, data.as_ptr(), data.len(), BLOCK_SIZE); + if tail_len != 0 { + let tail_data = data.get_unchecked(data.len() - tail_len..); + tail_buf + .get_unchecked_mut(..tail_len) + .copy_from_slice(tail_data); + } + }, } - tail_buf[BLOCK_SIZE - 1] = 0x80; - unsafe { SHA3_absorb(&mut state, tail_buf.as_mut_ptr(), BLOCK_SIZE, BLOCK_SIZE) }; - unsafe { + *tail_buf.get_unchecked_mut(tail_len) = 0x01; + *tail_buf.get_unchecked_mut(BLOCK_SIZE - 1) |= 0x80; + + 
SHA3_absorb(&mut state, tail_buf.as_ptr(), tail_buf.len(), BLOCK_SIZE); SHA3_squeeze( &mut state, hash_buf.as_mut_ptr(), From fddc65161f5e586dc2b11531c59968da60f80aa0 Mon Sep 17 00:00:00 2001 From: Esteve Soler Arderiu Date: Wed, 5 Nov 2025 17:04:20 +0100 Subject: [PATCH 04/40] Remove unnecessary files. --- crates/common/crypto/keccak/aarch64.rs | 1 - crates/common/crypto/keccak/mod.rs | 13 ++++++++----- crates/common/crypto/keccak/x86_64.rs | 12 ------------ 3 files changed, 8 insertions(+), 18 deletions(-) delete mode 100644 crates/common/crypto/keccak/aarch64.rs delete mode 100644 crates/common/crypto/keccak/x86_64.rs diff --git a/crates/common/crypto/keccak/aarch64.rs b/crates/common/crypto/keccak/aarch64.rs deleted file mode 100644 index fa5a67f162b..00000000000 --- a/crates/common/crypto/keccak/aarch64.rs +++ /dev/null @@ -1 +0,0 @@ -#![cfg(target_arch = "aarch64")] diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index 4264e35d26b..091adc4307a 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -1,10 +1,7 @@ #[cfg(target_arch = "aarch64")] -use self::aarch64::{SHA3_absorb, SHA3_squeeze}; +std::arch::global_asm!(include_str!("keccak1600-armv8.s")); #[cfg(target_arch = "x86_64")] -use self::x86_64::{SHA3_absorb, SHA3_squeeze}; - -mod aarch64; -mod x86_64; +std::arch::global_asm!(include_str!("keccak1600-x86_64.s")); const BLOCK_SIZE: usize = 136; @@ -12,6 +9,12 @@ const BLOCK_SIZE: usize = 136; #[repr(transparent)] struct State([u64; 25]); +unsafe extern "C" { + #[link_name = "SHA3_absorb"] + unsafe fn SHA3_absorb(state: *mut State, buf: *const u8, len: usize, r: usize) -> usize; + unsafe fn SHA3_squeeze(state: *mut State, buf: *mut u8, len: usize, r: usize); +} + pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32] { let mut state = State::default(); let mut tail_buf = [0; BLOCK_SIZE]; diff --git a/crates/common/crypto/keccak/x86_64.rs b/crates/common/crypto/keccak/x86_64.rs deleted 
file mode 100644 index c680ab0371b..00000000000 --- a/crates/common/crypto/keccak/x86_64.rs +++ /dev/null @@ -1,12 +0,0 @@ -#![cfg(target_arch = "x86_64")] - -use super::State; -use std::arch::global_asm; - -global_asm!(include_str!("keccak1600-x86_64.s")); - -unsafe extern "C" { - // unsafe fn KeccakF1600(); - pub unsafe fn SHA3_absorb(state: *mut State, buf: *const u8, len: usize, r: usize) -> usize; - pub unsafe fn SHA3_squeeze(state: *mut State, buf: *mut u8, len: usize, r: usize); -} From 5b379ed2a13221eee71ed5918c32b33627ffcdf2 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Wed, 5 Nov 2025 13:12:31 -0300 Subject: [PATCH 05/40] fixes to armv8 asm --- .../common/crypto/keccak/keccak1600-armv8.s | 406 +++++++++--------- 1 file changed, 207 insertions(+), 199 deletions(-) diff --git a/crates/common/crypto/keccak/keccak1600-armv8.s b/crates/common/crypto/keccak/keccak1600-armv8.s index 934e6ac9c99..9baac7fab8a 100644 --- a/crates/common/crypto/keccak/keccak1600-armv8.s +++ b/crates/common/crypto/keccak/keccak1600-armv8.s @@ -1,9 +1,17 @@ -.text +// Modified: all instances of curly brackets need to be escaped with +// a second bracket to avoid Rust's `global_asm` trying to interpret +// them as a template substitution. +// Ran `cpp` to substitute constants. +// Commented out ARM assembly annotations (.size, .type) used only +// for debugging purposes and not understood by Rust. +// Removed dots from local labels for correct detection in the frontend. +// TODO: this is probably a matter of selecting the right parameter +// for the translator. .align 8 // strategic alignment and padding that allows to use // address value as loop termination condition... 
.quad 0,0,0,0,0,0,0,0 -.type iotas,%object +// .type iotas,%object iotas: .quad 0x0000000000000001 .quad 0x0000000000008082 @@ -30,8 +38,8 @@ iotas: .quad 0x8000000000008080 .quad 0x0000000080000001 .quad 0x8000000080008008 -.size iotas,.-iotas -.type KeccakF1600_int,%function +// .size iotas,.-iotas +// .type KeccakF1600_int,%function .align 5 KeccakF1600_int: .inst 0xd503233f // paciasp @@ -197,25 +205,25 @@ KeccakF1600_int: bne .Loop - ldr x30,[sp,#16+__SIZEOF_POINTER__] + ldr x30,[sp,#16+8] .inst 0xd50323bf // autiasp ret -.size KeccakF1600_int,.-KeccakF1600_int +// .size KeccakF1600_int,.-KeccakF1600_int -.type KeccakF1600,%function +// .type KeccakF1600,%function .align 5 KeccakF1600: .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]! + stp x29,x30,[sp,#-16*8]! add x29,sp,#0 - stp x19,x20,[sp,#2*__SIZEOF_POINTER__] - stp x21,x22,[sp,#4*__SIZEOF_POINTER__] - stp x23,x24,[sp,#6*__SIZEOF_POINTER__] - stp x25,x26,[sp,#8*__SIZEOF_POINTER__] - stp x27,x28,[sp,#10*__SIZEOF_POINTER__] - sub sp,sp,#16+4*__SIZEOF_POINTER__ - - str x0,[sp,#16+2*__SIZEOF_POINTER__] // offload argument + stp x19,x20,[sp,#2*8] + stp x21,x22,[sp,#4*8] + stp x23,x24,[sp,#6*8] + stp x25,x26,[sp,#8*8] + stp x27,x28,[sp,#10*8] + sub sp,sp,#16+4*8 + + str x0,[sp,#16+2*8] // offload argument mov x26,x0 ldp x0,x1,[x0,#16*0] ldp x2,x3,[x26,#16*1] @@ -234,7 +242,7 @@ KeccakF1600: adr x28,iotas bl KeccakF1600_int - ldr x26,[sp,#16+2*__SIZEOF_POINTER__] + ldr x26,[sp,#16+2*8] stp x0,x1,[x26,#16*0] stp x2,x3,[x26,#16*1] stp x4,x5,[x26,#16*2] @@ -249,33 +257,33 @@ KeccakF1600: stp x22,x23,[x26,#16*11] str x24,[x26,#16*12] - ldp x19,x20,[x29,#2*__SIZEOF_POINTER__] - add sp,sp,#16+4*__SIZEOF_POINTER__ - ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] - ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] - ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] - ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] - ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ + ldp x19,x20,[x29,#2*8] + add sp,sp,#16+4*8 + ldp x21,x22,[x29,#4*8] + ldp 
x23,x24,[x29,#6*8] + ldp x25,x26,[x29,#8*8] + ldp x27,x28,[x29,#10*8] + ldp x29,x30,[sp],#16*8 .inst 0xd50323bf // autiasp ret -.size KeccakF1600,.-KeccakF1600 +// .size KeccakF1600,.-KeccakF1600 .globl SHA3_absorb -.type SHA3_absorb,%function +// .type SHA3_absorb,%function .align 5 SHA3_absorb: .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]! + stp x29,x30,[sp,#-16*8]! add x29,sp,#0 - stp x19,x20,[sp,#2*__SIZEOF_POINTER__] - stp x21,x22,[sp,#4*__SIZEOF_POINTER__] - stp x23,x24,[sp,#6*__SIZEOF_POINTER__] - stp x25,x26,[sp,#8*__SIZEOF_POINTER__] - stp x27,x28,[sp,#10*__SIZEOF_POINTER__] - sub sp,sp,#16+4*__SIZEOF_POINTER__+16 + stp x19,x20,[sp,#2*8] + stp x21,x22,[sp,#4*8] + stp x23,x24,[sp,#6*8] + stp x25,x26,[sp,#8*8] + stp x27,x28,[sp,#10*8] + sub sp,sp,#16+4*8 +16 - stp x0,x1,[sp,#16+2*__SIZEOF_POINTER__] // offload arguments - stp x2,x3,[sp,#16+4*__SIZEOF_POINTER__] + stp x0,x1,[sp,#16+2*8] // offload arguments + stp x2,x3,[sp,#16+4*8] mov x26,x0 // uint64_t A[5][5] mov x27,x1 // const void *inp @@ -299,139 +307,139 @@ SHA3_absorb: .align 4 .Loop_absorb: subs x26,x28,x30 // len - bsz - blo .Labsorbed + blo Labsorbed - str x26,[sp,#16+4*__SIZEOF_POINTER__] // save len - bsz + str x26,[sp,#16+4*8] // save len - bsz cmp x30,#104 ldr x26,[x27,#0] // A[0][0] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x0,x0,x26 ldr x26,[x27,#8] // A[0][1] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x1,x1,x26 ldr x26,[x27,#16] // A[0][2] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x2,x2,x26 ldr x26,[x27,#24] // A[0][3] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x3,x3,x26 ldr x26,[x27,#32] // A[0][4] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x4,x4,x26 ldr x26,[x27,#40] // A[1][0] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x5,x5,x26 ldr x26,[x27,#48] // A[1][1] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x6,x6,x26 ldr 
x26,[x27,#56] // A[1][2] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x7,x7,x26 ldr x26,[x27,#64] // A[1][3] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x8,x8,x26 - blo .Lprocess_block + blo Lprocess_block ldr x26,[x27,#72] // A[1][4] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x9,x9,x26 ldr x26,[x27,#80] // A[2][0] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x10,x10,x26 ldr x26,[x27,#88] // A[2][1] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x11,x11,x26 ldr x26,[x27,#96] // A[2][2] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x12,x12,x26 - beq .Lprocess_block + beq Lprocess_block cmp x30,#144 ldr x26,[x27,#104] // A[2][3] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x13,x13,x26 ldr x26,[x27,#112] // A[2][4] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x14,x14,x26 ldr x26,[x27,#120] // A[3][0] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x15,x15,x26 ldr x26,[x27,#128] // A[3][1] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x16,x16,x26 - blo .Lprocess_block + blo Lprocess_block ldr x26,[x27,#136] // A[3][2] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x17,x17,x26 - beq .Lprocess_block + beq Lprocess_block ldr x26,[x27,#144] // A[3][3] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x25,x25,x26 ldr x26,[x27,#152] // A[3][4] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x19,x19,x26 ldr x26,[x27,#160] // A[4][0] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif + + + eor x20,x20,x26 -.Lprocess_block: +Lprocess_block: add x27,x27,x30 - str x27,[sp,#16+3*__SIZEOF_POINTER__] // save inp + str x27,[sp,#16+3*8] // save inp adr x28,iotas bl KeccakF1600_int - ldr x27,[sp,#16+3*__SIZEOF_POINTER__] // restore arguments - ldp x28,x30,[sp,#16+4*__SIZEOF_POINTER__] + ldr x27,[sp,#16+3*8] // restore arguments + ldp x28,x30,[sp,#16+4*8] 
b .Loop_absorb .align 4 -.Labsorbed: - ldr x27,[sp,#16+2*__SIZEOF_POINTER__] +Labsorbed: + ldr x27,[sp,#16+2*8] stp x0,x1,[x27,#16*0] stp x2,x3,[x27,#16*1] stp x4,x5,[x27,#16*2] @@ -447,25 +455,25 @@ SHA3_absorb: str x24,[x27,#16*12] mov x0,x28 // return value - ldp x19,x20,[x29,#2*__SIZEOF_POINTER__] - add sp,sp,#16+4*__SIZEOF_POINTER__+16 - ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] - ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] - ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] - ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] - ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ + ldp x19,x20,[x29,#2*8] + add sp,sp,#16+4*8 +16 + ldp x21,x22,[x29,#4*8] + ldp x23,x24,[x29,#6*8] + ldp x25,x26,[x29,#8*8] + ldp x27,x28,[x29,#10*8] + ldp x29,x30,[sp],#16*8 .inst 0xd50323bf // autiasp ret -.size SHA3_absorb,.-SHA3_absorb +// .size SHA3_absorb,.-SHA3_absorb .globl SHA3_squeeze -.type SHA3_squeeze,%function +// .type SHA3_squeeze,%function .align 5 SHA3_squeeze: .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-6*__SIZEOF_POINTER__]! + stp x29,x30,[sp,#-6*8]! 
add x29,sp,#0 - stp x19,x20,[sp,#2*__SIZEOF_POINTER__] - stp x21,x22,[sp,#4*__SIZEOF_POINTER__] + stp x19,x20,[sp,#2*8] + stp x21,x22,[sp,#4*8] mov x19,x0 // put aside arguments mov x20,x1 @@ -475,13 +483,13 @@ SHA3_squeeze: .Loop_squeeze: ldr x4,[x0],#8 cmp x21,#8 - blo .Lsqueeze_tail -#ifdef __AARCH64EB__ - rev x4,x4 -#endif + blo Lsqueeze_tail + + + str x4,[x20],#8 subs x21,x21,#8 - beq .Lsqueeze_done + beq Lsqueeze_done subs x3,x3,#8 bhi .Loop_squeeze @@ -493,41 +501,41 @@ SHA3_squeeze: b .Loop_squeeze .align 4 -.Lsqueeze_tail: +Lsqueeze_tail: strb w4,[x20],#1 lsr x4,x4,#8 subs x21,x21,#1 - beq .Lsqueeze_done + beq Lsqueeze_done strb w4,[x20],#1 lsr x4,x4,#8 subs x21,x21,#1 - beq .Lsqueeze_done + beq Lsqueeze_done strb w4,[x20],#1 lsr x4,x4,#8 subs x21,x21,#1 - beq .Lsqueeze_done + beq Lsqueeze_done strb w4,[x20],#1 lsr x4,x4,#8 subs x21,x21,#1 - beq .Lsqueeze_done + beq Lsqueeze_done strb w4,[x20],#1 lsr x4,x4,#8 subs x21,x21,#1 - beq .Lsqueeze_done + beq Lsqueeze_done strb w4,[x20],#1 lsr x4,x4,#8 subs x21,x21,#1 - beq .Lsqueeze_done + beq Lsqueeze_done strb w4,[x20],#1 -.Lsqueeze_done: - ldp x19,x20,[sp,#2*__SIZEOF_POINTER__] - ldp x21,x22,[sp,#4*__SIZEOF_POINTER__] - ldp x29,x30,[sp],#6*__SIZEOF_POINTER__ +Lsqueeze_done: + ldp x19,x20,[sp,#2*8] + ldp x21,x22,[sp,#4*8] + ldp x29,x30,[sp],#6*8 .inst 0xd50323bf // autiasp ret -.size SHA3_squeeze,.-SHA3_squeeze -.type KeccakF1600_ce,%function +// .size SHA3_squeeze,.-SHA3_squeeze +// .type KeccakF1600_ce,%function .align 5 KeccakF1600_ce: .Loop_ce: @@ -591,7 +599,7 @@ KeccakF1600_ce: .inst 0xce3a62f7 //bcax v23.16b,v23.16b,v26.16b, v24.16b .inst 0xce286b18 //bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1] - ld1r {v26.2d},[x10],#8 + ld1r {{v26.2d}},[x10],#8 .inst 0xce330fd1 //bcax v17.16b,v30.16b, v19.16b,v3.16b // A[0][3]=A[3][3] .inst 0xce2f4c72 //bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3] @@ -623,18 +631,18 @@ KeccakF1600_ce: bne .Loop_ce ret -.size KeccakF1600_ce,.-KeccakF1600_ce +// .size 
KeccakF1600_ce,.-KeccakF1600_ce -.type KeccakF1600_cext,%function +// .type KeccakF1600_cext,%function .align 5 KeccakF1600_cext: .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! + stp x29,x30,[sp,#-2*8 -64]! add x29,sp,#0 - stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement - stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + stp d8,d9,[sp,#2*8 +0] // per ABI requirement + stp d10,d11,[sp,#2*8 +16] + stp d12,d13,[sp,#2*8 +32] + stp d14,d15,[sp,#2*8 +48] ldp d0,d1,[x0,#8*0] ldp d2,d3,[x0,#8*2] ldp d4,d5,[x0,#8*4] @@ -650,7 +658,7 @@ KeccakF1600_cext: ldr d24,[x0,#8*24] adr x10,iotas bl KeccakF1600_ce - ldr x30,[sp,#__SIZEOF_POINTER__] + ldr x30,[sp,#8] stp d0,d1,[x0,#8*0] stp d2,d3,[x0,#8*2] stp d4,d5,[x0,#8*4] @@ -665,25 +673,25 @@ KeccakF1600_cext: stp d22,d23,[x0,#8*22] str d24,[x0,#8*24] - ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] - ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] - ldr x29,[sp],#2*__SIZEOF_POINTER__+64 + ldp d8,d9,[sp,#2*8 +0] + ldp d10,d11,[sp,#2*8 +16] + ldp d12,d13,[sp,#2*8 +32] + ldp d14,d15,[sp,#2*8 +48] + ldr x29,[sp],#2*8 +64 .inst 0xd50323bf // autiasp ret -.size KeccakF1600_cext,.-KeccakF1600_cext +// .size KeccakF1600_cext,.-KeccakF1600_cext .globl SHA3_absorb_cext -.type SHA3_absorb_cext,%function +// .type SHA3_absorb_cext,%function .align 5 SHA3_absorb_cext: .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! + stp x29,x30,[sp,#-2*8 -64]! 
add x29,sp,#0 - stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement - stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] + stp d8,d9,[sp,#2*8 +0] // per ABI requirement + stp d10,d11,[sp,#2*8 +16] + stp d12,d13,[sp,#2*8 +32] + stp d14,d15,[sp,#2*8 +48] ldp d0,d1,[x0,#8*0] ldp d2,d3,[x0,#8*2] ldp d4,d5,[x0,#8*4] @@ -702,55 +710,55 @@ SHA3_absorb_cext: .align 4 .Loop_absorb_ce: subs x2,x2,x3 // len - bsz - blo .Labsorbed_ce + blo Labsorbed_ce cmp x3,#104 - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 eor v0.16b,v0.16b,v27.16b eor v1.16b,v1.16b,v28.16b eor v2.16b,v2.16b,v29.16b eor v3.16b,v3.16b,v30.16b - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 eor v4.16b,v4.16b,v27.16b eor v5.16b,v5.16b,v28.16b eor v6.16b,v6.16b,v29.16b eor v7.16b,v7.16b,v30.16b - ld1 {v31.8b},[x1],#8 // A[1][4] ^= *inp++ + ld1 {{v31.8b}},[x1],#8 // A[1][4] ^= *inp++ eor v8.16b,v8.16b,v31.16b - blo .Lprocess_block_ce + blo Lprocess_block_ce - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 eor v9.16b,v9.16b,v27.16b eor v10.16b,v10.16b,v28.16b eor v11.16b,v11.16b,v29.16b eor v12.16b,v12.16b,v30.16b - beq .Lprocess_block_ce + beq Lprocess_block_ce cmp x3,#144 - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 + ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 eor v13.16b,v13.16b,v27.16b eor v14.16b,v14.16b,v28.16b eor v15.16b,v15.16b,v29.16b eor v16.16b,v16.16b,v30.16b - blo .Lprocess_block_ce + blo Lprocess_block_ce - ld1 {v31.8b},[x1],#8 // A[3][3] ^= *inp++ + ld1 {{v31.8b}},[x1],#8 // A[3][3] ^= *inp++ eor v17.16b,v17.16b,v31.16b - beq .Lprocess_block_ce + beq Lprocess_block_ce - ld1 {v28.8b,v29.8b,v30.8b},[x1],#24 + ld1 {{v28.8b,v29.8b,v30.8b}},[x1],#24 eor v18.16b,v18.16b,v28.16b eor v19.16b,v19.16b,v29.16b eor v20.16b,v20.16b,v30.16b -.Lprocess_block_ce: +Lprocess_block_ce: adr 
x10,iotas bl KeccakF1600_ce b .Loop_absorb_ce .align 4 -.Labsorbed_ce: +Labsorbed_ce: stp d0,d1,[x0,#8*0] stp d2,d3,[x0,#8*2] stp d4,d5,[x0,#8*4] @@ -766,20 +774,20 @@ SHA3_absorb_cext: str d24,[x0,#8*24] add x0,x2,x3 // return value - ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] - ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] - ldp x29,x30,[sp],#2*__SIZEOF_POINTER__+64 + ldp d8,d9,[sp,#2*8 +0] + ldp d10,d11,[sp,#2*8 +16] + ldp d12,d13,[sp,#2*8 +32] + ldp d14,d15,[sp,#2*8 +48] + ldp x29,x30,[sp],#2*8 +64 .inst 0xd50323bf // autiasp ret -.size SHA3_absorb_cext,.-SHA3_absorb_cext +// .size SHA3_absorb_cext,.-SHA3_absorb_cext .globl SHA3_squeeze_cext -.type SHA3_squeeze_cext,%function +// .type SHA3_squeeze_cext,%function .align 5 SHA3_squeeze_cext: .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*__SIZEOF_POINTER__]! + stp x29,x30,[sp,#-2*8]! add x29,sp,#0 mov x9,x0 mov x10,x3 @@ -787,55 +795,55 @@ SHA3_squeeze_cext: .Loop_squeeze_ce: ldr x4,[x9],#8 cmp x2,#8 - blo .Lsqueeze_tail_ce -#ifdef __AARCH64EB__ - rev x4,x4 -#endif + blo Lsqueeze_tail_ce + + + str x4,[x1],#8 - beq .Lsqueeze_done_ce + beq Lsqueeze_done_ce sub x2,x2,#8 subs x10,x10,#8 bhi .Loop_squeeze_ce bl KeccakF1600_cext - ldr x30,[sp,#__SIZEOF_POINTER__] + ldr x30,[sp,#8] mov x9,x0 mov x10,x3 b .Loop_squeeze_ce .align 4 -.Lsqueeze_tail_ce: +Lsqueeze_tail_ce: strb w4,[x1],#1 lsr x4,x4,#8 subs x2,x2,#1 - beq .Lsqueeze_done_ce + beq Lsqueeze_done_ce strb w4,[x1],#1 lsr x4,x4,#8 subs x2,x2,#1 - beq .Lsqueeze_done_ce + beq Lsqueeze_done_ce strb w4,[x1],#1 lsr x4,x4,#8 subs x2,x2,#1 - beq .Lsqueeze_done_ce + beq Lsqueeze_done_ce strb w4,[x1],#1 lsr x4,x4,#8 subs x2,x2,#1 - beq .Lsqueeze_done_ce + beq Lsqueeze_done_ce strb w4,[x1],#1 lsr x4,x4,#8 subs x2,x2,#1 - beq .Lsqueeze_done_ce + beq Lsqueeze_done_ce strb w4,[x1],#1 lsr x4,x4,#8 subs x2,x2,#1 - beq .Lsqueeze_done_ce + beq Lsqueeze_done_ce strb w4,[x1],#1 -.Lsqueeze_done_ce: - 
ldr x29,[sp],#2*__SIZEOF_POINTER__ +Lsqueeze_done_ce: + ldr x29,[sp],#2*8 .inst 0xd50323bf // autiasp ret -.size SHA3_squeeze_cext,.-SHA3_squeeze_cext +// .size SHA3_squeeze_cext,.-SHA3_squeeze_cext .byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 .align 2 From 993ff5ad23c789180c0ea79b9136a59766df264d Mon Sep 17 00:00:00 2001 From: Esteve Soler Arderiu Date: Wed, 5 Nov 2025 17:58:39 +0100 Subject: [PATCH 06/40] Fix keccak asm for aarch64 (without SHA3). --- .../common/crypto/keccak/keccak1600-armv8.s | 74 ++++++++++--------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/crates/common/crypto/keccak/keccak1600-armv8.s b/crates/common/crypto/keccak/keccak1600-armv8.s index 9baac7fab8a..b352d764b92 100644 --- a/crates/common/crypto/keccak/keccak1600-armv8.s +++ b/crates/common/crypto/keccak/keccak1600-armv8.s @@ -1,10 +1,14 @@ -// Modified: all instances of curly brackets need to be escaped with -// a second bracket to avoid Rust's `global_asm` trying to interpret -// them as a template substitution. -// Ran `cpp` to substitute constants. -// Commented out ARM assembly annotations (.size, .type) used only -// for debugging purposes and not understood by Rust. -// Removed dots from local labels for correct detection in the frontend. +// Modified: +// - All instances of curly brackets need to be escaped with a second bracket to avoid Rust's +// `global_asm` trying to interpret them as a template substitution. +// - Ran `cpp` to substitute constants. +// - Commented out ARM assembly annotations (.size, .type) used only for debugging purposes and not understood by +// Rust. +// - Removed dots from all local labels for correct detection in the frontend. +// Reason: `.L` local labels are ELF-specific. 
+// - Replaced instance of `adr x??,label` by `adrp x??,label@PAGE` followed by +// `add x??,x??,label@PAGEOFF`. +// // TODO: this is probably a matter of selecting the right parameter // for the translator. @@ -25,7 +29,7 @@ iotas: .quad 0x0000000000000088 .quad 0x0000000080008009 .quad 0x000000008000000a -.Liotas12: +Liotas12: .quad 0x000000008000808b .quad 0x800000000000008b .quad 0x8000000000008089 @@ -44,9 +48,9 @@ iotas: KeccakF1600_int: .inst 0xd503233f // paciasp stp x28,x30,[sp,#16] // stack is pre-allocated - b .Loop + b Loop .align 4 -.Loop: +Loop: ////////////////////////////////////////// Theta eor x26,x0,x5 stp x4,x9,[sp,#0] // offload pair... @@ -203,7 +207,7 @@ KeccakF1600_int: eor x24,x24,x30 eor x22,x22,x26 - bne .Loop + bne Loop ldr x30,[sp,#16+8] .inst 0xd50323bf // autiasp @@ -239,7 +243,8 @@ KeccakF1600: ldp x22,x23,[x26,#16*11] ldr x24,[x26,#16*12] - adr x28,iotas + adrp x28,iotas@PAGE + add x28,x28,iotas@PAGEOFF bl KeccakF1600_int ldr x26,[sp,#16+2*8] @@ -268,10 +273,10 @@ KeccakF1600: ret // .size KeccakF1600,.-KeccakF1600 -.globl SHA3_absorb +.globl _SHA3_absorb // .type SHA3_absorb,%function .align 5 -SHA3_absorb: +_SHA3_absorb: .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16*8]! 
add x29,sp,#0 @@ -302,10 +307,10 @@ SHA3_absorb: ldp x20,x21,[x26,#16*10] ldp x22,x23,[x26,#16*11] ldr x24,[x26,#16*12] - b .Loop_absorb + b Loop_absorb .align 4 -.Loop_absorb: +Loop_absorb: subs x26,x28,x30 // len - bsz blo Labsorbed @@ -430,12 +435,13 @@ Lprocess_block: add x27,x27,x30 str x27,[sp,#16+3*8] // save inp - adr x28,iotas + adrp x28,iotas@PAGE + add x28,x28,iotas@PAGEOFF bl KeccakF1600_int ldr x27,[sp,#16+3*8] // restore arguments ldp x28,x30,[sp,#16+4*8] - b .Loop_absorb + b Loop_absorb .align 4 Labsorbed: @@ -465,10 +471,10 @@ Labsorbed: .inst 0xd50323bf // autiasp ret // .size SHA3_absorb,.-SHA3_absorb -.globl SHA3_squeeze +.globl _SHA3_squeeze // .type SHA3_squeeze,%function .align 5 -SHA3_squeeze: +_SHA3_squeeze: .inst 0xd503233f // paciasp stp x29,x30,[sp,#-6*8]! add x29,sp,#0 @@ -480,7 +486,7 @@ SHA3_squeeze: mov x21,x2 mov x22,x3 -.Loop_squeeze: +Loop_squeeze: ldr x4,[x0],#8 cmp x21,#8 blo Lsqueeze_tail @@ -492,13 +498,13 @@ SHA3_squeeze: beq Lsqueeze_done subs x3,x3,#8 - bhi .Loop_squeeze + bhi Loop_squeeze mov x0,x19 bl KeccakF1600 mov x0,x19 mov x3,x22 - b .Loop_squeeze + b Loop_squeeze .align 4 Lsqueeze_tail: @@ -538,7 +544,7 @@ Lsqueeze_done: // .type KeccakF1600_ce,%function .align 5 KeccakF1600_ce: -.Loop_ce: +Loop_ce: ////////////////////////////////////////////////// Theta .inst 0xce0f2a99 //eor3 v25.16b,v20.16b,v15.16b,v10.16b .inst 0xce102eba //eor3 v26.16b,v21.16b,v16.16b,v11.16b @@ -628,7 +634,7 @@ KeccakF1600_ce: eor v0.16b,v0.16b,v26.16b tst x10,#255 - bne .Loop_ce + bne Loop_ce ret // .size KeccakF1600_ce,.-KeccakF1600_ce @@ -656,7 +662,8 @@ KeccakF1600_cext: ldp d20,d21,[x0,#8*20] ldp d22,d23,[x0,#8*22] ldr d24,[x0,#8*24] - adr x10,iotas + adrp x10,iotas@PAGE + add x10,x10,iotas@PAGEOFF bl KeccakF1600_ce ldr x30,[sp,#8] stp d0,d1,[x0,#8*0] @@ -705,10 +712,10 @@ SHA3_absorb_cext: ldp d20,d21,[x0,#8*20] ldp d22,d23,[x0,#8*22] ldr d24,[x0,#8*24] - b .Loop_absorb_ce + b Loop_absorb_ce .align 4 -.Loop_absorb_ce: +Loop_absorb_ce: 
subs x2,x2,x3 // len - bsz blo Labsorbed_ce @@ -752,10 +759,11 @@ SHA3_absorb_cext: eor v20.16b,v20.16b,v30.16b Lprocess_block_ce: - adr x10,iotas + adrp x10,iotas@PAGE + add x10,x10,iotas@PAGEOFF bl KeccakF1600_ce - b .Loop_absorb_ce + b Loop_absorb_ce .align 4 Labsorbed_ce: @@ -792,7 +800,7 @@ SHA3_squeeze_cext: mov x9,x0 mov x10,x3 -.Loop_squeeze_ce: +Loop_squeeze_ce: ldr x4,[x9],#8 cmp x2,#8 blo Lsqueeze_tail_ce @@ -804,13 +812,13 @@ SHA3_squeeze_cext: sub x2,x2,#8 subs x10,x10,#8 - bhi .Loop_squeeze_ce + bhi Loop_squeeze_ce bl KeccakF1600_cext ldr x30,[sp,#8] mov x9,x0 mov x10,x3 - b .Loop_squeeze_ce + b Loop_squeeze_ce .align 4 Lsqueeze_tail_ce: From ea3e78c6d4c634bde7123aa0d033b6c4e5f7d2b4 Mon Sep 17 00:00:00 2001 From: Esteve Soler Arderiu Date: Wed, 5 Nov 2025 18:21:08 +0100 Subject: [PATCH 07/40] Avoid modifying the original keccak asm for `x86_64`. --- .../common/crypto/keccak/keccak1600-x86_64.s | 1074 ++++++++--------- crates/common/crypto/keccak/mod.rs | 2 +- 2 files changed, 532 insertions(+), 544 deletions(-) diff --git a/crates/common/crypto/keccak/keccak1600-x86_64.s b/crates/common/crypto/keccak/keccak1600-x86_64.s index 3bba6a936ef..d7652991330 100644 --- a/crates/common/crypto/keccak/keccak1600-x86_64.s +++ b/crates/common/crypto/keccak/keccak1600-x86_64.s @@ -1,548 +1,536 @@ - .type __KeccakF1600, @function - .align 0x20 +.text + +.type __KeccakF1600,@function +.align 32 __KeccakF1600: - .cfi_startproc - endbr64 - - mov rax, [rdi + 0x3C] - mov rbx, [rdi + 0x44] - mov rcx, [rdi + 0x4C] - mov rdx, [rdi + 0x54] - mov rbp, [rdi + 0x5C] - jmp .Loop - - .align 0x20 - .Loop: - mov r8, [rdi - 0x64] - mov r9, [rdi - 0x34] - mov r10, [rdi - 0x04] - mov r11, [rdi + 0x2C] - - xor rcx, [rdi - 0x54] - xor rdx, [rdi - 0x4C] - xor rax, r8 - xor rbx, [rdi - 0x5C] - xor rcx, [rdi - 0x2C] - xor rax, [rdi - 0x3C] - mov r12, rbp - xor rbp, [rdi - 0x44] - - xor rcx, r10 - xor rax, [rdi - 0x14] - xor rdx, [rdi - 0x24] - xor rbx, r9 - xor rbp, [rdi - 0x1C] - - xor 
rcx, [rdi + 0x24] - xor rax, [rdi + 0x14] - xor rdx, [rdi + 0x04] - xor rbx, [rdi - 0x0C] - xor rbp, [rdi + 0x0C] - - mov r13, rcx - rol rcx, 0x01 - xor rcx, rax - xor rdx, r11 - - rol rax, 0x01 - xor rax, rdx - xor rbx, [rdi + 0x1C] - - rol rdx, 0x01 - xor rdx, rbx - xor rbp, [rdi + 0x34] - - rol rbx, 0x01 - xor rbx, rbp - - rol rbp, 0x01 - xor rbp, r13 - xor r9, rcx - xor r10, rdx - rol r9, 0x2C - xor r11, rbp - xor r12, rax - rol r10, 0x2B - xor r8, rbx - mov r13, r9 - rol r11, 0x15 - or r9, r10 - xor r9, r8 - rol r12, 0x0E - - xor r9, [r15] - lea r15, [r15 + 0x08] - - mov r14, r12 - and r12, r11 - mov [rsi - 0x64], r9 - xor r12, r10 - not r10 - mov [rsi - 0x54], r12 - - or r10, r11 - mov r12, [rdi + 0x4C] - xor r10, r13 - mov [rsi - 0x5C], r10 - - and r13, r8 - mov r9, [rdi - 0x1C] - xor r13, r14 - mov r10, [rdi - 0x14] - mov [rsi - 0x44], r13 - - or r14, r8 - mov r8, [rdi - 0x4C] - xor r14, r11 - mov r11, [rdi + 0x1C] - mov [rsi - 0x4C], r14 - - - xor r8, rbp - xor r12, rdx - rol r8, 0x1C - xor r11, rcx - xor r9, rax - rol r12, 0x3D - rol r11, 0x2D - xor r10, rbx - rol r9, 0x14 - mov r13, r8 - or r8, r12 - rol r10, 0x03 - - xor r8, r11 - mov [rsi - 0x24], r8 - - mov r14, r9 - and r9, r13 - mov r8, [rdi - 0x5C] - xor r9, r12 - not r12 - mov [rsi - 0x1C], r9 - - or r12, r11 - mov r9, [rdi - 0x2C] - xor r12, r10 - mov [rsi - 0x2C], r12 - - and r11, r10 - mov r12, [rdi + 0x3C] - xor r11, r14 - mov [rsi - 0x34], r11 - - or r14, r10 - mov r10, [rdi + 0x04] - xor r14, r13 - mov r11, [rdi + 0x34] - mov [rsi - 0x3C], r14 - - - xor r10, rbp - xor r11, rax - rol r10, 0x19 - xor r9, rdx - rol r11, 0x08 - xor r12, rbx - rol r9, 0x06 - xor r8, rcx - rol r12, 0x12 - mov r13, r10 - and r10, r11 - rol r8, 0x01 - - not r11 - xor r10, r9 - mov [rsi - 0x0C], r10 - - mov r14, r12 - and r12, r11 - mov r10, [rdi - 0x0C] - xor r12, r13 - mov [rsi - 0x04], r12 - - or r13, r9 - mov r12, [rdi + 0x54] - xor r13, r8 - mov [rsi - 0x14], r13 - - and r9, r8 - xor r9, r14 - mov [rsi + 0x0C], 
r9 - - or r14, r8 - mov r9, [rdi - 0x3C] - xor r14, r11 - mov r11, [rdi + 0x24] - mov [rsi + 0x04], r14 - - - mov r8, [rdi - 0x44] - - xor r10, rcx - xor r11, rdx - rol r10, 0x0A - xor r9, rbx - rol r11, 0x0F - xor r12, rbp - rol r9, 0x24 - xor r8, rax - rol r12, 0x38 - mov r13, r10 - or r10, r11 - rol r8, 0x1B - - not r11 - xor r10, r9 - mov [rsi + 0x1C], r10 - - mov r14, r12 - or r12, r11 - xor r12, r13 - mov [rsi + 0x24], r12 - - and r13, r9 - xor r13, r8 - mov [rsi + 0x14], r13 - - or r9, r8 - xor r9, r14 - mov [rsi + 0x34], r9 - - and r8, r14 - xor r8, r11 - mov [rsi + 0x2C], r8 - - - xor rdx, [rdi - 0x54] - xor rbp, [rdi - 0x24] - rol rdx, 0x3E - xor rcx, [rdi + 0x44] - rol rbp, 0x37 - xor rax, [rdi + 0x0C] - rol rcx, 0x02 - xor rbx, [rdi + 0x14] - xchg rdi, rsi - rol rax, 0x27 - rol rbx, 0x29 - mov r13, rdx - and rdx, rbp - not rbp - xor rdx, rcx - mov [rdi + 0x5C], rdx - - mov r14, rax - and rax, rbp - xor rax, r13 - mov [rdi + 0x3C], rax - - or r13, rcx - xor r13, rbx - mov [rdi + 0x54], r13 - - and rcx, rbx - xor rcx, r14 - mov [rdi + 0x4C], rcx - - or rbx, r14 - xor rbx, rbp - mov [rdi + 0x44], rbx - - mov rbp, rdx - mov rdx, r13 - - test r15, 0xFF - jnz .Loop - - lea r15, [r15 - 0xC0] - .byte 0xF3, 0xC3 - .cfi_endproc - .size __KeccakF1600, . 
- __KeccakF1600 - - .global KeccakF1600 - .type KeccakF1600, @function - .align 0x20 +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 60(%rdi),%rax + movq 68(%rdi),%rbx + movq 76(%rdi),%rcx + movq 84(%rdi),%rdx + movq 92(%rdi),%rbp + jmp .Loop + +.align 32 +.Loop: + movq -100(%rdi),%r8 + movq -52(%rdi),%r9 + movq -4(%rdi),%r10 + movq 44(%rdi),%r11 + + xorq -84(%rdi),%rcx + xorq -76(%rdi),%rdx + xorq %r8,%rax + xorq -92(%rdi),%rbx + xorq -44(%rdi),%rcx + xorq -60(%rdi),%rax + movq %rbp,%r12 + xorq -68(%rdi),%rbp + + xorq %r10,%rcx + xorq -20(%rdi),%rax + xorq -36(%rdi),%rdx + xorq %r9,%rbx + xorq -28(%rdi),%rbp + + xorq 36(%rdi),%rcx + xorq 20(%rdi),%rax + xorq 4(%rdi),%rdx + xorq -12(%rdi),%rbx + xorq 12(%rdi),%rbp + + movq %rcx,%r13 + rolq $1,%rcx + xorq %rax,%rcx + xorq %r11,%rdx + + rolq $1,%rax + xorq %rdx,%rax + xorq 28(%rdi),%rbx + + rolq $1,%rdx + xorq %rbx,%rdx + xorq 52(%rdi),%rbp + + rolq $1,%rbx + xorq %rbp,%rbx + + rolq $1,%rbp + xorq %r13,%rbp + xorq %rcx,%r9 + xorq %rdx,%r10 + rolq $44,%r9 + xorq %rbp,%r11 + xorq %rax,%r12 + rolq $43,%r10 + xorq %rbx,%r8 + movq %r9,%r13 + rolq $21,%r11 + orq %r10,%r9 + xorq %r8,%r9 + rolq $14,%r12 + + xorq (%r15),%r9 + leaq 8(%r15),%r15 + + movq %r12,%r14 + andq %r11,%r12 + movq %r9,-100(%rsi) + xorq %r10,%r12 + notq %r10 + movq %r12,-84(%rsi) + + orq %r11,%r10 + movq 76(%rdi),%r12 + xorq %r13,%r10 + movq %r10,-92(%rsi) + + andq %r8,%r13 + movq -28(%rdi),%r9 + xorq %r14,%r13 + movq -20(%rdi),%r10 + movq %r13,-68(%rsi) + + orq %r8,%r14 + movq -76(%rdi),%r8 + xorq %r11,%r14 + movq 28(%rdi),%r11 + movq %r14,-76(%rsi) + + + xorq %rbp,%r8 + xorq %rdx,%r12 + rolq $28,%r8 + xorq %rcx,%r11 + xorq %rax,%r9 + rolq $61,%r12 + rolq $45,%r11 + xorq %rbx,%r10 + rolq $20,%r9 + movq %r8,%r13 + orq %r12,%r8 + rolq $3,%r10 + + xorq %r11,%r8 + movq %r8,-36(%rsi) + + movq %r9,%r14 + andq %r13,%r9 + movq -92(%rdi),%r8 + xorq %r12,%r9 + notq %r12 + movq %r9,-28(%rsi) + + orq %r11,%r12 + movq -44(%rdi),%r9 + xorq %r10,%r12 + movq 
%r12,-44(%rsi) + + andq %r10,%r11 + movq 60(%rdi),%r12 + xorq %r14,%r11 + movq %r11,-52(%rsi) + + orq %r10,%r14 + movq 4(%rdi),%r10 + xorq %r13,%r14 + movq 52(%rdi),%r11 + movq %r14,-60(%rsi) + + + xorq %rbp,%r10 + xorq %rax,%r11 + rolq $25,%r10 + xorq %rdx,%r9 + rolq $8,%r11 + xorq %rbx,%r12 + rolq $6,%r9 + xorq %rcx,%r8 + rolq $18,%r12 + movq %r10,%r13 + andq %r11,%r10 + rolq $1,%r8 + + notq %r11 + xorq %r9,%r10 + movq %r10,-12(%rsi) + + movq %r12,%r14 + andq %r11,%r12 + movq -12(%rdi),%r10 + xorq %r13,%r12 + movq %r12,-4(%rsi) + + orq %r9,%r13 + movq 84(%rdi),%r12 + xorq %r8,%r13 + movq %r13,-20(%rsi) + + andq %r8,%r9 + xorq %r14,%r9 + movq %r9,12(%rsi) + + orq %r8,%r14 + movq -60(%rdi),%r9 + xorq %r11,%r14 + movq 36(%rdi),%r11 + movq %r14,4(%rsi) + + + movq -68(%rdi),%r8 + + xorq %rcx,%r10 + xorq %rdx,%r11 + rolq $10,%r10 + xorq %rbx,%r9 + rolq $15,%r11 + xorq %rbp,%r12 + rolq $36,%r9 + xorq %rax,%r8 + rolq $56,%r12 + movq %r10,%r13 + orq %r11,%r10 + rolq $27,%r8 + + notq %r11 + xorq %r9,%r10 + movq %r10,28(%rsi) + + movq %r12,%r14 + orq %r11,%r12 + xorq %r13,%r12 + movq %r12,36(%rsi) + + andq %r9,%r13 + xorq %r8,%r13 + movq %r13,20(%rsi) + + orq %r8,%r9 + xorq %r14,%r9 + movq %r9,52(%rsi) + + andq %r14,%r8 + xorq %r11,%r8 + movq %r8,44(%rsi) + + + xorq -84(%rdi),%rdx + xorq -36(%rdi),%rbp + rolq $62,%rdx + xorq 68(%rdi),%rcx + rolq $55,%rbp + xorq 12(%rdi),%rax + rolq $2,%rcx + xorq 20(%rdi),%rbx + xchgq %rsi,%rdi + rolq $39,%rax + rolq $41,%rbx + movq %rdx,%r13 + andq %rbp,%rdx + notq %rbp + xorq %rcx,%rdx + movq %rdx,92(%rdi) + + movq %rax,%r14 + andq %rbp,%rax + xorq %r13,%rax + movq %rax,60(%rdi) + + orq %rcx,%r13 + xorq %rbx,%r13 + movq %r13,84(%rdi) + + andq %rbx,%rcx + xorq %r14,%rcx + movq %rcx,76(%rdi) + + orq %r14,%rbx + xorq %rbp,%rbx + movq %rbx,68(%rdi) + + movq %rdx,%rbp + movq %r13,%rdx + + testq $255,%r15 + jnz .Loop + + leaq -192(%r15),%r15 + .byte 0xf3,0xc3 +.cfi_endproc +.size __KeccakF1600,.-__KeccakF1600 + +.globl KeccakF1600 +.type 
KeccakF1600,@function +.align 32 KeccakF1600: - .cfi_startproc - endbr64 - - - push rbx - .cfi_adjust_cfa_offset 0x08 - .cfi_offset rbx, -0x10 - push rbp - .cfi_adjust_cfa_offset 0x08 - .cfi_offset rbp, -0x18 - push r12 - .cfi_adjust_cfa_offset 0x08 - .cfi_offset r12, -0x20 - push r13 - .cfi_adjust_cfa_offset 0x08 - .cfi_offset r13, -0x28 - push r14 - .cfi_adjust_cfa_offset 0x08 - .cfi_offset r14, -0x30 - push r15 - .cfi_adjust_cfa_offset 0x08 - .cfi_offset r15, -0x38 - - lea rdi, [rdi + 0x64] - sub rsp, 0xC8 - .cfi_adjust_cfa_offset 0xC8 - - - not QWORD PTR [rdi - 0x5C] - not QWORD PTR [rdi - 0x54] - not QWORD PTR [rdi - 0x24] - not QWORD PTR [rdi - 0x04] - not QWORD PTR [rdi + 0x24] - not QWORD PTR [rdi + 0x3C] - - lea r15, [rip + iotas] - lea rsi, [rsp + 0x64] - - call __KeccakF1600 - - not QWORD PTR [rdi - 0x5C] - not QWORD PTR [rdi - 0x54] - not QWORD PTR [rdi - 0x24] - not QWORD PTR [rdi - 0x04] - not QWORD PTR [rdi + 0x24] - not QWORD PTR [rdi + 0x3C] - lea rdi, [rdi - 0x64] - - lea r11, [rsp + 0xF8] - .cfi_def_cfa r11, 0x08 - mov r15, [r11 - 0x30] - mov r14, [r11 - 0x28] - mov r13, [r11 - 0x20] - mov r12, [r11 - 0x18] - mov rbp, [r11 - 0x10] - mov rbx, [r11 - 0x08] - lea rsp, [r11] - .cfi_restore r12 - .cfi_restore r13 - .cfi_restore r14 - .cfi_restore r15 - .cfi_restore rbp - .cfi_restore rbx - .byte 0xF3, 0xC3 - .cfi_endproc - .size KeccakF1600, . 
- KeccakF1600 - - .global SHA3_absorb - .type SHA3_absorb, @function - .align 0x20 +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + leaq 100(%rdi),%rdi + subq $200,%rsp +.cfi_adjust_cfa_offset 200 + + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + + leaq iotas(%rip),%r15 + leaq 100(%rsp),%rsi + + call __KeccakF1600 + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + leaq -100(%rdi),%rdi + + leaq 248(%rsp),%r11 +.cfi_def_cfa %r11,8 + movq -48(%r11),%r15 + movq -40(%r11),%r14 + movq -32(%r11),%r13 + movq -24(%r11),%r12 + movq -16(%r11),%rbp + movq -8(%r11),%rbx + leaq (%r11),%rsp +.cfi_restore %r12 +.cfi_restore %r13 +.cfi_restore %r14 +.cfi_restore %r15 +.cfi_restore %rbp +.cfi_restore %rbx + .byte 0xf3,0xc3 +.cfi_endproc +.size KeccakF1600,.-KeccakF1600 +.globl SHA3_absorb +.type SHA3_absorb,@function +.align 32 SHA3_absorb: - .cfi_startproc - endbr64 - - - push rbx - .cfi_adjust_cfa_offset 0x08 - .cfi_offset rbx, -0x10 - push rbp - .cfi_adjust_cfa_offset 0x08 - .cfi_offset rbp, -0x18 - push r12 - .cfi_adjust_cfa_offset 0x08 - .cfi_offset r12, -0x20 - push r13 - .cfi_adjust_cfa_offset 0x08 - .cfi_offset r13, -0x28 - push r14 - .cfi_adjust_cfa_offset 0x08 - .cfi_offset r14, -0x30 - push r15 - .cfi_adjust_cfa_offset 0x08 - .cfi_offset r15, -0x38 - - lea rdi, [rdi + 0x64] - sub rsp, 0xE8 - .cfi_adjust_cfa_offset 0xE8 - - - mov r9, rsi - lea rsi, [rsp + 0x64] - - not QWORD PTR [rdi - 0x5C] - not QWORD PTR [rdi - 0x54] - not QWORD PTR [rdi - 0x24] - not QWORD PTR [rdi - 0x04] - not QWORD PTR [rdi + 0x24] 
- not QWORD PTR [rdi + 0x3C] - lea r15, [rip + iotas] - - mov [rsi + 0x74], rcx - - .Loop_absorb: - cmp rdx, rcx - jc .Ldone_absorb - - shr rcx, 0x03 - lea r8, [rdi - 0x64] - - .Lblock_absorb: - mov rax, [r9] - lea r9, [r9 + 0x08] - xor rax, [r8] - lea r8, [r8 + 0x08] - sub rdx, 0x08 - mov [r8 - 0x08], rax - sub rcx, 0x01 - jnz .Lblock_absorb - - mov [rsi + 0x64], r9 - mov [rsi + 0x6C], rdx - call __KeccakF1600 - mov r9, [rsi + 0x64] - mov rdx, [rsi + 0x6C] - mov rcx, [rsi + 0x74] - jmp .Loop_absorb - - .align 0x20 - .Ldone_absorb: - mov rax, rdx - - not QWORD PTR [rdi - 0x5C] - not QWORD PTR [rdi - 0x54] - not QWORD PTR [rdi - 0x24] - not QWORD PTR [rdi - 0x04] - not QWORD PTR [rdi + 0x24] - not QWORD PTR [rdi + 0x3C] - - lea r11, [rsp + 0x0118] - .cfi_def_cfa r11, 0x08 - mov r15, [r11 - 0x30] - mov r14, [r11 - 0x28] - mov r13, [r11 - 0x20] - mov r12, [r11 - 0x18] - mov rbp, [r11 - 0x10] - mov rbx, [r11 - 0x08] - lea rsp, [r11] - .cfi_restore r12 - .cfi_restore r13 - .cfi_restore r14 - .cfi_restore r15 - .cfi_restore rbp - .cfi_restore rbx - .byte 0xF3, 0xC3 - .cfi_endproc - .size SHA3_absorb, . 
- SHA3_absorb - - .global SHA3_squeeze - .type SHA3_squeeze, @function - .align 0x20 +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + leaq 100(%rdi),%rdi + subq $232,%rsp +.cfi_adjust_cfa_offset 232 + + + movq %rsi,%r9 + leaq 100(%rsp),%rsi + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + leaq iotas(%rip),%r15 + + movq %rcx,216-100(%rsi) + +.Loop_absorb: + cmpq %rcx,%rdx + jc .Ldone_absorb + + shrq $3,%rcx + leaq -100(%rdi),%r8 + +.Lblock_absorb: + movq (%r9),%rax + leaq 8(%r9),%r9 + xorq (%r8),%rax + leaq 8(%r8),%r8 + subq $8,%rdx + movq %rax,-8(%r8) + subq $1,%rcx + jnz .Lblock_absorb + + movq %r9,200-100(%rsi) + movq %rdx,208-100(%rsi) + call __KeccakF1600 + movq 200-100(%rsi),%r9 + movq 208-100(%rsi),%rdx + movq 216-100(%rsi),%rcx + jmp .Loop_absorb + +.align 32 +.Ldone_absorb: + movq %rdx,%rax + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + + leaq 280(%rsp),%r11 +.cfi_def_cfa %r11,8 + movq -48(%r11),%r15 + movq -40(%r11),%r14 + movq -32(%r11),%r13 + movq -24(%r11),%r12 + movq -16(%r11),%rbp + movq -8(%r11),%rbx + leaq (%r11),%rsp +.cfi_restore %r12 +.cfi_restore %r13 +.cfi_restore %r14 +.cfi_restore %r15 +.cfi_restore %rbp +.cfi_restore %rbx + .byte 0xf3,0xc3 +.cfi_endproc +.size SHA3_absorb,.-SHA3_absorb +.globl SHA3_squeeze +.type SHA3_squeeze,@function +.align 32 SHA3_squeeze: - .cfi_startproc - endbr64 - - - push r12 - .cfi_adjust_cfa_offset 0x08 - .cfi_offset r12, -0x10 - push r13 - .cfi_adjust_cfa_offset 0x08 - .cfi_offset r13, -0x18 - push r14 - .cfi_adjust_cfa_offset 0x08 - 
.cfi_offset r14, -0x20 - sub rsp, 0x20 - .cfi_adjust_cfa_offset 0x20 - - - shr rcx, 0x03 - mov r8, rdi - mov r12, rsi - mov r13, rdx - mov r14, rcx - jmp .Loop_squeeze - - .align 0x20 - .Loop_squeeze: - cmp r13, 0x08 - jb .Ltail_squeeze - - mov rax, [r8] - lea r8, [r8 + 0x08] - mov [r12], rax - lea r12, [r12 + 0x08] - sub r13, 0x08 - jz .Ldone_squeeze - - sub rcx, 0x01 - jnz .Loop_squeeze - - mov rcx, rdi - call KeccakF1600 - mov r8, rdi - mov rcx, r14 - jmp .Loop_squeeze - - .Ltail_squeeze: - mov rsi, r8 - mov rdi, r12 - mov rcx, r13 - .byte 0xF3, 0xA4 - - .Ldone_squeeze: - mov r14, [rsp + 0x20] - mov r13, [rsp + 0x28] - mov r12, [rsp + 0x30] - add rsp, 0x38 - .cfi_adjust_cfa_offset -0x38 - .cfi_restore r12 - .cfi_restore r13 - .cfi_restore r14 - .byte 0xF3, 0xC3 - .cfi_endproc - .size SHA3_squeeze, . - SHA3_squeeze - - - .align 0x0100 - .quad 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .type iotas, @object +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-32 + subq $32,%rsp +.cfi_adjust_cfa_offset 32 + + + shrq $3,%rcx + movq %rdi,%r8 + movq %rsi,%r12 + movq %rdx,%r13 + movq %rcx,%r14 + jmp .Loop_squeeze + +.align 32 +.Loop_squeeze: + cmpq $8,%r13 + jb .Ltail_squeeze + + movq (%r8),%rax + leaq 8(%r8),%r8 + movq %rax,(%r12) + leaq 8(%r12),%r12 + subq $8,%r13 + jz .Ldone_squeeze + + subq $1,%rcx + jnz .Loop_squeeze + + movq %rdi,%rcx + call KeccakF1600 + movq %rdi,%r8 + movq %r14,%rcx + jmp .Loop_squeeze + +.Ltail_squeeze: + movq %r8,%rsi + movq %r12,%rdi + movq %r13,%rcx +.byte 0xf3,0xa4 + +.Ldone_squeeze: + movq 32(%rsp),%r14 + movq 40(%rsp),%r13 + movq 48(%rsp),%r12 + addq $56,%rsp +.cfi_adjust_cfa_offset -56 +.cfi_restore %r12 +.cfi_restore %r13 +.cfi_restore %r14 + .byte 0xf3,0xc3 +.cfi_endproc +.size SHA3_squeeze,.-SHA3_squeeze +.align 256 +.quad 0,0,0,0,0,0,0,0 +.type 
iotas,@object iotas: - .quad 0x0000000000000001 - .quad 0x0000000000008082 - .quad 0x800000000000808A - .quad 0x8000000080008000 - .quad 0x000000000000808B - .quad 0x0000000080000001 - .quad 0x8000000080008081 - .quad 0x8000000000008009 - .quad 0x000000000000008A - .quad 0x0000000000000088 - .quad 0x0000000080008009 - .quad 0x000000008000000A - .quad 0x000000008000808B - .quad 0x800000000000008B - .quad 0x8000000000008089 - .quad 0x8000000000008003 - .quad 0x8000000000008002 - .quad 0x8000000000000080 - .quad 0x000000000000800A - .quad 0x800000008000000A - .quad 0x8000000080008081 - .quad 0x8000000000008080 - .quad 0x0000000080000001 - .quad 0x8000000080008008 - .size iotas, . - iotas - .byte 0x4B, 0x65, 0x63, 0x63, 0x61, 0x6B, 0x2D, 0x31 - .byte 0x36, 0x30, 0x30, 0x20, 0x61, 0x62, 0x73, 0x6F - .byte 0x72, 0x62, 0x20, 0x61, 0x6E, 0x64, 0x20, 0x73 - .byte 0x71, 0x75, 0x65, 0x65, 0x7A, 0x65, 0x20, 0x66 - .byte 0x6F, 0x72, 0x20, 0x78, 0x38, 0x36, 0x5F, 0x36 - .byte 0x34, 0x2C, 0x20, 0x43, 0x52, 0x59, 0x50, 0x54 - .byte 0x4F, 0x47, 0x41, 0x4D, 0x53, 0x20, 0x62, 0x79 - .byte 0x20, 0x3C, 0x61, 0x70, 0x70, 0x72, 0x6F, 0x40 - .byte 0x6F, 0x70, 0x65, 0x6E, 0x73, 0x73, 0x6C, 0x2E - .byte 0x6F, 0x72, 0x67, 0x3E, 0x00 - - .section .note.gnu.property, "a", @note - .long 4, 2f-1f, 5 - .byte 0x47, 0x4E, 0x55, 0x00 - 1: - .long 0xC0000002, 0x04, 0x03 - .align 0x08 - 2: +.quad 0x0000000000000001 +.quad 0x0000000000008082 +.quad 0x800000000000808a +.quad 0x8000000080008000 +.quad 0x000000000000808b +.quad 0x0000000080000001 +.quad 0x8000000080008081 +.quad 0x8000000000008009 +.quad 0x000000000000008a +.quad 0x0000000000000088 +.quad 0x0000000080008009 +.quad 0x000000008000000a +.quad 0x000000008000808b +.quad 0x800000000000008b +.quad 0x8000000000008089 +.quad 0x8000000000008003 +.quad 0x8000000000008002 +.quad 0x8000000000000080 +.quad 0x000000000000800a +.quad 0x800000008000000a +.quad 0x8000000080008081 +.quad 0x8000000000008080 +.quad 0x0000000080000001 +.quad 0x8000000080008008 
+.size iotas,.-iotas +.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 + +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index 091adc4307a..b08c9b8c799 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -1,7 +1,7 @@ #[cfg(target_arch = "aarch64")] std::arch::global_asm!(include_str!("keccak1600-armv8.s")); #[cfg(target_arch = "x86_64")] -std::arch::global_asm!(include_str!("keccak1600-x86_64.s")); +std::arch::global_asm!(include_str!("keccak1600-x86_64.s"), options(att_syntax)); const BLOCK_SIZE: usize = 136; From 20e78e559ac1eab3659311517c5a214c373ef5bd Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Wed, 5 Nov 2025 18:20:57 -0300 Subject: [PATCH 08/40] replace usage of sha3 wherever possible and simplify some of the code --- Cargo.lock | 15 ++- Cargo.toml | 1 - crates/blockchain/Cargo.toml | 2 +- crates/blockchain/payload.rs | 20 ++-- crates/common/Cargo.toml | 1 - crates/common/constants.rs | 5 +- crates/common/evm.rs | 6 +- crates/common/trie/Cargo.toml | 2 +- crates/common/trie/node_hash.rs | 6 +- crates/common/trie/trie.rs | 6 +- crates/common/trie/verify_range.rs | 4 +- crates/common/types/account.rs | 7 +- .../common/types/block_execution_witness.rs | 10 +- crates/common/types/genesis.rs | 4 +- crates/common/types/transaction.rs | 10 +- crates/common/utils.rs | 29 +++++- crates/l2/common/Cargo.toml | 2 +- crates/networking/p2p/Cargo.toml | 3 +- crates/networking/p2p/discv4/messages.rs | 17 ++-- .../networking/p2p/rlpx/connection/codec.rs | 98 ++++--------------- .../p2p/rlpx/connection/handshake.rs | 6 +- crates/networking/p2p/rlpx/utils.rs | 
6 +- crates/networking/p2p/types.rs | 7 +- crates/networking/rpc/Cargo.toml | 2 +- crates/storage/Cargo.toml | 2 +- crates/storage/store.rs | 29 ++---- crates/vm/Cargo.toml | 2 +- crates/vm/levm/Cargo.toml | 1 - .../vm/levm/bench/revm_comparison/Cargo.toml | 3 +- crates/vm/levm/src/opcode_handlers/keccak.rs | 7 +- crates/vm/levm/src/precompiles.rs | 7 +- crates/vm/levm/src/utils.rs | 24 ++--- tooling/Cargo.toml | 2 +- 33 files changed, 134 insertions(+), 212 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 06f830f89e9..360ed37979f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3345,6 +3345,7 @@ version = "5.0.0" dependencies = [ "bytes", "ethrex-common", + "ethrex-crypto", "ethrex-metrics", "ethrex-rlp", "ethrex-storage", @@ -3354,7 +3355,6 @@ dependencies = [ "rustc-hash 2.1.1", "secp256k1", "serde_json", - "sha3", "thiserror 2.0.17", "tokio", "tokio-util", @@ -3384,7 +3384,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "sha3", "thiserror 2.0.17", "tinyvec", "tracing", @@ -3495,6 +3494,7 @@ dependencies = [ "bytes", "ethereum-types 0.15.1", "ethrex-common", + "ethrex-crypto", "ethrex-rlp", "ethrex-storage", "ethrex-trie", @@ -3505,7 +3505,6 @@ dependencies = [ "secp256k1", "serde", "serde_with", - "sha3", "thiserror 2.0.17", ] @@ -3566,7 +3565,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "sha3", "spinoff", "strum 0.27.2", "substrate-bn", @@ -3602,6 +3600,7 @@ dependencies = [ "ethereum-types 0.15.1", "ethrex-blockchain", "ethrex-common", + "ethrex-crypto", "ethrex-rlp", "ethrex-storage", "ethrex-storage-rollup", @@ -3693,6 +3692,7 @@ dependencies = [ "ethereum-types 0.15.1", "ethrex-blockchain", "ethrex-common", + "ethrex-crypto", "ethrex-l2-common", "ethrex-p2p", "ethrex-rlp", @@ -3710,7 +3710,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "sha3", "thiserror 2.0.17", "tokio", "tokio-util", @@ -3763,6 +3762,7 @@ dependencies = [ "bytes", "ethereum-types 0.15.1", "ethrex-common", + "ethrex-crypto", "ethrex-rlp", "ethrex-trie", "hex", 
@@ -3773,7 +3773,6 @@ dependencies = [ "rustc-hash 2.1.1", "serde", "serde_json", - "sha3", "tempfile", "thiserror 2.0.17", "tokio", @@ -3819,6 +3818,7 @@ dependencies = [ "crossbeam 0.8.4", "digest 0.10.7", "ethereum-types 0.15.1", + "ethrex-crypto", "ethrex-rlp", "ethrex-threadpool", "hasher", @@ -3830,7 +3830,6 @@ dependencies = [ "rocksdb", "serde", "serde_json", - "sha3", "smallvec", "tempfile", "thiserror 2.0.17", @@ -3847,6 +3846,7 @@ dependencies = [ "dyn-clone", "ethereum-types 0.15.1", "ethrex-common", + "ethrex-crypto", "ethrex-levm", "ethrex-rlp", "ethrex-trie", @@ -3854,7 +3854,6 @@ dependencies = [ "lazy_static", "rkyv", "serde", - "sha3", "thiserror 2.0.17", "tracing", ] diff --git a/Cargo.toml b/Cargo.toml index 0a00799c305..b659614eae5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,7 +85,6 @@ hex-literal = "0.4.1" crc32fast = "1.4.2" lazy_static = "1.5.0" sha2 = "0.10.8" -sha3 = "0.10.8" tokio-util = { version = "0.7.15", features = ["rt"] } jsonwebtoken = "9.3.0" rand = "0.8.5" diff --git a/crates/blockchain/Cargo.toml b/crates/blockchain/Cargo.toml index 2e859496882..9ade036cd49 100644 --- a/crates/blockchain/Cargo.toml +++ b/crates/blockchain/Cargo.toml @@ -10,13 +10,13 @@ documentation.workspace = true [dependencies] ethrex-rlp.workspace = true ethrex-common.workspace = true +ethrex-crypto.workspace = true ethrex-storage.workspace = true ethrex-trie.workspace = true ethrex-vm.workspace = true secp256k1.workspace = true thiserror.workspace = true -sha3.workspace = true tracing.workspace = true bytes.workspace = true hex.workspace = true diff --git a/crates/blockchain/payload.rs b/crates/blockchain/payload.rs index 40d0b3f7213..c5fd3900177 100644 --- a/crates/blockchain/payload.rs +++ b/crates/blockchain/payload.rs @@ -18,13 +18,12 @@ use ethrex_common::{ }, }; +use ethrex_crypto::keccak::keccak_hash; use ethrex_vm::{Evm, EvmError}; use ethrex_rlp::encode::RLPEncode; use ethrex_storage::{Store, error::StoreError}; -use sha3::{Digest, Keccak256}; 
- use ethrex_metrics::metrics; #[cfg(feature = "metrics")] @@ -99,18 +98,19 @@ pub enum BuildPayloadArgsError { impl BuildPayloadArgs { /// Computes an 8-byte identifier by hashing the components of the payload arguments. pub fn id(&self) -> Result { - let mut hasher = Keccak256::new(); - hasher.update(self.parent); - hasher.update(self.timestamp.to_be_bytes()); - hasher.update(self.random); - hasher.update(self.fee_recipient); + let mut serialized = Vec::with_capacity(1024); + serialized.extend_from_slice(self.parent.as_bytes()); + serialized.extend_from_slice(&self.timestamp.to_be_bytes()); + serialized.extend_from_slice(self.random.as_bytes()); + serialized.extend_from_slice(self.fee_recipient.as_bytes()); if let Some(withdrawals) = &self.withdrawals { - hasher.update(withdrawals.encode_to_vec()); + withdrawals.encode(&mut serialized); } if let Some(beacon_root) = self.beacon_root { - hasher.update(beacon_root); + serialized.extend_from_slice(beacon_root.as_bytes()); } - let res = &mut hasher.finalize()[..8]; + let mut hashed = keccak_hash(serialized); + let res = &mut hashed[..8]; res[0] = self.version; Ok(u64::from_be_bytes(res.try_into().map_err(|_| { BuildPayloadArgsError::FailedToConvertPayload diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index b333a592d46..4854e6e774e 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -20,7 +20,6 @@ serde_json.workspace = true thiserror.workspace = true sha2.workspace = true kzg-rs.workspace = true -sha3.workspace = true secp256k1.workspace = true once_cell = "1.20.2" libc = "0.2" diff --git a/crates/common/constants.rs b/crates/common/constants.rs index 726ab7dd0ce..a163aa07e3b 100644 --- a/crates/common/constants.rs +++ b/crates/common/constants.rs @@ -1,6 +1,6 @@ use crate::H256; +use ethrex_crypto::keccak::keccak_hash; use ethrex_rlp::constants::RLP_NULL; -use sha3::{Digest as _, Keccak256}; use std::{str::FromStr, sync::LazyLock}; // = Keccak256(RLP([])) as of EIP-3675 @@ -35,8 
+35,7 @@ pub static EMPTY_KECCACK_HASH: LazyLock = LazyLock::new(|| { ) }); -pub static EMPTY_TRIE_HASH: LazyLock = - LazyLock::new(|| H256::from_slice(&Keccak256::digest([RLP_NULL]))); +pub static EMPTY_TRIE_HASH: LazyLock = LazyLock::new(|| H256(keccak_hash([RLP_NULL]))); // Request related pub static DEPOSIT_TOPIC: LazyLock = LazyLock::new(|| { diff --git a/crates/common/evm.rs b/crates/common/evm.rs index 3bb5dc6b4c2..cda6bbd46ac 100644 --- a/crates/common/evm.rs +++ b/crates/common/evm.rs @@ -1,6 +1,6 @@ use ethereum_types::Address; +use ethrex_crypto::keccak::keccak_hash; use ethrex_rlp::encode::RLPEncode; -use sha3::{Digest, Keccak256}; /// Calculates the address of a new conctract using the CREATE /// opcode as follows: @@ -9,7 +9,5 @@ use sha3::{Digest, Keccak256}; pub fn calculate_create_address(sender_address: Address, sender_nonce: u64) -> Address { let mut encoded = Vec::new(); (sender_address, sender_nonce).encode(&mut encoded); - let mut hasher = Keccak256::new(); - hasher.update(encoded); - Address::from_slice(&hasher.finalize()[12..]) + Address::from_slice(&keccak_hash(encoded)[12..]) } diff --git a/crates/common/trie/Cargo.toml b/crates/common/trie/Cargo.toml index 8c9c95b4912..0c0e4c69f4c 100644 --- a/crates/common/trie/Cargo.toml +++ b/crates/common/trie/Cargo.toml @@ -6,6 +6,7 @@ authors.workspace = true documentation.workspace = true [dependencies] +ethrex-crypto.workspace = true ethrex-rlp.workspace = true ethrex-threadpool.workspace = true @@ -14,7 +15,6 @@ anyhow = "1.0.86" bytes.workspace = true tracing.workspace = true thiserror.workspace = true -sha3.workspace = true hex.workspace = true serde.workspace = true serde_json = "1.0.117" diff --git a/crates/common/trie/node_hash.rs b/crates/common/trie/node_hash.rs index f7743bdf205..5d229cb0a33 100644 --- a/crates/common/trie/node_hash.rs +++ b/crates/common/trie/node_hash.rs @@ -1,6 +1,6 @@ use ethereum_types::H256; +use ethrex_crypto::keccak::keccak_hash; use 
ethrex_rlp::{decode::RLPDecode, encode::RLPEncode, error::RLPDecodeError, structs::Encoder}; -use sha3::{Digest, Keccak256}; /// Struct representing a trie node hash /// If the encoded node is less than 32 bits, contains the encoded node itself @@ -26,7 +26,7 @@ impl NodeHash { /// Returns the `NodeHash` of an encoded node (encoded using the NodeEncoder) pub fn from_encoded(encoded: &[u8]) -> NodeHash { if encoded.len() >= 32 { - let hash = Keccak256::new_with_prefix(encoded).finalize(); + let hash = keccak_hash(encoded); NodeHash::Hashed(H256::from_slice(&hash)) } else { NodeHash::from_slice(encoded) @@ -51,7 +51,7 @@ impl NodeHash { /// NOTE: This will hash smaller nodes, only use to get the final root hash, not for intermediate node hashes pub fn finalize(self) -> H256 { match self { - NodeHash::Inline(_) => H256::from_slice(&Keccak256::digest(self.as_ref())), + NodeHash::Inline(_) => H256(keccak_hash(self.as_ref())), NodeHash::Hashed(x) => x, } } diff --git a/crates/common/trie/trie.rs b/crates/common/trie/trie.rs index 4102fdbe43b..8e24ca36b53 100644 --- a/crates/common/trie/trie.rs +++ b/crates/common/trie/trie.rs @@ -11,9 +11,9 @@ mod trie_iter; pub mod trie_sorted; mod verify_range; use ethereum_types::H256; +use ethrex_crypto::keccak::keccak_hash; use ethrex_rlp::constants::RLP_NULL; use ethrex_rlp::encode::RLPEncode; -use sha3::{Digest, Keccak256}; use std::collections::{BTreeMap, HashSet}; use std::sync::{Arc, Mutex}; @@ -34,8 +34,8 @@ use lazy_static::lazy_static; lazy_static! 
{ // Hash value for an empty trie, equal to keccak(RLP_NULL) - pub static ref EMPTY_TRIE_HASH: H256 = H256::from_slice( - &Keccak256::digest([RLP_NULL]), + pub static ref EMPTY_TRIE_HASH: H256 = H256( + keccak_hash([RLP_NULL]), ); } diff --git a/crates/common/trie/verify_range.rs b/crates/common/trie/verify_range.rs index ac81a5ad124..1cf01d18697 100644 --- a/crates/common/trie/verify_range.rs +++ b/crates/common/trie/verify_range.rs @@ -1,8 +1,8 @@ use std::collections::{BTreeMap, VecDeque}; use ethereum_types::H256; +use ethrex_crypto::keccak::keccak_hash; use ethrex_rlp::decode::RLPDecode; -use sha3::{Digest, Keccak256}; use crate::{ ProofTrie, Trie, TrieError, ValueRLP, @@ -143,7 +143,7 @@ impl<'a> From<&'a [Vec]> for RangeProof<'a> { let node_refs = proof .iter() .map(|node| { - let hash = H256::from_slice(&Keccak256::digest(node)); + let hash = H256(keccak_hash(node)); let encoded_data = node.as_slice(); (hash, encoded_data) }) diff --git a/crates/common/types/account.rs b/crates/common/types/account.rs index 228ada9ebaf..2918ce54612 100644 --- a/crates/common/types/account.rs +++ b/crates/common/types/account.rs @@ -2,10 +2,10 @@ use std::collections::HashMap; use bytes::Bytes; use ethereum_types::{H256, U256}; +use ethrex_crypto::keccak::keccak_hash; use ethrex_trie::Trie; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; -use sha3::{Digest as _, Keccak256}; use ethrex_rlp::{ decode::RLPDecode, @@ -200,10 +200,7 @@ impl RLPDecode for AccountState { pub fn compute_storage_root(storage: &HashMap) -> H256 { let iter = storage.iter().filter_map(|(k, v)| { - (!v.is_zero()).then_some(( - Keccak256::digest(k.to_big_endian()).to_vec(), - v.encode_to_vec(), - )) + (!v.is_zero()).then_some((keccak_hash(k.to_big_endian()).to_vec(), v.encode_to_vec())) }); Trie::compute_hash_from_unsorted_iter(iter) } diff --git a/crates/common/types/block_execution_witness.rs b/crates/common/types/block_execution_witness.rs index 3f950a097f0..0feec959f81 100644 --- 
a/crates/common/types/block_execution_witness.rs +++ b/crates/common/types/block_execution_witness.rs @@ -11,13 +11,13 @@ use crate::{ }; use bytes::Bytes; use ethereum_types::{Address, H256, U256}; +use ethrex_crypto::keccak::keccak_hash; use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode}; use ethrex_trie::{EMPTY_TRIE_HASH, NodeRLP, Trie}; use rkyv::{Archive, Deserialize as RDeserialize, Serialize as RSerialize}; use serde::de::{SeqAccess, Visitor}; use serde::ser::SerializeSeq; use serde::{Deserialize, Deserializer, Serialize, Serializer, de}; -use sha3::{Digest, Keccak256}; /// State produced by the guest program execution inside the zkVM. It is /// essentially built from the `ExecutionWitness`. @@ -571,13 +571,9 @@ where } fn hash_address(address: &Address) -> Vec { - Keccak256::new_with_prefix(address.to_fixed_bytes()) - .finalize() - .to_vec() + keccak_hash(address.to_fixed_bytes()).to_vec() } pub fn hash_key(key: &H256) -> Vec { - Keccak256::new_with_prefix(key.to_fixed_bytes()) - .finalize() - .to_vec() + keccak_hash(key.to_fixed_bytes()).to_vec() } diff --git a/crates/common/types/genesis.rs b/crates/common/types/genesis.rs index 1ceeb867144..393c2302865 100644 --- a/crates/common/types/genesis.rs +++ b/crates/common/types/genesis.rs @@ -1,10 +1,10 @@ use bytes::Bytes; use ethereum_types::{Address, Bloom, H256, U256}; +use ethrex_crypto::keccak::keccak_hash; use ethrex_rlp::encode::RLPEncode; use ethrex_trie::Trie; use rkyv::{Archive, Deserialize as RDeserialize, Serialize as RSerialize}; use serde::{Deserialize, Serialize}; -use sha3::{Digest, Keccak256}; use std::{ collections::{BTreeMap, HashMap}, io::{BufReader, Error}, @@ -695,7 +695,7 @@ impl Genesis { pub fn compute_state_root(&self) -> H256 { let iter = self.alloc.iter().map(|(addr, account)| { ( - Keccak256::digest(addr).to_vec(), + keccak_hash(addr).to_vec(), AccountState::from(account).encode_to_vec(), ) }); diff --git a/crates/common/types/transaction.rs b/crates/common/types/transaction.rs 
index 1bf54103665..7bcd5ef52f3 100644 --- a/crates/common/types/transaction.rs +++ b/crates/common/types/transaction.rs @@ -3,12 +3,12 @@ use std::{cmp::min, fmt::Display}; use crate::utils::keccak; use bytes::Bytes; use ethereum_types::{Address, H256, Signature, U256}; +use ethrex_crypto::keccak::keccak_hash; pub use mempool::MempoolTransaction; use rkyv::{Archive, Deserialize as RDeserialize, Serialize as RSerialize}; use secp256k1::{Message, ecdsa::RecoveryId}; use serde::{Serialize, ser::SerializeStruct}; pub use serde_impl::{AccessListEntry, GenericTransaction, GenericTransactionError}; -use sha3::{Digest, Keccak256}; use ethrex_rlp::{ constants::RLP_NULL, @@ -1266,10 +1266,8 @@ pub fn recover_address_from_message( message: &Bytes, ) -> Result { // Hash message - let payload: [u8; 32] = Keccak256::new_with_prefix(message.as_ref()) - .finalize() - .into(); - recover_address(signature, H256::from_slice(&payload)) + let payload = keccak(message); + recover_address(signature, payload) } pub fn recover_address(signature: Signature, payload: H256) -> Result { @@ -1283,7 +1281,7 @@ pub fn recover_address(signature: Signature, payload: H256) -> Result Result, FromHexError> { } pub fn keccak(data: impl AsRef<[u8]>) -> H256 { - H256(Keccak256::digest(data.as_ref()).into()) + H256(keccak_hash(data)) +} + +/// Allocation-free operations on arrays. + +/// Truncates an array of size N to size M. +/// Fails compilation if N < M. +pub fn truncate_array(data: [u8; N]) -> [u8; M] { + const { assert!(M <= N) }; + let mut res = [0u8; M]; + res.copy_from_slice(&data[..M]); + res +} + +/// Splits an array in two at position M. +/// Fails compilation if N != M + L. 
+pub fn split_array( + data: [u8; N], +) -> ([u8; M], [u8; L]) { + const { assert!(N == M + L) }; + let mut before = [0u8; M]; + let mut after = [0u8; L]; + before.copy_from_slice(&data[..M]); + after.copy_from_slice(&data[M..]); + (before, after) } #[cfg(test)] diff --git a/crates/l2/common/Cargo.toml b/crates/l2/common/Cargo.toml index c0d2b8a9c4b..d49e9f53959 100644 --- a/crates/l2/common/Cargo.toml +++ b/crates/l2/common/Cargo.toml @@ -8,6 +8,7 @@ documentation.workspace = true [dependencies] ethereum-types.workspace = true ethrex-common.workspace = true +ethrex-crypto.workspace = true ethrex-rlp.workspace = true ethrex-storage.workspace = true ethrex-trie.workspace = true @@ -17,7 +18,6 @@ bytes.workspace = true thiserror.workspace = true serde.workspace = true lambdaworks-crypto.workspace = true -sha3.workspace = true secp256k1.workspace = true hex.workspace = true serde_with.workspace = true diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index ce819dc570d..a3bfb53d1c1 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -9,6 +9,7 @@ documentation.workspace = true [dependencies] ethrex-common.workspace = true +ethrex-crypto.workspace = true ethrex-blockchain.workspace = true ethrex-rlp.workspace = true ethrex-storage.workspace = true @@ -37,7 +38,7 @@ rocksdb = { workspace = true, optional = true } prometheus = "0.14.0" tokio-stream = "0.1.17" -sha3 = "0.10.8" +sha3 = "0.10.8" # Used for incremental hashing due to MAC serde_json = "1.0.117" diff --git a/crates/networking/p2p/discv4/messages.rs b/crates/networking/p2p/discv4/messages.rs index 3b0ca7c41da..1ccf66bb466 100644 --- a/crates/networking/p2p/discv4/messages.rs +++ b/crates/networking/p2p/discv4/messages.rs @@ -3,7 +3,8 @@ use crate::{ utils::{current_unix_time, node_id}, }; use bytes::BufMut; -use ethrex_common::{H256, H512, H520}; +use ethrex_common::{H256, H512, H520, utils::keccak}; +use ethrex_crypto::keccak::keccak_hash; use 
ethrex_rlp::{ decode::RLPDecode, encode::RLPEncode, @@ -14,7 +15,6 @@ use secp256k1::{ SecretKey, ecdsa::{RecoverableSignature, RecoveryId}, }; -use sha3::{Digest, Keccak256}; use std::{convert::Into, io::ErrorKind}; #[derive(Debug, thiserror::Error)] @@ -64,14 +64,13 @@ impl Packet { let packet_type = encoded_packet[header_size]; let encoded_msg = &encoded_packet[header_size..]; - let head_digest = Keccak256::digest(&encoded_packet[hash_len..]); - let header_hash = H256::from_slice(&head_digest); + let header_hash = keccak(&encoded_packet[hash_len..]); if hash != header_hash { return Err(PacketDecodeErr::HashMismatch); } - let digest: [u8; 32] = Keccak256::digest(encoded_msg).into(); + let digest: [u8; 32] = keccak_hash(encoded_msg); let rid = RecoveryId::try_from(Into::::into(signature_bytes[64])) .map_err(|_| PacketDecodeErr::InvalidSignature)?; @@ -153,7 +152,7 @@ impl Message { self.encode_with_type(&mut data); - let digest: [u8; 32] = Keccak256::digest(&data[signature_size..]).into(); + let digest: [u8; 32] = keccak_hash(&data[signature_size..]); let (recovery_id, signature) = secp256k1::SECP256K1 .sign_ecdsa_recoverable(&secp256k1::Message::from_digest(digest), node_signer) @@ -162,7 +161,7 @@ impl Message { data[..signature_size - 1].copy_from_slice(&signature); data[signature_size - 1] = Into::::into(recovery_id) as u8; - let hash = Keccak256::digest(&data[..]); + let hash = keccak_hash(&data[..]); buf.put_slice(&hash); buf.put_slice(&data[..]); } @@ -1048,7 +1047,7 @@ mod tests { buf[32] ^= 0xFF; // re hash the data as we have updated the message - let hash = Keccak256::digest(&buf[32..]); + let hash = keccak_hash(&buf[32..]); let mut updated_buf = Vec::new(); updated_buf.put_slice(&hash); updated_buf.put_slice(&buf[32..]); @@ -1087,7 +1086,7 @@ mod tests { buf[32 + 64] = 4; // re hash the data as we have updated the message - let hash = Keccak256::digest(&buf[32..]); + let hash = keccak_hash(&buf[32..]); let mut updated_buf = Vec::new(); 
updated_buf.put_slice(&hash); updated_buf.put_slice(&buf[32..]); diff --git a/crates/networking/p2p/rlpx/connection/codec.rs b/crates/networking/p2p/rlpx/connection/codec.rs index e17d5b0b38c..1874a3f3d0e 100644 --- a/crates/networking/p2p/rlpx/connection/codec.rs +++ b/crates/networking/p2p/rlpx/connection/codec.rs @@ -12,7 +12,11 @@ use aes::{ cipher::{BlockEncrypt as _, KeyInit as _, KeyIvInit, StreamCipher as _}, }; use bytes::{Buf, BytesMut}; -use ethrex_common::{H128, H256}; +use ethrex_common::{ + H128, H256, + utils::{keccak, truncate_array}, +}; +use ethrex_crypto::keccak::keccak_hash; use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode as _}; use sha3::{Digest as _, Keccak256}; use tokio::io::{AsyncRead, AsyncWrite}; @@ -50,13 +54,11 @@ impl RLPxCodec { )?; // shared-secret = keccak256(ephemeral-key || keccak256(nonce || initiator-nonce)) - let shared_secret = - Keccak256::digest([ephemeral_key_secret, hashed_nonces].concat()).into(); + let shared_secret = keccak_hash([ephemeral_key_secret, hashed_nonces].concat()); // aes-secret = keccak256(ephemeral-key || shared-secret) - let aes_key = - H256(Keccak256::digest([ephemeral_key_secret, shared_secret].concat()).into()); + let aes_key = keccak([ephemeral_key_secret, shared_secret].concat()); // mac-secret = keccak256(ephemeral-key || aes-secret) - let mac_key = H256(Keccak256::digest([ephemeral_key_secret, aes_key.0].concat()).into()); + let mac_key = keccak([ephemeral_key_secret, aes_key.0].concat()); // egress-mac = keccak256.init((mac-secret ^ remote-nonce) || auth) let egress_mac = Keccak256::default() @@ -120,18 +122,7 @@ impl Decoder for RLPxCodec { // Validate MAC header // header-mac-seed = aes(mac-secret, keccak256.digest(egress-mac)[:16]) ^ header-ciphertext let header_mac_seed = { - let mac_digest: [u8; 16] = self - .ingress_mac - .clone() - .finalize() - .get(..16) - .ok_or_else(|| { - PeerConnectionError::CryptographyError("Invalid mac digest".to_owned()) - })? 
- .try_into() - .map_err(|_| { - PeerConnectionError::CryptographyError("Invalid mac digest".to_owned()) - })?; + let mac_digest: [u8; 16] = truncate_array(self.ingress_mac.clone().finalize().into()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); (H128(seed.into()) @@ -149,19 +140,7 @@ impl Decoder for RLPxCodec { temp_ingress_mac.update(header_mac_seed); // header-mac = keccak256.digest(egress-mac)[:16] - let expected_header_mac = H128( - temp_ingress_mac - .clone() - .finalize() - .get(..16) - .ok_or_else(|| { - PeerConnectionError::CryptographyError("Invalid header mac".to_owned()) - })? - .try_into() - .map_err(|_| { - PeerConnectionError::CryptographyError("Invalid header mac".to_owned()) - })?, - ); + let expected_header_mac = H128(truncate_array(temp_ingress_mac.clone().finalize().into())); if header_mac != expected_header_mac.0 { return Err(PeerConnectionError::InvalidMessageFrame( @@ -227,31 +206,14 @@ impl Decoder for RLPxCodec { // check MAC self.ingress_mac.update(&frame_ciphertext); let frame_mac_seed = { - let mac_digest: [u8; 16] = self - .ingress_mac - .clone() - .finalize() - .get(..16) - .ok_or_else(|| { - PeerConnectionError::CryptographyError("Invalid mac digest".to_owned()) - })? - .try_into() - .map_err(|_| { - PeerConnectionError::CryptographyError("Invalid mac digest".to_owned()) - })?; + let mac_digest: [u8; 16] = truncate_array(self.ingress_mac.clone().finalize().into()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); (H128(seed.into()) ^ H128(mac_digest)).0 }; self.ingress_mac.update(frame_mac_seed); - let expected_frame_mac: [u8; 16] = self - .ingress_mac - .clone() - .finalize() - .get(..16) - .ok_or_else(|| PeerConnectionError::CryptographyError("Invalid frame mac".to_owned()))? 
- .try_into() - .map_err(|_| PeerConnectionError::CryptographyError("Invalid frame mac".to_owned()))?; + let expected_frame_mac: [u8; 16] = + truncate_array(self.ingress_mac.clone().finalize().into()); if frame_mac != expected_frame_mac { return Err(PeerConnectionError::InvalidMessageFrame( @@ -333,18 +295,7 @@ impl Encoder for RLPxCodec { })?)?; let header_mac_seed = { - let mac_digest: [u8; 16] = self - .egress_mac - .clone() - .finalize() - .get(..16) - .ok_or_else(|| { - PeerConnectionError::CryptographyError("Invalid mac digest".to_owned()) - })? - .try_into() - .map_err(|_| { - PeerConnectionError::CryptographyError("Invalid mac digest".to_owned()) - })?; + let mac_digest: [u8; 16] = truncate_array(self.egress_mac.clone().finalize().into()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); let header_data = header @@ -359,11 +310,9 @@ impl Encoder for RLPxCodec { H128(seed.into()) ^ H128(header_data) }; self.egress_mac.update(header_mac_seed); - let header_mac = self.egress_mac.clone().finalize(); - let header_mac_data = header_mac.get(..16).ok_or_else(|| { - PeerConnectionError::CryptographyError("Invalid header mac".to_owned()) - })?; - header.extend_from_slice(header_mac_data); + let header_mac = self.egress_mac.clone().finalize().into(); + let header_mac_data: [u8; 16] = truncate_array(header_mac); + header.extend_from_slice(&header_mac_data); // Write header buffer.extend_from_slice(&header); @@ -381,18 +330,7 @@ impl Encoder for RLPxCodec { // frame-mac-seed = aes(mac-secret, keccak256.digest(egress-mac)[:16]) ^ keccak256.digest(egress-mac)[:16] let frame_mac_seed = { - let mac_digest: [u8; 16] = self - .egress_mac - .clone() - .finalize() - .get(..16) - .ok_or_else(|| { - PeerConnectionError::CryptographyError("Invalid mac digest".to_owned()) - })? 
- .try_into() - .map_err(|_| { - PeerConnectionError::CryptographyError("Invalid mac digest".to_owned()) - })?; + let mac_digest: [u8; 16] = truncate_array(self.egress_mac.clone().finalize().into()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); (H128(seed.into()) ^ H128(mac_digest)).0 diff --git a/crates/networking/p2p/rlpx/connection/handshake.rs b/crates/networking/p2p/rlpx/connection/handshake.rs index c53e09b881c..177ea98b82f 100644 --- a/crates/networking/p2p/rlpx/connection/handshake.rs +++ b/crates/networking/p2p/rlpx/connection/handshake.rs @@ -15,6 +15,7 @@ use crate::{ }; use aes::cipher::{KeyIvInit, StreamCipher}; use ethrex_common::{H128, H256, H512, Signature}; +use ethrex_crypto::keccak::keccak_hash; use ethrex_rlp::{ decode::RLPDecode, encode::RLPEncode, @@ -27,7 +28,6 @@ use secp256k1::{ PublicKey, SecretKey, ecdsa::{RecoverableSignature, RecoveryId}, }; -use sha3::{Digest, Keccak256}; use std::{ collections::HashMap, net::SocketAddr, @@ -78,7 +78,7 @@ pub(crate) async fn perform( // Local node is initator // keccak256(nonce || initiator-nonce) let hashed_nonces: [u8; 32] = - Keccak256::digest([remote_state.nonce.0, local_state.nonce.0].concat()).into(); + keccak_hash([remote_state.nonce.0, local_state.nonce.0].concat()); let codec = RLPxCodec::new(&local_state, &remote_state, hashed_nonces, eth_version)?; trace!(peer=%node, "Completed handshake as initiator"); (context, node, Framed::new(stream, codec)) @@ -98,7 +98,7 @@ pub(crate) async fn perform( // Remote node is initiator // keccak256(nonce || initiator-nonce) let hashed_nonces: [u8; 32] = - Keccak256::digest([local_state.nonce.0, remote_state.nonce.0].concat()).into(); + keccak_hash([local_state.nonce.0, remote_state.nonce.0].concat()); let codec = RLPxCodec::new(&local_state, &remote_state, hashed_nonces, eth_version)?; let node = Node::new( peer_addr.ip(), diff --git a/crates/networking/p2p/rlpx/utils.rs b/crates/networking/p2p/rlpx/utils.rs index 
23c5f64ab2e..ad6c0e0baf0 100644 --- a/crates/networking/p2p/rlpx/utils.rs +++ b/crates/networking/p2p/rlpx/utils.rs @@ -1,13 +1,13 @@ +use ethrex_common::utils::keccak; use ethrex_common::{H256, H512}; use ethrex_rlp::error::{RLPDecodeError, RLPEncodeError}; use secp256k1::ecdh::shared_secret_point; use secp256k1::{PublicKey, SecretKey}; -use sha3::{Digest, Keccak256}; +use sha2::{Digest, Sha256}; use snap::raw::{Decoder as SnappyDecoder, Encoder as SnappyEncoder, max_compress_len}; use std::array::TryFromSliceError; pub fn sha256(data: &[u8]) -> [u8; 32] { - use sha2::{Digest, Sha256}; Sha256::digest(data).into() } use crate::rlpx::error::CryptographyError; @@ -47,7 +47,7 @@ pub fn kdf(secret: &[u8], output: &mut [u8]) -> Result<(), CryptographyError> { /// Cpmputes the node_id from a public key (aka computes the Keccak256 hash of the given public key) pub fn node_id(public_key: &H512) -> H256 { - H256(Keccak256::new_with_prefix(public_key).finalize().into()) + keccak(public_key) } /// Decompresses the received public key diff --git a/crates/networking/p2p/types.rs b/crates/networking/p2p/types.rs index e4b191f60d8..0196e8834c0 100644 --- a/crates/networking/p2p/types.rs +++ b/crates/networking/p2p/types.rs @@ -1,6 +1,7 @@ use bytes::{BufMut, Bytes}; use ethrex_common::types::ForkId; use ethrex_common::{H256, H264, H512}; +use ethrex_crypto::keccak::keccak_hash; use ethrex_rlp::{ decode::RLPDecode, encode::RLPEncode, @@ -9,7 +10,6 @@ use ethrex_rlp::{ }; use secp256k1::{PublicKey, SecretKey}; use serde::{Deserialize, Serialize, ser::Serializer}; -use sha3::{Digest, Keccak256}; use std::net::Ipv6Addr; use std::{ fmt::Display, @@ -388,14 +388,13 @@ impl NodeRecord { Ok(H512::from_slice(&signature_bytes)) } - pub fn get_signature_digest(&self) -> Vec { + pub fn get_signature_digest(&self) -> [u8; 32] { let mut rlp = vec![]; structs::Encoder::new(&mut rlp) .encode_field(&self.seq) .encode_key_value_list::(&self.pairs) .finish(); - let digest = Keccak256::digest(&rlp); 
- digest.to_vec() + keccak_hash(&rlp) } } diff --git a/crates/networking/rpc/Cargo.toml b/crates/networking/rpc/Cargo.toml index 513016d088e..0c352372664 100644 --- a/crates/networking/rpc/Cargo.toml +++ b/crates/networking/rpc/Cargo.toml @@ -20,6 +20,7 @@ ethrex-common.workspace = true ethrex-storage.workspace = true ethrex-vm.workspace = true ethrex-blockchain.workspace = true +ethrex-crypto.workspace = true ethrex-p2p.workspace = true ethrex-rlp.workspace = true ethrex-trie.workspace = true @@ -32,7 +33,6 @@ jsonwebtoken.workspace = true rand.workspace = true tokio-util = { workspace = true, features = ["codec"] } reqwest.workspace = true -sha3 = "0.10.8" sha2.workspace = true jemalloc_pprof = { version = "0.8.0", optional = true, features = ["flamegraph", "symbolize"] } diff --git a/crates/storage/Cargo.toml b/crates/storage/Cargo.toml index c95898bae78..307f4f869a3 100644 --- a/crates/storage/Cargo.toml +++ b/crates/storage/Cargo.toml @@ -10,6 +10,7 @@ documentation.workspace = true [dependencies] ethrex-rlp.workspace = true ethrex-common.workspace = true +ethrex-crypto.workspace = true ethrex-trie.workspace = true async-trait.workspace = true @@ -18,7 +19,6 @@ anyhow = "1.0.86" bytes.workspace = true tracing.workspace = true thiserror.workspace = true -sha3.workspace = true hex.workspace = true serde.workspace = true serde_json = "1.0.117" diff --git a/crates/storage/store.rs b/crates/storage/store.rs index 75c97ce1073..14e9dfec5a2 100644 --- a/crates/storage/store.rs +++ b/crates/storage/store.rs @@ -12,11 +12,12 @@ use ethrex_common::{ BlockNumber, ChainConfig, Code, ForkId, Genesis, GenesisAccount, Index, Receipt, Transaction, }, + utils::keccak, }; +use ethrex_crypto::keccak::keccak_hash; use ethrex_rlp::decode::RLPDecode; use ethrex_rlp::encode::RLPEncode; use ethrex_trie::{Nibbles, NodeRLP, Trie, TrieLogger, TrieNode, TrieWitness}; -use sha3::{Digest as _, Keccak256}; use std::{collections::hash_map::Entry, sync::Arc}; use std::{ collections::{BTreeMap, 
HashMap}, @@ -1420,23 +1421,15 @@ impl Iterator for AncestorIterator { } pub fn hash_address(address: &Address) -> Vec { - Keccak256::new_with_prefix(address.to_fixed_bytes()) - .finalize() - .to_vec() + keccak_hash(address.to_fixed_bytes()).to_vec() } fn hash_address_fixed(address: &Address) -> H256 { - H256( - Keccak256::new_with_prefix(address.to_fixed_bytes()) - .finalize() - .into(), - ) + keccak(address.to_fixed_bytes()) } pub fn hash_key(key: &H256) -> Vec { - Keccak256::new_with_prefix(key.to_fixed_bytes()) - .finalize() - .to_vec() + keccak_hash(key.to_fixed_bytes()).to_vec() } #[derive(Debug, Default, Clone)] @@ -1463,6 +1456,7 @@ mod tests { Bloom, H160, constants::EMPTY_KECCACK_HASH, types::{Transaction, TxType}, + utils::keccak, }; use ethrex_rlp::decode::RLPDecode; use std::{fs, str::FromStr}; @@ -1518,7 +1512,7 @@ mod tests { let mut accounts: Vec<_> = (0u64..1_000) .map(|i| { ( - H256(Keccak256::digest(i.to_be_bytes()).into()), + keccak(i.to_be_bytes()), AccountState { nonce: 2 * i, balance: U256::from(3 * i), @@ -1544,14 +1538,9 @@ mod tests { } async fn test_iter_storage(store: Store) { - let address = H256(Keccak256::digest(12345u64.to_be_bytes()).into()); + let address = keccak(12345u64.to_be_bytes()); let mut slots: Vec<_> = (0u64..1_000) - .map(|i| { - ( - H256(Keccak256::digest(i.to_be_bytes()).into()), - U256::from(2 * i), - ) - }) + .map(|i| (keccak(i.to_be_bytes()), U256::from(2 * i))) .collect(); slots.sort_by_key(|a| a.0); let mut trie = store diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml index 9ae87fdb635..3f9bdbe185f 100644 --- a/crates/vm/Cargo.toml +++ b/crates/vm/Cargo.toml @@ -7,6 +7,7 @@ documentation.workspace = true [dependencies] ethrex-common.workspace = true +ethrex-crypto.workspace = true ethrex-levm = { path = "./levm", default-features = false } ethrex-trie.workspace = true ethrex-rlp.workspace = true @@ -18,7 +19,6 @@ hex.workspace = true lazy_static.workspace = true tracing.workspace = true serde.workspace = 
true -sha3.workspace = true rkyv.workspace = true bincode = "1" diff --git a/crates/vm/levm/Cargo.toml b/crates/vm/levm/Cargo.toml index b2fdf9a43b1..5ef7a39cff5 100644 --- a/crates/vm/levm/Cargo.toml +++ b/crates/vm/levm/Cargo.toml @@ -17,7 +17,6 @@ thiserror.workspace = true serde = { workspace = true, features = ["derive", "rc"] } serde_json.workspace = true -sha3 = "0.10.8" datatest-stable = "0.2.9" walkdir = "2.5.0" secp256k1.workspace = true diff --git a/crates/vm/levm/bench/revm_comparison/Cargo.toml b/crates/vm/levm/bench/revm_comparison/Cargo.toml index 9c9bca74ee7..4f9e3b53886 100644 --- a/crates/vm/levm/bench/revm_comparison/Cargo.toml +++ b/crates/vm/levm/bench/revm_comparison/Cargo.toml @@ -14,6 +14,7 @@ path = "src/lib.rs" ethrex-levm = { path = "../../" } ethrex-vm = { path = "../../.." } ethrex-common = { path = "../../../../common" } +ethrex-crypto = { path = "../../../../common/crypto" } ethrex-storage = { path = "../../../../storage" } ethrex-blockchain = { path = "../../../../blockchain" } rustc-hash = "2.1.1" @@ -21,7 +22,6 @@ hex = "0.4.3" bytes = { version = "1.6.0", features = ["serde"] } revm = "9.0.0" -sha3 = "0.10.8" [[bin]] name = "compile" @@ -30,4 +30,3 @@ path = "src/compile.rs" [[bin]] name = "benchmark" path = "src/benchmark.rs" - diff --git a/crates/vm/levm/src/opcode_handlers/keccak.rs b/crates/vm/levm/src/opcode_handlers/keccak.rs index 552c5909378..155e7cfc67b 100644 --- a/crates/vm/levm/src/opcode_handlers/keccak.rs +++ b/crates/vm/levm/src/opcode_handlers/keccak.rs @@ -6,7 +6,7 @@ use crate::{ vm::VM, }; use ethrex_common::utils::u256_from_big_endian; -use sha3::{Digest, Keccak256}; +use ethrex_crypto::keccak::keccak_hash; // KECCAK256 (1) // Opcodes: KECCAK256 @@ -25,11 +25,10 @@ impl<'a> VM<'a> { size, )?)?; - let mut hasher = Keccak256::new(); - hasher.update(current_call_frame.memory.load_range(offset, size)?); + let hash = keccak_hash(current_call_frame.memory.load_range(offset, size)?); current_call_frame .stack - 
.push1(u256_from_big_endian(&hasher.finalize()))?; + .push1(u256_from_big_endian(&hash))?; Ok(OpcodeResult::Continue) } diff --git a/crates/vm/levm/src/precompiles.rs b/crates/vm/levm/src/precompiles.rs index 3bb885d6edf..635e1cff26c 100644 --- a/crates/vm/levm/src/precompiles.rs +++ b/crates/vm/levm/src/precompiles.rs @@ -39,7 +39,7 @@ use p256::{ ecdsa::{Signature as P256Signature, signature::hazmat::PrehashVerifier}, elliptic_curve::bigint::U256 as P256Uint, }; -use sha3::Digest; +use sha2::Digest; use std::borrow::Cow; use std::ops::Mul; @@ -376,8 +376,6 @@ pub(crate) fn fill_with_zeros(calldata: &Bytes, target_len: usize) -> Bytes { #[cfg(all(not(feature = "sp1"), not(feature = "risc0")))] pub fn ecrecover(calldata: &Bytes, gas_remaining: &mut u64, _fork: Fork) -> Result { - use sha3::Keccak256; - use crate::gas_cost::ECRECOVER_COST; increase_precompile_consumed_gas(ECRECOVER_COST, gas_remaining)?; @@ -420,7 +418,8 @@ pub fn ecrecover(calldata: &Bytes, gas_remaining: &mut u64, _fork: Fork) -> Resu }; // We need to take the 64 bytes from the public key (discarding the first pos of the slice) - let public_key_hash = Keccak256::digest(&public_key.serialize_uncompressed()[1..]); + let public_key_hash = + ethrex_crypto::keccak::keccak_hash(&public_key.serialize_uncompressed()[1..]); // Address is the last 20 bytes of the hash. 
#[expect(clippy::indexing_slicing)] diff --git a/crates/vm/levm/src/utils.rs b/crates/vm/levm/src/utils.rs index 2b5d305da58..7b6e95739b2 100644 --- a/crates/vm/levm/src/utils.rs +++ b/crates/vm/levm/src/utils.rs @@ -26,13 +26,13 @@ use ethrex_common::{ utils::{keccak, u256_to_big_endian}, }; use ethrex_common::{types::TxKind, utils::u256_from_big_endian_const}; +use ethrex_crypto::keccak::keccak_hash; use ethrex_rlp; use ethrex_rlp::encode::RLPEncode; use secp256k1::{ Message, ecdsa::{RecoverableSignature, RecoveryId}, }; -use sha3::{Digest, Keccak256}; use std::collections::{BTreeMap, HashMap}; pub type Storage = HashMap; @@ -366,16 +366,12 @@ pub fn eip7702_recover_address( return Ok(None); } - let rlp_buf = (auth_tuple.chain_id, auth_tuple.address, auth_tuple.nonce).encode_to_vec(); + let mut rlp_buf = Vec::with_capacity(128); + rlp_buf.push(MAGIC); + (auth_tuple.chain_id, auth_tuple.address, auth_tuple.nonce).encode(&mut rlp_buf); + let bytes = keccak_hash(&rlp_buf); - let mut hasher = Keccak256::new(); - hasher.update([MAGIC]); - hasher.update(rlp_buf); - let bytes = &mut hasher.finalize(); - - let Ok(message) = Message::from_digest_slice(bytes) else { - return Ok(None); - }; + let message = Message::from_digest(bytes); let bytes = [ auth_tuple.r_signature.to_big_endian(), @@ -399,14 +395,10 @@ pub fn eip7702_recover_address( }; let public_key = authority.serialize_uncompressed(); - let mut hasher = Keccak256::new(); - hasher.update(public_key.get(1..).ok_or(InternalError::Slicing)?); - let address_hash = hasher.finalize(); + let address_hash = keccak_hash(&public_key[1..]); // Get the last 20 bytes of the hash -> Address - let authority_address_bytes: [u8; 20] = address_hash - .get(12..32) - .ok_or(InternalError::Slicing)? + let authority_address_bytes: [u8; 20] = address_hash[12..] 
.try_into() .map_err(|_| InternalError::TypeConversion)?; Ok(Some(Address::from_slice(&authority_address_bytes))) diff --git a/tooling/Cargo.toml b/tooling/Cargo.toml index baca13995d8..60a6b934771 100644 --- a/tooling/Cargo.toml +++ b/tooling/Cargo.toml @@ -22,6 +22,7 @@ documentation = "https://docs.ethrex.xyz" [workspace.dependencies] ethrex-blockchain = { path = "../crates/blockchain" } ethrex-common = { path = "../crates/common" } +ethrex-crypto = { path = "../crates/common/crypto" } ethrex-config = { path = "../crates/common/config" } ethrex-p2p = { path = "../crates/networking/p2p" } ethrex-rpc = { path = "../crates/networking/rpc" } @@ -56,7 +57,6 @@ hex-literal = "0.4.1" crc32fast = "1.4.2" lazy_static = "1.5.0" sha2 = "0.10.8" -sha3 = "0.10.8" tokio-util = { version = "0.7.15", features = ["rt"] } jsonwebtoken = "9.3.0" rand = "0.8.5" From 7134c90a2e5197a997810763732ba1f5d85c611c Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Wed, 5 Nov 2025 18:39:10 -0300 Subject: [PATCH 09/40] fixes --- crates/l2/common/src/merkle_tree.rs | 18 +++++++++--------- crates/networking/rpc/rpc.rs | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/l2/common/src/merkle_tree.rs b/crates/l2/common/src/merkle_tree.rs index 35c45a8d32f..95939298418 100644 --- a/crates/l2/common/src/merkle_tree.rs +++ b/crates/l2/common/src/merkle_tree.rs @@ -1,6 +1,6 @@ use ethrex_common::H256; +use ethrex_crypto::keccak::keccak_hash; use lambdaworks_crypto::merkle_tree::{merkle::MerkleTree, traits::IsMerkleTreeBackend}; -use sha3::{Digest, Keccak256}; // We use a newtype wrapper around `H256` because Rust's orphan rule // prevents implementing a foreign trait (`IsMerkleTreeBackend`) for a foreign type (`H256`). 
@@ -29,15 +29,15 @@ impl IsMerkleTreeBackend for TreeData { /// /// Source: https://github.com/OpenZeppelin/openzeppelin-contracts/blob/1a87de932664d9b905612f4d9d1655fd27a41722/contracts/utils/cryptography/MerkleProof.sol#L114-L128 fn hash_new_parent(child_1: &Self::Node, child_2: &Self::Node) -> Self::Node { - let mut hasher = Keccak256::new(); - if child_1 < child_2 { - hasher.update(child_1); - hasher.update(child_2); + let (left, right) = if child_1 < child_2 { + (child_1, child_2) } else { - hasher.update(child_2); - hasher.update(child_1); - } - hasher.finalize().into() + (child_2, child_1) + }; + let mut data = [0u8; 64]; + data[..32].copy_from_slice(left); + data[64..].copy_from_slice(right); + keccak_hash(data) } } diff --git a/crates/networking/rpc/rpc.rs b/crates/networking/rpc/rpc.rs index 4db2a42df10..c1de957d89a 100644 --- a/crates/networking/rpc/rpc.rs +++ b/crates/networking/rpc/rpc.rs @@ -612,8 +612,8 @@ mod tests { H160, types::{ChainConfig, Genesis}, }; + use ethrex_crypto::keccak::keccak_hash; use ethrex_storage::{EngineType, Store}; - use sha3::{Digest, Keccak256}; use std::io::BufReader; use std::str::FromStr; use std::{fs::File, path::Path}; @@ -653,7 +653,7 @@ mod tests { "result": { "enode": "enode://d860a01f9722d78051619d1e2351aba3f43f943f6f00718d1b9baa4101932a1f5011f16bb2b1bb35db20d6fe28fa0bf09636d26a87d31de9ec6203eeedb1f666@127.0.0.1:30303", "enr": enr_url, - "id": hex::encode(Keccak256::digest(local_p2p_node.public_key)), + "id": hex::encode(keccak_hash(local_p2p_node.public_key)), "ip": "127.0.0.1", "name": "ethrex/test", "ports": { From 7ed1d90ba7f08e09b61a4fd9cda44fe25574f750 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Wed, 5 Nov 2025 18:44:52 -0300 Subject: [PATCH 10/40] use option raw to avoid having to modify braces --- crates/common/crypto/keccak/keccak1600-armv8.s | 16 ++++++++-------- crates/common/crypto/keccak/mod.rs | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git 
a/crates/common/crypto/keccak/keccak1600-armv8.s b/crates/common/crypto/keccak/keccak1600-armv8.s index b352d764b92..4f6966b82db 100644 --- a/crates/common/crypto/keccak/keccak1600-armv8.s +++ b/crates/common/crypto/keccak/keccak1600-armv8.s @@ -605,7 +605,7 @@ Loop_ce: .inst 0xce3a62f7 //bcax v23.16b,v23.16b,v26.16b, v24.16b .inst 0xce286b18 //bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1] - ld1r {{v26.2d}},[x10],#8 + ld1r {v26.2d},[x10],#8 .inst 0xce330fd1 //bcax v17.16b,v30.16b, v19.16b,v3.16b // A[0][3]=A[3][3] .inst 0xce2f4c72 //bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3] @@ -720,21 +720,21 @@ Loop_absorb_ce: blo Labsorbed_ce cmp x3,#104 - ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 eor v0.16b,v0.16b,v27.16b eor v1.16b,v1.16b,v28.16b eor v2.16b,v2.16b,v29.16b eor v3.16b,v3.16b,v30.16b - ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 eor v4.16b,v4.16b,v27.16b eor v5.16b,v5.16b,v28.16b eor v6.16b,v6.16b,v29.16b eor v7.16b,v7.16b,v30.16b - ld1 {{v31.8b}},[x1],#8 // A[1][4] ^= *inp++ + ld1 {v31.8b},[x1],#8 // A[1][4] ^= *inp++ eor v8.16b,v8.16b,v31.16b blo Lprocess_block_ce - ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 eor v9.16b,v9.16b,v27.16b eor v10.16b,v10.16b,v28.16b eor v11.16b,v11.16b,v29.16b @@ -742,18 +742,18 @@ Loop_absorb_ce: beq Lprocess_block_ce cmp x3,#144 - ld1 {{v27.8b,v28.8b,v29.8b,v30.8b}},[x1],#32 + ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 eor v13.16b,v13.16b,v27.16b eor v14.16b,v14.16b,v28.16b eor v15.16b,v15.16b,v29.16b eor v16.16b,v16.16b,v30.16b blo Lprocess_block_ce - ld1 {{v31.8b}},[x1],#8 // A[3][3] ^= *inp++ + ld1 {v31.8b},[x1],#8 // A[3][3] ^= *inp++ eor v17.16b,v17.16b,v31.16b beq Lprocess_block_ce - ld1 {{v28.8b,v29.8b,v30.8b}},[x1],#24 + ld1 {v28.8b,v29.8b,v30.8b},[x1],#24 eor v18.16b,v18.16b,v28.16b eor v19.16b,v19.16b,v29.16b eor v20.16b,v20.16b,v30.16b diff --git 
a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index b08c9b8c799..104a32ec0ad 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -1,5 +1,5 @@ #[cfg(target_arch = "aarch64")] -std::arch::global_asm!(include_str!("keccak1600-armv8.s")); +std::arch::global_asm!(include_str!("keccak1600-armv8.s"), options(raw)); #[cfg(target_arch = "x86_64")] std::arch::global_asm!(include_str!("keccak1600-x86_64.s"), options(att_syntax)); From 87c8eefdfcd3c3aaec47f482574da9a5a4f1753c Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Wed, 5 Nov 2025 18:46:59 -0300 Subject: [PATCH 11/40] updated modified header --- crates/common/crypto/keccak/keccak1600-armv8.s | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/common/crypto/keccak/keccak1600-armv8.s b/crates/common/crypto/keccak/keccak1600-armv8.s index 4f6966b82db..de354ecaba0 100644 --- a/crates/common/crypto/keccak/keccak1600-armv8.s +++ b/crates/common/crypto/keccak/keccak1600-armv8.s @@ -1,6 +1,4 @@ // Modified: -// - All instances of curly brackets need to be escaped with a second bracket to avoid Rust's -// `global_asm` trying to interpret them as a template substitution. // - Ran `cpp` to substitute constants. // - Commented out ARM assembly annotations (.size, .type) used only for debugging purposes and not understood by // Rust. 
From b5ff6c852931074fec36d81e91d3159367711b4d Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Thu, 6 Nov 2025 19:47:52 -0300 Subject: [PATCH 12/40] cargo.lock --- Cargo.lock | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 360ed37979f..e02aeb07aed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3310,6 +3310,8 @@ dependencies = [ "secp256k1", "serde", "serde_json", + "spawned-concurrency", + "spawned-rt", "thiserror 2.0.17", "tikv-jemallocator", "tokio", @@ -3710,6 +3712,8 @@ dependencies = [ "serde", "serde_json", "sha2", + "spawned-concurrency", + "spawned-rt", "thiserror 2.0.17", "tokio", "tokio-util", From ba715ab0f71a396461d939a881cc3d57fb3e9aab Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Fri, 7 Nov 2025 09:31:48 +0100 Subject: [PATCH 13/40] add update and finalize via Keccak256Asm --- crates/common/crypto/keccak/mod.rs | 199 +++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index 104a32ec0ad..cf1977596d1 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -54,6 +54,96 @@ pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32] { hash_buf } +pub struct Keccak256Asm { + state: State, + tail_buf: [u8; BLOCK_SIZE], + tail_len: usize, +} + +impl Keccak256Asm { + pub fn new() -> Self { + Self { + state: State::default(), + tail_buf: [0; BLOCK_SIZE], + tail_len: 0, + } + } + + pub fn update(&mut self, mut data: &[u8]) { + unsafe { + // partial block + if self.tail_len > 0 { + let need = BLOCK_SIZE - self.tail_len; + if data.len() < need { + // still partial block + self.tail_buf[self.tail_len..self.tail_len + data.len()].copy_from_slice(data); + self.tail_len += data.len(); + return; + } + + // complete block + self.tail_buf[self.tail_len..BLOCK_SIZE].copy_from_slice(&data[..need]); + + SHA3_absorb( + &mut self.state, + self.tail_buf.as_ptr(), + self.tail_buf.len(), + BLOCK_SIZE, + ); + + 
self.tail_len = 0; + self.tail_buf.fill(0); + data = &data[need..]; + } + } + + match data { + [] => {} + data if data.len() < BLOCK_SIZE => unsafe { + self.tail_len = data.len(); + self.tail_buf + .get_unchecked_mut(..self.tail_len) + .copy_from_slice(data); + }, + data => unsafe { + let rem = SHA3_absorb(&mut self.state, data.as_ptr(), data.len(), BLOCK_SIZE); + self.tail_len = rem; + if rem != 0 { + let tail_data = data.get_unchecked(data.len() - rem..); + self.tail_buf + .get_unchecked_mut(..rem) + .copy_from_slice(tail_data); + } + }, + } + } + + pub fn finalize(mut self) -> [u8; 32] { + let mut hash_buf = [0u8; 32]; + + unsafe { + *self.tail_buf.get_unchecked_mut(self.tail_len) = 0x01; + *self.tail_buf.get_unchecked_mut(BLOCK_SIZE - 1) |= 0x80; + + SHA3_absorb( + &mut self.state, + self.tail_buf.as_ptr(), + self.tail_buf.len(), + BLOCK_SIZE, + ); + + SHA3_squeeze( + &mut self.state, + hash_buf.as_mut_ptr(), + hash_buf.len(), + BLOCK_SIZE, + ); + } + + hash_buf + } +} + #[cfg(test)] mod test { use super::*; @@ -111,4 +201,113 @@ mod test { "3e4916729e2522af4937548f5848a5b49067eec910a0a6a890b0c71dde08854e", ); } + + #[test] + fn keccak_asm_empty() { + let keccak = Keccak256Asm::new(); + assert_eq!( + keccak + .finalize() + .into_iter() + .map(|x| format!("{x:02x}")) + .collect::(), + "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470", + ); + } + + #[test] + fn keccak_asm_half_block() { + let mut keccak = Keccak256Asm::new(); + let buf: [u8; BLOCK_SIZE >> 1] = + array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); + keccak.update(&buf); + + assert_eq!( + keccak + .finalize() + .into_iter() + .map(|x| format!("{x:02x}")) + .collect::(), + "337bf14237b641240bd3204e9991c8b96a5349613735ade90a5c2b8806355c11", + ); + } + + #[test] + fn keccak_asm_full_block() { + let mut keccak = Keccak256Asm::new(); + let buf: [u8; BLOCK_SIZE] = + array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); + keccak.update(&buf); + + assert_eq!( + 
keccak + .finalize() + .into_iter() + .map(|x| format!("{x:02x}")) + .collect::(), + "3f7424fa94a2f8c5a733b86dac312d85685f9af3dea919694cc6a8abfc075460", + ); + } + + #[test] + fn keccak_asm_almost_full_block() { + let mut keccak = Keccak256Asm::new(); + let buf: [u8; BLOCK_SIZE - 1] = + array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); + keccak.update(&buf); + + assert_eq!( + keccak + .finalize() + .into_iter() + .map(|x| format!("{x:02x}")) + .collect::(), + "3e4916729e2522af4937548f5848a5b49067eec910a0a6a890b0c71dde08854e", + ); + } + + #[test] + fn keccak_asm_two_half_updates() { + let mut keccak = Keccak256Asm::new(); + + let full: [u8; BLOCK_SIZE] = + array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); + + let half = BLOCK_SIZE / 2; + + keccak.update(&full[..half]); + keccak.update(&full[half..]); + + let buf = keccak + .finalize() + .into_iter() + .map(|x| format!("{x:02x}")) + .collect::(); + + assert_eq!( + buf, + "3f7424fa94a2f8c5a733b86dac312d85685f9af3dea919694cc6a8abfc075460" + ); + } + + #[test] + fn keccak_compare_one_shot_vs_two_updates() { + let full: Vec = (0..BLOCK_SIZE) + .map(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8) + .collect(); + + let mut k1 = Keccak256Asm::new(); + let mut k2 = Keccak256Asm::new(); + + k1.update(&full); + + k2.update(&full[..BLOCK_SIZE / 2]); + k2.update(&full[BLOCK_SIZE / 2..]); + + let h1 = k1.finalize(); + + let h2 = k2.finalize(); + + assert_eq!(h1, h2); + } } From 36d90047328cacc8f8b2de02afadacc53cd9336a Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Fri, 7 Nov 2025 09:33:38 +0100 Subject: [PATCH 14/40] inline --- crates/common/crypto/keccak/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index cf1977596d1..e6c4e11147a 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -61,6 +61,7 @@ pub struct Keccak256Asm { } impl Keccak256Asm { + #[inline] pub fn 
new() -> Self { Self { state: State::default(), @@ -69,6 +70,7 @@ impl Keccak256Asm { } } + #[inline] pub fn update(&mut self, mut data: &[u8]) { unsafe { // partial block @@ -118,6 +120,7 @@ impl Keccak256Asm { } } + #[inline] pub fn finalize(mut self) -> [u8; 32] { let mut hash_buf = [0u8; 32]; From 267469b88769a52472d89f09ec523359d20b4ddd Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Fri, 7 Nov 2025 10:10:12 +0100 Subject: [PATCH 15/40] test --- crates/common/crypto/keccak/mod.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index e6c4e11147a..5913c0c6fe3 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -313,4 +313,20 @@ mod test { assert_eq!(h1, h2); } + + #[test] + fn keccac_compare_small_than_block() { + let mut one = Keccak256Asm::new(); + let mut two = Keccak256Asm::new(); + + let a = vec![1u8; 30]; + let b = vec![1u8; 40]; + + one.update(&a); + one.update(&b); + + two.update(&[1u8; 70]); + + assert_eq!(one.finalize(), two.finalize()); + } } From d3b7544a6a92f29478e1a6d4074e25b36c7087b7 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Fri, 7 Nov 2025 10:15:59 +0100 Subject: [PATCH 16/40] fixes and use keccak asm --- crates/common/crypto/keccak/mod.rs | 19 ++++++++++++++++--- .../networking/p2p/rlpx/connection/codec.rs | 19 +++++++++---------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index 5913c0c6fe3..0bf1280e9ab 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -5,7 +5,7 @@ std::arch::global_asm!(include_str!("keccak1600-x86_64.s"), options(att_syntax)) const BLOCK_SIZE: usize = 136; -#[derive(Default)] +#[derive(Default, Clone, Copy, Debug)] #[repr(transparent)] struct State([u64; 25]); @@ -54,12 +54,23 @@ pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32] { hash_buf } 
+#[derive(Debug, Clone, Copy)] pub struct Keccak256Asm { state: State, tail_buf: [u8; BLOCK_SIZE], tail_len: usize, } +impl Default for Keccak256Asm { + fn default() -> Self { + Self { + state: State::default(), + tail_buf: [0; BLOCK_SIZE], + tail_len: 0, + } + } +} + impl Keccak256Asm { #[inline] pub fn new() -> Self { @@ -71,7 +82,8 @@ impl Keccak256Asm { } #[inline] - pub fn update(&mut self, mut data: &[u8]) { + pub fn update(&mut self, data: impl AsRef<[u8]>) -> Self { + let mut data = data.as_ref(); unsafe { // partial block if self.tail_len > 0 { @@ -80,7 +92,7 @@ impl Keccak256Asm { // still partial block self.tail_buf[self.tail_len..self.tail_len + data.len()].copy_from_slice(data); self.tail_len += data.len(); - return; + return *self; } // complete block @@ -118,6 +130,7 @@ impl Keccak256Asm { } }, } + *self } #[inline] diff --git a/crates/networking/p2p/rlpx/connection/codec.rs b/crates/networking/p2p/rlpx/connection/codec.rs index 1874a3f3d0e..7c35880f761 100644 --- a/crates/networking/p2p/rlpx/connection/codec.rs +++ b/crates/networking/p2p/rlpx/connection/codec.rs @@ -16,9 +16,8 @@ use ethrex_common::{ H128, H256, utils::{keccak, truncate_array}, }; -use ethrex_crypto::keccak::keccak_hash; +use ethrex_crypto::keccak::{Keccak256Asm, keccak_hash}; use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode as _}; -use sha3::{Digest as _, Keccak256}; use tokio::io::{AsyncRead, AsyncWrite}; use tokio_util::codec::{Decoder, Encoder, Framed}; @@ -30,8 +29,8 @@ type Aes256Ctr64BE = ctr::Ctr64BE; pub struct RLPxCodec { pub(crate) mac_key: H256, - pub(crate) ingress_mac: Keccak256, - pub(crate) egress_mac: Keccak256, + pub(crate) ingress_mac: Keccak256Asm, + pub(crate) egress_mac: Keccak256Asm, pub(crate) ingress_aes: Aes256Ctr64BE, pub(crate) egress_aes: Aes256Ctr64BE, pub(crate) eth_version: Arc>, @@ -61,14 +60,14 @@ impl RLPxCodec { let mac_key = keccak([ephemeral_key_secret, aes_key.0].concat()); // egress-mac = keccak256.init((mac-secret ^ remote-nonce) || 
auth) - let egress_mac = Keccak256::default() - .chain_update(mac_key ^ remote_state.nonce) - .chain_update(&local_state.init_message); + let egress_mac = Keccak256Asm::default() + .update(mac_key ^ remote_state.nonce) + .update(&local_state.init_message); // ingress-mac = keccak256.init((mac-secret ^ initiator-nonce) || ack) - let ingress_mac = Keccak256::default() - .chain_update(mac_key ^ local_state.nonce) - .chain_update(&remote_state.init_message); + let ingress_mac = Keccak256Asm::default() + .update(mac_key ^ local_state.nonce) + .update(&remote_state.init_message); let ingress_aes = ::new(&aes_key.0.into(), &[0; 16].into()); let egress_aes = ingress_aes.clone(); From b7828eaf2a71a85e4b2687858618db5f88d4bf51 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Fri, 7 Nov 2025 12:24:11 +0100 Subject: [PATCH 17/40] remove sha3 from p2p --- Cargo.lock | 1 - crates/networking/p2p/Cargo.toml | 1 - crates/networking/p2p/rlpx/error.rs | 5 +++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e02aeb07aed..1a4b87607b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3622,7 +3622,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "sha3", "snap", "spawned-concurrency", "spawned-rt", diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index 0e3968abeca..f026745a83e 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -39,7 +39,6 @@ prometheus = "0.14.0" tokio-stream = "0.1.17" # Used for incremental hashing due to MAC, intentionally not from workspace. 
-sha3 = { version = "0.10.8", features = [ "asm" ] } serde_json = "1.0.117" diff --git a/crates/networking/p2p/rlpx/error.rs b/crates/networking/p2p/rlpx/error.rs index fe5e1db9426..f1c510a1916 100644 --- a/crates/networking/p2p/rlpx/error.rs +++ b/crates/networking/p2p/rlpx/error.rs @@ -1,5 +1,6 @@ use super::{message::Message, p2p::DisconnectReason}; use crate::discv4::peer_table::PeerTableError; +use aes::cipher::InvalidLength; use ethrex_blockchain::error::{ChainError, MempoolError}; use ethrex_rlp::error::{RLPDecodeError, RLPEncodeError}; use ethrex_storage::error::StoreError; @@ -107,8 +108,8 @@ impl From for PeerConnectionError { } } -impl From for PeerConnectionError { - fn from(e: sha3::digest::InvalidLength) -> Self { +impl From for PeerConnectionError { + fn from(e: InvalidLength) -> Self { PeerConnectionError::CryptographyError(e.to_string()) } } From 94022b26b57af6faaec635adbfdda9b9f6060cac Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Fri, 7 Nov 2025 12:25:20 +0100 Subject: [PATCH 18/40] fix duplicate keys --- tooling/Cargo.lock | 20 +++++++++++--------- tooling/Cargo.toml | 1 - 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tooling/Cargo.lock b/tooling/Cargo.lock index 34d4b2965ba..c3860c604b3 100644 --- a/tooling/Cargo.lock +++ b/tooling/Cargo.lock @@ -3162,6 +3162,8 @@ dependencies = [ "secp256k1 0.30.0", "serde", "serde_json", + "spawned-concurrency", + "spawned-rt", "thiserror 2.0.17", "tikv-jemallocator", "tokio", @@ -3179,6 +3181,7 @@ version = "5.0.0" dependencies = [ "bytes", "ethrex-common 5.0.0", + "ethrex-crypto", "ethrex-metrics", "ethrex-rlp 5.0.0", "ethrex-storage 5.0.0", @@ -3187,7 +3190,6 @@ dependencies = [ "hex", "rustc-hash 2.1.1", "secp256k1 0.30.0", - "sha3", "thiserror 2.0.17", "tokio", "tokio-util", @@ -3243,7 +3245,6 @@ dependencies = [ "serde", "serde_json", "sha2 0.10.9", - "sha3", "thiserror 2.0.17", "tinyvec", "tracing", @@ -3353,6 +3354,7 @@ dependencies = [ "bytes", "ethereum-types 0.15.1", 
"ethrex-common 5.0.0", + "ethrex-crypto", "ethrex-rlp 5.0.0", "ethrex-storage 5.0.0", "ethrex-trie 5.0.0", @@ -3363,7 +3365,6 @@ dependencies = [ "secp256k1 0.30.0", "serde", "serde_with", - "sha3", "thiserror 2.0.17", ] @@ -3422,7 +3423,6 @@ dependencies = [ "serde", "serde_json", "sha2 0.10.9", - "sha3", "strum 0.27.2", "thiserror 2.0.17", "walkdir", @@ -3456,6 +3456,7 @@ dependencies = [ "ethereum-types 0.15.1", "ethrex-blockchain", "ethrex-common 5.0.0", + "ethrex-crypto", "ethrex-rlp 5.0.0", "ethrex-storage 5.0.0", "ethrex-storage-rollup", @@ -3474,7 +3475,6 @@ dependencies = [ "serde", "serde_json", "sha2 0.10.9", - "sha3", "snap", "spawned-concurrency", "spawned-rt", @@ -3556,6 +3556,7 @@ dependencies = [ "ethereum-types 0.15.1", "ethrex-blockchain", "ethrex-common 5.0.0", + "ethrex-crypto", "ethrex-p2p", "ethrex-rlp 5.0.0", "ethrex-storage 5.0.0", @@ -3569,7 +3570,8 @@ dependencies = [ "serde", "serde_json", "sha2 0.10.9", - "sha3", + "spawned-concurrency", + "spawned-rt", "thiserror 2.0.17", "tokio", "tokio-util", @@ -3645,6 +3647,7 @@ dependencies = [ "bytes", "ethereum-types 0.15.1", "ethrex-common 5.0.0", + "ethrex-crypto", "ethrex-rlp 5.0.0", "ethrex-trie 5.0.0", "hex", @@ -3654,7 +3657,6 @@ dependencies = [ "rustc-hash 2.1.1", "serde", "serde_json", - "sha3", "thiserror 2.0.17", "tokio", "tracing", @@ -3716,6 +3718,7 @@ dependencies = [ "crossbeam 0.8.4", "digest 0.10.7", "ethereum-types 0.15.1", + "ethrex-crypto", "ethrex-rlp 5.0.0", "ethrex-threadpool", "hex", @@ -3723,7 +3726,6 @@ dependencies = [ "rocksdb", "serde", "serde_json", - "sha3", "smallvec", "thiserror 2.0.17", "tracing", @@ -3739,6 +3741,7 @@ dependencies = [ "dyn-clone", "ethereum-types 0.15.1", "ethrex-common 5.0.0", + "ethrex-crypto", "ethrex-levm", "ethrex-rlp 5.0.0", "ethrex-trie 5.0.0", @@ -3746,7 +3749,6 @@ dependencies = [ "lazy_static", "rkyv", "serde", - "sha3", "thiserror 2.0.17", "tracing", ] diff --git a/tooling/Cargo.toml b/tooling/Cargo.toml index 60a6b934771..9b0a056937d 
100644 --- a/tooling/Cargo.toml +++ b/tooling/Cargo.toml @@ -31,7 +31,6 @@ ethrex-vm = { path = "../crates/vm", default-features = false } ethrex-levm = { path = "../crates/vm/levm" } ethrex-trie = { path = "../crates/common/trie" } ethrex-rlp = { path = "../crates/common/rlp" } -ethrex-crypto = { path = "../crates/common/crypto" } ethrex-l2 = { path = "../crates/l2" } ethrex-l2-common = { path = "../crates/l2/common" } ethrex-sdk = { path = "../crates/l2/sdk" } From 3358cb6f91a0fd466f1ff5de69cd290423cb0cea Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Fri, 7 Nov 2025 13:02:37 +0100 Subject: [PATCH 19/40] rename --- crates/common/crypto/keccak/mod.rs | 24 +++++++++---------- .../networking/p2p/rlpx/connection/codec.rs | 10 ++++---- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index 0bf1280e9ab..b3cd6139cfc 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -55,13 +55,13 @@ pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32] { } #[derive(Debug, Clone, Copy)] -pub struct Keccak256Asm { +pub struct Keccak256 { state: State, tail_buf: [u8; BLOCK_SIZE], tail_len: usize, } -impl Default for Keccak256Asm { +impl Default for Keccak256 { fn default() -> Self { Self { state: State::default(), @@ -71,7 +71,7 @@ impl Default for Keccak256Asm { } } -impl Keccak256Asm { +impl Keccak256 { #[inline] pub fn new() -> Self { Self { @@ -220,7 +220,7 @@ mod test { #[test] fn keccak_asm_empty() { - let keccak = Keccak256Asm::new(); + let keccak = Keccak256::new(); assert_eq!( keccak .finalize() @@ -233,7 +233,7 @@ mod test { #[test] fn keccak_asm_half_block() { - let mut keccak = Keccak256Asm::new(); + let mut keccak = Keccak256::new(); let buf: [u8; BLOCK_SIZE >> 1] = array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); keccak.update(&buf); @@ -250,7 +250,7 @@ mod test { #[test] fn keccak_asm_full_block() { - let mut keccak = 
Keccak256Asm::new(); + let mut keccak = Keccak256::new(); let buf: [u8; BLOCK_SIZE] = array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); keccak.update(&buf); @@ -267,7 +267,7 @@ mod test { #[test] fn keccak_asm_almost_full_block() { - let mut keccak = Keccak256Asm::new(); + let mut keccak = Keccak256::new(); let buf: [u8; BLOCK_SIZE - 1] = array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); keccak.update(&buf); @@ -284,7 +284,7 @@ mod test { #[test] fn keccak_asm_two_half_updates() { - let mut keccak = Keccak256Asm::new(); + let mut keccak = Keccak256::new(); let full: [u8; BLOCK_SIZE] = array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); @@ -312,8 +312,8 @@ mod test { .map(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8) .collect(); - let mut k1 = Keccak256Asm::new(); - let mut k2 = Keccak256Asm::new(); + let mut k1 = Keccak256::new(); + let mut k2 = Keccak256::new(); k1.update(&full); @@ -329,8 +329,8 @@ mod test { #[test] fn keccac_compare_small_than_block() { - let mut one = Keccak256Asm::new(); - let mut two = Keccak256Asm::new(); + let mut one = Keccak256::new(); + let mut two = Keccak256::new(); let a = vec![1u8; 30]; let b = vec![1u8; 40]; diff --git a/crates/networking/p2p/rlpx/connection/codec.rs b/crates/networking/p2p/rlpx/connection/codec.rs index 7c35880f761..e39d49e1781 100644 --- a/crates/networking/p2p/rlpx/connection/codec.rs +++ b/crates/networking/p2p/rlpx/connection/codec.rs @@ -16,7 +16,7 @@ use ethrex_common::{ H128, H256, utils::{keccak, truncate_array}, }; -use ethrex_crypto::keccak::{Keccak256Asm, keccak_hash}; +use ethrex_crypto::keccak::{Keccak256, keccak_hash}; use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode as _}; use tokio::io::{AsyncRead, AsyncWrite}; use tokio_util::codec::{Decoder, Encoder, Framed}; @@ -29,8 +29,8 @@ type Aes256Ctr64BE = ctr::Ctr64BE; pub struct RLPxCodec { pub(crate) mac_key: H256, - pub(crate) ingress_mac: Keccak256Asm, - pub(crate) egress_mac: Keccak256Asm, + 
pub(crate) ingress_mac: Keccak256, + pub(crate) egress_mac: Keccak256, pub(crate) ingress_aes: Aes256Ctr64BE, pub(crate) egress_aes: Aes256Ctr64BE, pub(crate) eth_version: Arc>, @@ -60,12 +60,12 @@ impl RLPxCodec { let mac_key = keccak([ephemeral_key_secret, aes_key.0].concat()); // egress-mac = keccak256.init((mac-secret ^ remote-nonce) || auth) - let egress_mac = Keccak256Asm::default() + let egress_mac = Keccak256::default() .update(mac_key ^ remote_state.nonce) .update(&local_state.init_message); // ingress-mac = keccak256.init((mac-secret ^ initiator-nonce) || ack) - let ingress_mac = Keccak256Asm::default() + let ingress_mac = Keccak256::default() .update(mac_key ^ local_state.nonce) .update(&remote_state.init_message); From abb40c9f214449f2a5a5df47da0ff943f7dbb80f Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Fri, 7 Nov 2025 13:11:46 +0100 Subject: [PATCH 20/40] lint --- crates/common/crypto/keccak/mod.rs | 8 ++++---- crates/common/utils.rs | 4 ++-- .../networking/p2p/rlpx/connection/codec.rs | 19 +++++++++---------- crates/vm/levm/src/precompiles.rs | 1 - 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index b3cd6139cfc..64452acbe11 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -236,7 +236,7 @@ mod test { let mut keccak = Keccak256::new(); let buf: [u8; BLOCK_SIZE >> 1] = array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); - keccak.update(&buf); + keccak.update(buf); assert_eq!( keccak @@ -253,7 +253,7 @@ mod test { let mut keccak = Keccak256::new(); let buf: [u8; BLOCK_SIZE] = array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); - keccak.update(&buf); + keccak.update(buf); assert_eq!( keccak @@ -270,7 +270,7 @@ mod test { let mut keccak = Keccak256::new(); let buf: [u8; BLOCK_SIZE - 1] = array::from_fn(|i| (i << 5 & 0xF0 | ((i << 1) + 1) & 0x0F) as u8); - keccak.update(&buf); + 
keccak.update(buf); assert_eq!( keccak @@ -338,7 +338,7 @@ mod test { one.update(&a); one.update(&b); - two.update(&[1u8; 70]); + two.update([1u8; 70]); assert_eq!(one.finalize(), two.finalize()); } diff --git a/crates/common/utils.rs b/crates/common/utils.rs index 7ae2d03793c..2da129c2df0 100644 --- a/crates/common/utils.rs +++ b/crates/common/utils.rs @@ -68,8 +68,8 @@ pub fn keccak(data: impl AsRef<[u8]>) -> H256 { H256(keccak_hash(data)) } -/// Allocation-free operations on arrays. - +// Allocation-free operations on arrays. +/// /// Truncates an array of size N to size M. /// Fails compilation if N < M. pub fn truncate_array(data: [u8; N]) -> [u8; M] { diff --git a/crates/networking/p2p/rlpx/connection/codec.rs b/crates/networking/p2p/rlpx/connection/codec.rs index e39d49e1781..c14dd05cee5 100644 --- a/crates/networking/p2p/rlpx/connection/codec.rs +++ b/crates/networking/p2p/rlpx/connection/codec.rs @@ -121,7 +121,7 @@ impl Decoder for RLPxCodec { // Validate MAC header // header-mac-seed = aes(mac-secret, keccak256.digest(egress-mac)[:16]) ^ header-ciphertext let header_mac_seed = { - let mac_digest: [u8; 16] = truncate_array(self.ingress_mac.clone().finalize().into()); + let mac_digest: [u8; 16] = truncate_array(self.ingress_mac.finalize()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); (H128(seed.into()) @@ -135,11 +135,11 @@ impl Decoder for RLPxCodec { // ingress-mac = keccak256.update(ingress-mac, header-mac-seed) // Use temporary value as it can be discarded if the buffer does not contain yet the full message - let mut temp_ingress_mac = self.ingress_mac.clone(); + let mut temp_ingress_mac = self.ingress_mac; temp_ingress_mac.update(header_mac_seed); // header-mac = keccak256.digest(egress-mac)[:16] - let expected_header_mac = H128(truncate_array(temp_ingress_mac.clone().finalize().into())); + let expected_header_mac = H128(truncate_array(temp_ingress_mac.finalize())); if header_mac != expected_header_mac.0 { return 
Err(PeerConnectionError::InvalidMessageFrame( @@ -205,14 +205,13 @@ impl Decoder for RLPxCodec { // check MAC self.ingress_mac.update(&frame_ciphertext); let frame_mac_seed = { - let mac_digest: [u8; 16] = truncate_array(self.ingress_mac.clone().finalize().into()); + let mac_digest: [u8; 16] = truncate_array(self.ingress_mac.finalize()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); (H128(seed.into()) ^ H128(mac_digest)).0 }; self.ingress_mac.update(frame_mac_seed); - let expected_frame_mac: [u8; 16] = - truncate_array(self.ingress_mac.clone().finalize().into()); + let expected_frame_mac: [u8; 16] = truncate_array(self.ingress_mac.finalize()); if frame_mac != expected_frame_mac { return Err(PeerConnectionError::InvalidMessageFrame( @@ -294,7 +293,7 @@ impl Encoder for RLPxCodec { })?)?; let header_mac_seed = { - let mac_digest: [u8; 16] = truncate_array(self.egress_mac.clone().finalize().into()); + let mac_digest: [u8; 16] = truncate_array(self.egress_mac.finalize()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); let header_data = header @@ -309,7 +308,7 @@ impl Encoder for RLPxCodec { H128(seed.into()) ^ H128(header_data) }; self.egress_mac.update(header_mac_seed); - let header_mac = self.egress_mac.clone().finalize().into(); + let header_mac = self.egress_mac.finalize(); let header_mac_data: [u8; 16] = truncate_array(header_mac); header.extend_from_slice(&header_mac_data); @@ -329,13 +328,13 @@ impl Encoder for RLPxCodec { // frame-mac-seed = aes(mac-secret, keccak256.digest(egress-mac)[:16]) ^ keccak256.digest(egress-mac)[:16] let frame_mac_seed = { - let mac_digest: [u8; 16] = truncate_array(self.egress_mac.clone().finalize().into()); + let mac_digest: [u8; 16] = truncate_array(self.egress_mac.finalize()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); (H128(seed.into()) ^ H128(mac_digest)).0 }; self.egress_mac.update(frame_mac_seed); - let frame_mac = 
self.egress_mac.clone().finalize(); + let frame_mac = self.egress_mac.finalize(); // Write frame-mac buffer.extend_from_slice(&frame_mac[..16]); diff --git a/crates/vm/levm/src/precompiles.rs b/crates/vm/levm/src/precompiles.rs index 6119d6adf04..856e52844c0 100644 --- a/crates/vm/levm/src/precompiles.rs +++ b/crates/vm/levm/src/precompiles.rs @@ -422,7 +422,6 @@ pub fn ecrecover(calldata: &Bytes, gas_remaining: &mut u64, _fork: Fork) -> Resu ethrex_crypto::keccak::keccak_hash(&public_key.serialize_uncompressed()[1..]); // Address is the last 20 bytes of the hash. - #[expect(clippy::indexing_slicing)] let recovered_address_bytes = &public_key_hash[12..]; let mut out = [0u8; 32]; From 779211e4a420c380634bec5b3f5862aac229116d Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Fri, 7 Nov 2025 17:06:31 +0100 Subject: [PATCH 21/40] add fallback --- Cargo.lock | 1 + crates/common/crypto/Cargo.toml | 4 +- crates/common/crypto/keccak/mod.rs | 295 +++++++++++------- .../networking/p2p/rlpx/connection/codec.rs | 24 +- 4 files changed, 191 insertions(+), 133 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a4b87607b6..7855e69b2ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3410,6 +3410,7 @@ dependencies = [ "c-kzg", "kzg-rs", "thiserror 2.0.17", + "tiny-keccak", ] [[package]] diff --git a/crates/common/crypto/Cargo.toml b/crates/common/crypto/Cargo.toml index c9e9c761904..7e4ee6e17aa 100644 --- a/crates/common/crypto/Cargo.toml +++ b/crates/common/crypto/Cargo.toml @@ -15,9 +15,11 @@ c-kzg = { version = "2.1.1", default-features = false, optional = true, features "std", ] } kzg-rs.workspace = true - thiserror.workspace = true +#[target.'cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))'.dependencies] +tiny-keccak = { version = "2.0.2", features = ["keccak"] } + [features] default = [] c-kzg = ["dep:c-kzg"] diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index 64452acbe11..4227cc4d561 100644 --- 
a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -3,100 +3,148 @@ std::arch::global_asm!(include_str!("keccak1600-armv8.s"), options(raw)); #[cfg(target_arch = "x86_64")] std::arch::global_asm!(include_str!("keccak1600-x86_64.s"), options(att_syntax)); -const BLOCK_SIZE: usize = 136; +pub use imp::*; -#[derive(Default, Clone, Copy, Debug)] -#[repr(transparent)] -struct State([u64; 25]); +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +mod imp { + const BLOCK_SIZE: usize = 136; -unsafe extern "C" { - #[link_name = "SHA3_absorb"] - unsafe fn SHA3_absorb(state: *mut State, buf: *const u8, len: usize, r: usize) -> usize; - unsafe fn SHA3_squeeze(state: *mut State, buf: *mut u8, len: usize, r: usize); -} + #[derive(Default, Clone, Copy)] + #[repr(transparent)] + struct State([u64; 25]); -pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32] { - let mut state = State::default(); - let mut tail_buf = [0; BLOCK_SIZE]; - let mut hash_buf = [0; 32]; - - let tail_len; - match data.as_ref() { - [] => tail_len = 0, - data if data.len() < BLOCK_SIZE => unsafe { - tail_len = data.len(); - tail_buf.get_unchecked_mut(..tail_len).copy_from_slice(data); - }, - data => unsafe { - tail_len = SHA3_absorb(&mut state, data.as_ptr(), data.len(), BLOCK_SIZE); - if tail_len != 0 { - let tail_data = data.get_unchecked(data.len() - tail_len..); - tail_buf - .get_unchecked_mut(..tail_len) - .copy_from_slice(tail_data); - } - }, + unsafe extern "C" { + #[link_name = "SHA3_absorb"] + unsafe fn SHA3_absorb(state: *mut State, buf: *const u8, len: usize, r: usize) -> usize; + unsafe fn SHA3_squeeze(state: *mut State, buf: *mut u8, len: usize, r: usize); } - unsafe { - *tail_buf.get_unchecked_mut(tail_len) = 0x01; - *tail_buf.get_unchecked_mut(BLOCK_SIZE - 1) |= 0x80; - - SHA3_absorb(&mut state, tail_buf.as_ptr(), tail_buf.len(), BLOCK_SIZE); - SHA3_squeeze( - &mut state, - hash_buf.as_mut_ptr(), - hash_buf.len(), - BLOCK_SIZE, - ); - } + pub fn 
keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32] { + let mut state = State::default(); + let mut tail_buf = [0; BLOCK_SIZE]; + let mut hash_buf = [0; 32]; - hash_buf -} + let tail_len; + match data.as_ref() { + [] => tail_len = 0, + data if data.len() < BLOCK_SIZE => unsafe { + tail_len = data.len(); + tail_buf.get_unchecked_mut(..tail_len).copy_from_slice(data); + }, + data => unsafe { + tail_len = SHA3_absorb(&mut state, data.as_ptr(), data.len(), BLOCK_SIZE); + if tail_len != 0 { + let tail_data = data.get_unchecked(data.len() - tail_len..); + tail_buf + .get_unchecked_mut(..tail_len) + .copy_from_slice(tail_data); + } + }, + } -#[derive(Debug, Clone, Copy)] -pub struct Keccak256 { - state: State, - tail_buf: [u8; BLOCK_SIZE], - tail_len: usize, -} + unsafe { + *tail_buf.get_unchecked_mut(tail_len) = 0x01; + *tail_buf.get_unchecked_mut(BLOCK_SIZE - 1) |= 0x80; -impl Default for Keccak256 { - fn default() -> Self { - Self { - state: State::default(), - tail_buf: [0; BLOCK_SIZE], - tail_len: 0, + SHA3_absorb(&mut state, tail_buf.as_ptr(), tail_buf.len(), BLOCK_SIZE); + SHA3_squeeze( + &mut state, + hash_buf.as_mut_ptr(), + hash_buf.len(), + BLOCK_SIZE, + ); } + + hash_buf + } + + #[derive(Clone)] + pub struct Keccak256 { + state: State, + tail_buf: [u8; BLOCK_SIZE], + tail_len: usize, } -} -impl Keccak256 { - #[inline] - pub fn new() -> Self { - Self { - state: State::default(), - tail_buf: [0; BLOCK_SIZE], - tail_len: 0, + impl Default for Keccak256 { + fn default() -> Self { + Self { + state: State::default(), + tail_buf: [0; BLOCK_SIZE], + tail_len: 0, + } } } - #[inline] - pub fn update(&mut self, data: impl AsRef<[u8]>) -> Self { - let mut data = data.as_ref(); - unsafe { - // partial block - if self.tail_len > 0 { - let need = BLOCK_SIZE - self.tail_len; - if data.len() < need { - // still partial block - self.tail_buf[self.tail_len..self.tail_len + data.len()].copy_from_slice(data); - self.tail_len += data.len(); - return *self; + impl Keccak256 { + 
#[inline] + pub fn new() -> Self { + Self { + state: State::default(), + tail_buf: [0; BLOCK_SIZE], + tail_len: 0, + } + } + + #[inline] + pub fn update(&mut self, data: impl AsRef<[u8]>) -> Self { + let mut data = data.as_ref(); + unsafe { + // partial block + if self.tail_len > 0 { + let need = BLOCK_SIZE - self.tail_len; + if data.len() < need { + // still partial block + self.tail_buf[self.tail_len..self.tail_len + data.len()] + .copy_from_slice(data); + self.tail_len += data.len(); + return self.clone(); + } + + // complete block + self.tail_buf[self.tail_len..BLOCK_SIZE].copy_from_slice(&data[..need]); + + SHA3_absorb( + &mut self.state, + self.tail_buf.as_ptr(), + self.tail_buf.len(), + BLOCK_SIZE, + ); + + self.tail_len = 0; + self.tail_buf.fill(0); + data = &data[need..]; } + } - // complete block - self.tail_buf[self.tail_len..BLOCK_SIZE].copy_from_slice(&data[..need]); + match data { + [] => {} + data if data.len() < BLOCK_SIZE => unsafe { + self.tail_len = data.len(); + self.tail_buf + .get_unchecked_mut(..self.tail_len) + .copy_from_slice(data); + }, + data => unsafe { + let rem = SHA3_absorb(&mut self.state, data.as_ptr(), data.len(), BLOCK_SIZE); + self.tail_len = rem; + if rem != 0 { + let tail_data = data.get_unchecked(data.len() - rem..); + self.tail_buf + .get_unchecked_mut(..rem) + .copy_from_slice(tail_data); + } + }, + } + self.clone() + } + + #[inline] + pub fn finalize(mut self) -> [u8; 32] { + let mut hash_buf = [0u8; 32]; + + unsafe { + *self.tail_buf.get_unchecked_mut(self.tail_len) = 0x01; + *self.tail_buf.get_unchecked_mut(BLOCK_SIZE - 1) |= 0x80; SHA3_absorb( &mut self.state, @@ -105,58 +153,63 @@ impl Keccak256 { BLOCK_SIZE, ); - self.tail_len = 0; - self.tail_buf.fill(0); - data = &data[need..]; + SHA3_squeeze( + &mut self.state, + hash_buf.as_mut_ptr(), + hash_buf.len(), + BLOCK_SIZE, + ); } - } - match data { - [] => {} - data if data.len() < BLOCK_SIZE => unsafe { - self.tail_len = data.len(); - self.tail_buf - 
.get_unchecked_mut(..self.tail_len) - .copy_from_slice(data); - }, - data => unsafe { - let rem = SHA3_absorb(&mut self.state, data.as_ptr(), data.len(), BLOCK_SIZE); - self.tail_len = rem; - if rem != 0 { - let tail_data = data.get_unchecked(data.len() - rem..); - self.tail_buf - .get_unchecked_mut(..rem) - .copy_from_slice(tail_data); - } - }, + hash_buf } - *self } +} - #[inline] - pub fn finalize(mut self) -> [u8; 32] { - let mut hash_buf = [0u8; 32]; +#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] +mod imp { + use tiny_keccak::{Hasher, Keccak}; - unsafe { - *self.tail_buf.get_unchecked_mut(self.tail_len) = 0x01; - *self.tail_buf.get_unchecked_mut(BLOCK_SIZE - 1) |= 0x80; + pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32] { + let mut out = [0u8; 32]; + let mut h = Keccak::v256(); + h.update(data.as_ref()); + h.finalize(&mut out); + out + } - SHA3_absorb( - &mut self.state, - self.tail_buf.as_ptr(), - self.tail_buf.len(), - BLOCK_SIZE, - ); + #[derive(Clone)] + pub struct Keccak256 { + h: Keccak, + } - SHA3_squeeze( - &mut self.state, - hash_buf.as_mut_ptr(), - hash_buf.len(), - BLOCK_SIZE, - ); + impl Default for Keccak256 { + fn default() -> Self { + Self::new() + } + } + + impl Keccak256 { + #[inline] + pub fn new() -> Self { + Self { h: Keccak::v256() } } - hash_buf + #[inline] + pub fn update(&mut self, data: impl AsRef<[u8]>) -> Self { + let d = data.as_ref(); + if !d.is_empty() { + self.h.update(d); + } + self.clone() + } + + #[inline] + pub fn finalize(self) -> [u8; 32] { + let mut out = [0u8; 32]; + self.h.finalize(&mut out); + out + } } } @@ -165,6 +218,8 @@ mod test { use super::*; use std::array; + const BLOCK_SIZE: usize = 136; + #[test] fn keccak_empty() { assert_eq!( diff --git a/crates/networking/p2p/rlpx/connection/codec.rs b/crates/networking/p2p/rlpx/connection/codec.rs index c14dd05cee5..c3315801e51 100644 --- a/crates/networking/p2p/rlpx/connection/codec.rs +++ b/crates/networking/p2p/rlpx/connection/codec.rs @@ 
-87,8 +87,8 @@ impl std::fmt::Debug for RLPxCodec { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("RLPxCodec") .field("mac_key", &self.mac_key) - .field("ingress_mac", &self.ingress_mac) - .field("egress_mac", &self.egress_mac) + .field("ingress_mac", &"ingress_mac") + .field("egress_mac", &"egress_mac") .field("ingress_aes", &"Aes256Ctr64BE") .field("egress_aes", &"Aes256Ctr64BE") .field("eth_version", &self.eth_version) @@ -121,7 +121,7 @@ impl Decoder for RLPxCodec { // Validate MAC header // header-mac-seed = aes(mac-secret, keccak256.digest(egress-mac)[:16]) ^ header-ciphertext let header_mac_seed = { - let mac_digest: [u8; 16] = truncate_array(self.ingress_mac.finalize()); + let mac_digest: [u8; 16] = truncate_array(self.ingress_mac.clone().finalize()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); (H128(seed.into()) @@ -135,11 +135,11 @@ impl Decoder for RLPxCodec { // ingress-mac = keccak256.update(ingress-mac, header-mac-seed) // Use temporary value as it can be discarded if the buffer does not contain yet the full message - let mut temp_ingress_mac = self.ingress_mac; + let mut temp_ingress_mac = self.ingress_mac.clone(); temp_ingress_mac.update(header_mac_seed); // header-mac = keccak256.digest(egress-mac)[:16] - let expected_header_mac = H128(truncate_array(temp_ingress_mac.finalize())); + let expected_header_mac = H128(truncate_array(temp_ingress_mac.clone().finalize())); if header_mac != expected_header_mac.0 { return Err(PeerConnectionError::InvalidMessageFrame( @@ -193,7 +193,7 @@ impl Decoder for RLPxCodec { src.advance(total_message_size); // The buffer contains the full message and will be consumed; update the ingress_mac and aes values - self.ingress_mac = temp_ingress_mac; + self.ingress_mac = temp_ingress_mac.clone(); self.ingress_aes = temp_ingress_aes; let (frame_ciphertext, frame_mac) = frame_data @@ -205,13 +205,13 @@ impl Decoder for RLPxCodec { // check MAC 
self.ingress_mac.update(&frame_ciphertext); let frame_mac_seed = { - let mac_digest: [u8; 16] = truncate_array(self.ingress_mac.finalize()); + let mac_digest: [u8; 16] = truncate_array(self.ingress_mac.clone().finalize()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); (H128(seed.into()) ^ H128(mac_digest)).0 }; self.ingress_mac.update(frame_mac_seed); - let expected_frame_mac: [u8; 16] = truncate_array(self.ingress_mac.finalize()); + let expected_frame_mac: [u8; 16] = truncate_array(self.ingress_mac.clone().finalize()); if frame_mac != expected_frame_mac { return Err(PeerConnectionError::InvalidMessageFrame( @@ -293,7 +293,7 @@ impl Encoder for RLPxCodec { })?)?; let header_mac_seed = { - let mac_digest: [u8; 16] = truncate_array(self.egress_mac.finalize()); + let mac_digest: [u8; 16] = truncate_array(self.egress_mac.clone().finalize()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); let header_data = header @@ -308,7 +308,7 @@ impl Encoder for RLPxCodec { H128(seed.into()) ^ H128(header_data) }; self.egress_mac.update(header_mac_seed); - let header_mac = self.egress_mac.finalize(); + let header_mac = self.egress_mac.clone().finalize(); let header_mac_data: [u8; 16] = truncate_array(header_mac); header.extend_from_slice(&header_mac_data); @@ -328,13 +328,13 @@ impl Encoder for RLPxCodec { // frame-mac-seed = aes(mac-secret, keccak256.digest(egress-mac)[:16]) ^ keccak256.digest(egress-mac)[:16] let frame_mac_seed = { - let mac_digest: [u8; 16] = truncate_array(self.egress_mac.finalize()); + let mac_digest: [u8; 16] = truncate_array(self.egress_mac.clone().finalize()); let mut seed = mac_digest.into(); mac_aes_cipher.encrypt_block(&mut seed); (H128(seed.into()) ^ H128(mac_digest)).0 }; self.egress_mac.update(frame_mac_seed); - let frame_mac = self.egress_mac.finalize(); + let frame_mac = self.egress_mac.clone().finalize(); // Write frame-mac buffer.extend_from_slice(&frame_mac[..16]); From 
323cf5e6a9e5de537a79c91afb639c2ac8be36e4 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 10 Nov 2025 10:57:41 +0100 Subject: [PATCH 22/40] fix error --- crates/l2/tee/quote-gen/Cargo.lock | 57 ++++++++-------- .../vm/levm/bench/revm_comparison/Cargo.lock | 67 +++++++++---------- .../bench/revm_comparison/src/benchmark.rs | 4 +- 3 files changed, 62 insertions(+), 66 deletions(-) diff --git a/crates/l2/tee/quote-gen/Cargo.lock b/crates/l2/tee/quote-gen/Cargo.lock index e39b4d47898..ab86573205e 100644 --- a/crates/l2/tee/quote-gen/Cargo.lock +++ b/crates/l2/tee/quote-gen/Cargo.lock @@ -2072,10 +2072,11 @@ dependencies = [ [[package]] name = "ethrex-blockchain" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bytes", "ethrex-common", + "ethrex-crypto", "ethrex-metrics", "ethrex-rlp", "ethrex-storage", @@ -2084,7 +2085,6 @@ dependencies = [ "hex", "rustc-hash", "secp256k1", - "sha3", "thiserror 2.0.16", "tokio", "tokio-util", @@ -2093,7 +2093,7 @@ dependencies = [ [[package]] name = "ethrex-common" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bytes", "crc32fast", @@ -2113,7 +2113,6 @@ dependencies = [ "serde 1.0.228", "serde_json", "sha2", - "sha3", "thiserror 2.0.16", "tinyvec", "tracing", @@ -2122,7 +2121,7 @@ dependencies = [ [[package]] name = "ethrex-config" -version = "5.0.0" +version = "6.0.0" dependencies = [ "ethrex-common", "ethrex-p2p", @@ -2133,16 +2132,17 @@ dependencies = [ [[package]] name = "ethrex-crypto" -version = "5.0.0" +version = "6.0.0" dependencies = [ "c-kzg", "kzg-rs", "thiserror 2.0.16", + "tiny-keccak", ] [[package]] name = "ethrex-dev" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bytes", "envy", @@ -2161,7 +2161,7 @@ dependencies = [ [[package]] name = "ethrex-l2" -version = "5.0.0" +version = "6.0.0" dependencies = [ "aligned-sdk", "axum", @@ -2218,11 +2218,12 @@ dependencies = [ [[package]] name = "ethrex-l2-common" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bytes", "ethereum-types 0.15.1", 
"ethrex-common", + "ethrex-crypto", "ethrex-rlp", "ethrex-storage", "ethrex-trie", @@ -2233,13 +2234,12 @@ dependencies = [ "secp256k1", "serde 1.0.228", "serde_with", - "sha3", "thiserror 2.0.16", ] [[package]] name = "ethrex-l2-rpc" -version = "5.0.0" +version = "6.0.0" dependencies = [ "axum", "bytes", @@ -2268,7 +2268,7 @@ dependencies = [ [[package]] name = "ethrex-levm" -version = "5.0.0" +version = "6.0.0" dependencies = [ "ark-bn254", "ark-ec", @@ -2292,7 +2292,6 @@ dependencies = [ "serde 1.0.228", "serde_json", "sha2", - "sha3", "strum 0.27.2", "thiserror 2.0.16", "walkdir", @@ -2300,7 +2299,7 @@ dependencies = [ [[package]] name = "ethrex-metrics" -version = "5.0.0" +version = "6.0.0" dependencies = [ "ethrex-common", "serde 1.0.228", @@ -2311,7 +2310,7 @@ dependencies = [ [[package]] name = "ethrex-p2p" -version = "5.0.0" +version = "6.0.0" dependencies = [ "aes", "async-trait", @@ -2322,6 +2321,7 @@ dependencies = [ "ethereum-types 0.15.1", "ethrex-blockchain", "ethrex-common", + "ethrex-crypto", "ethrex-rlp", "ethrex-storage", "ethrex-storage-rollup", @@ -2339,7 +2339,6 @@ dependencies = [ "serde 1.0.228", "serde_json", "sha2", - "sha3", "snap", "spawned-concurrency", "spawned-rt", @@ -2352,7 +2351,7 @@ dependencies = [ [[package]] name = "ethrex-rlp" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bytes", "ethereum-types 0.15.1", @@ -2365,7 +2364,7 @@ dependencies = [ [[package]] name = "ethrex-rpc" -version = "5.0.0" +version = "6.0.0" dependencies = [ "axum", "axum-extra", @@ -2374,6 +2373,7 @@ dependencies = [ "ethereum-types 0.15.1", "ethrex-blockchain", "ethrex-common", + "ethrex-crypto", "ethrex-p2p", "ethrex-rlp", "ethrex-storage", @@ -2388,7 +2388,6 @@ dependencies = [ "serde 1.0.228", "serde_json", "sha2", - "sha3", "spawned-concurrency", "spawned-rt", "thiserror 2.0.16", @@ -2402,7 +2401,7 @@ dependencies = [ [[package]] name = "ethrex-sdk" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bytes", "ethereum-types 0.15.1", @@ 
-2427,7 +2426,7 @@ dependencies = [ [[package]] name = "ethrex-sdk-contract-utils" -version = "5.0.0" +version = "6.0.0" dependencies = [ "thiserror 2.0.16", "tracing", @@ -2435,7 +2434,7 @@ dependencies = [ [[package]] name = "ethrex-storage" -version = "5.0.0" +version = "6.0.0" dependencies = [ "anyhow", "async-trait", @@ -2443,6 +2442,7 @@ dependencies = [ "bytes", "ethereum-types 0.15.1", "ethrex-common", + "ethrex-crypto", "ethrex-rlp", "ethrex-trie", "hex", @@ -2451,14 +2451,13 @@ dependencies = [ "rustc-hash", "serde 1.0.228", "serde_json", - "sha3", "thiserror 2.0.16", "tracing", ] [[package]] name = "ethrex-storage-rollup" -version = "5.0.0" +version = "6.0.0" dependencies = [ "anyhow", "async-trait", @@ -2484,20 +2483,20 @@ dependencies = [ [[package]] name = "ethrex-trie" -version = "5.0.0" +version = "6.0.0" dependencies = [ "anyhow", "bytes", "crossbeam 0.8.4", "digest", "ethereum-types 0.15.1", + "ethrex-crypto", "ethrex-rlp", "ethrex-threadpool", "hex", "lazy_static", "serde 1.0.228", "serde_json", - "sha3", "smallvec", "thiserror 2.0.16", "tracing", @@ -2505,7 +2504,7 @@ dependencies = [ [[package]] name = "ethrex-vm" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bincode", "bytes", @@ -2513,6 +2512,7 @@ dependencies = [ "dyn-clone", "ethereum-types 0.15.1", "ethrex-common", + "ethrex-crypto", "ethrex-levm", "ethrex-rlp", "ethrex-trie", @@ -2520,7 +2520,6 @@ dependencies = [ "lazy_static", "rkyv", "serde 1.0.228", - "sha3", "thiserror 2.0.16", "tracing", ] @@ -2882,7 +2881,7 @@ dependencies = [ [[package]] name = "guest_program" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bytes", "ethrex-blockchain", diff --git a/crates/vm/levm/bench/revm_comparison/Cargo.lock b/crates/vm/levm/bench/revm_comparison/Cargo.lock index f970693d401..758d0fdde20 100644 --- a/crates/vm/levm/bench/revm_comparison/Cargo.lock +++ b/crates/vm/levm/bench/revm_comparison/Cargo.lock @@ -1084,10 +1084,11 @@ dependencies = [ [[package]] name = 
"ethrex-blockchain" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bytes", "ethrex-common", + "ethrex-crypto", "ethrex-metrics", "ethrex-rlp", "ethrex-storage", @@ -1096,7 +1097,6 @@ dependencies = [ "hex", "rustc-hash", "secp256k1 0.30.0", - "sha3", "thiserror", "tokio", "tokio-util", @@ -1105,7 +1105,7 @@ dependencies = [ [[package]] name = "ethrex-common" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bytes", "crc32fast", @@ -1125,7 +1125,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "sha3", "thiserror", "tinyvec", "tracing", @@ -1134,15 +1133,16 @@ dependencies = [ [[package]] name = "ethrex-crypto" -version = "5.0.0" +version = "6.0.0" dependencies = [ "kzg-rs", "thiserror", + "tiny-keccak", ] [[package]] name = "ethrex-levm" -version = "5.0.0" +version = "6.0.0" dependencies = [ "ark-bn254", "ark-ec", @@ -1166,7 +1166,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "sha3", "strum", "thiserror", "walkdir", @@ -1174,7 +1173,7 @@ dependencies = [ [[package]] name = "ethrex-metrics" -version = "5.0.0" +version = "6.0.0" dependencies = [ "ethrex-common", "serde", @@ -1185,7 +1184,7 @@ dependencies = [ [[package]] name = "ethrex-rlp" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bytes", "ethereum-types", @@ -1198,7 +1197,7 @@ dependencies = [ [[package]] name = "ethrex-storage" -version = "5.0.0" +version = "6.0.0" dependencies = [ "anyhow", "async-trait", @@ -1206,13 +1205,15 @@ dependencies = [ "bytes", "ethereum-types", "ethrex-common", + "ethrex-crypto", "ethrex-rlp", "ethrex-trie", "hex", + "qfilter", + "rayon", "rustc-hash", "serde", "serde_json", - "sha3", "thiserror", "tracing", ] @@ -1226,20 +1227,20 @@ dependencies = [ [[package]] name = "ethrex-trie" -version = "5.0.0" +version = "6.0.0" dependencies = [ "anyhow", "bytes", "crossbeam", "digest 0.10.7", "ethereum-types", + "ethrex-crypto", "ethrex-rlp", "ethrex-threadpool", "hex", "lazy_static", "serde", "serde_json", - "sha3", "smallvec", "thiserror", 
"tracing", @@ -1247,7 +1248,7 @@ dependencies = [ [[package]] name = "ethrex-vm" -version = "5.0.0" +version = "6.0.0" dependencies = [ "bincode", "bytes", @@ -1255,6 +1256,7 @@ dependencies = [ "dyn-clone", "ethereum-types", "ethrex-common", + "ethrex-crypto", "ethrex-levm", "ethrex-rlp", "ethrex-trie", @@ -1262,7 +1264,6 @@ dependencies = [ "lazy_static", "rkyv", "serde", - "sha3", "thiserror", "tracing", ] @@ -1783,15 +1784,6 @@ dependencies = [ "signature", ] -[[package]] -name = "keccak" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc2af9a1119c51f12a14607e783cb977bde58bc069ff0c3da1095e635d70654" -dependencies = [ - "cpufeatures", -] - [[package]] name = "keccak-asm" version = "0.1.4" @@ -2398,6 +2390,15 @@ dependencies = [ "syn 2.0.107", ] +[[package]] +name = "qfilter" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "746341cd2357c9a4df2d951522b4a8dd1ef553e543119899ad7bf87e938c8fbe" +dependencies = [ + "xxhash-rust", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -2619,13 +2620,13 @@ dependencies = [ "bytes", "ethrex-blockchain", "ethrex-common", + "ethrex-crypto", "ethrex-levm", "ethrex-storage", "ethrex-vm", "hex", "revm", "rustc-hash", - "sha3", ] [[package]] @@ -2948,16 +2949,6 @@ dependencies = [ "digest 0.10.7", ] -[[package]] -name = "sha3" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" -dependencies = [ - "digest 0.10.7", - "keccak", -] - [[package]] name = "sha3-asm" version = "0.1.4" @@ -3713,6 +3704,12 @@ dependencies = [ "tap", ] +[[package]] +name = "xxhash-rust" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" + [[package]] name = "yoke" version = "0.8.0" diff --git 
a/crates/vm/levm/bench/revm_comparison/src/benchmark.rs b/crates/vm/levm/bench/revm_comparison/src/benchmark.rs index 3ff5397345a..45c8ede6e17 100644 --- a/crates/vm/levm/bench/revm_comparison/src/benchmark.rs +++ b/crates/vm/levm/bench/revm_comparison/src/benchmark.rs @@ -1,5 +1,5 @@ +use ethrex_crypto::keccak::keccak_hash; use revm_comparison::{levm_bench::run_with_levm, revm_bench::run_with_revm}; -use sha3::{Digest, Keccak256}; use std::{fs::File, io::Read}; enum VM { @@ -50,7 +50,7 @@ fn main() { fn generate_calldata(function: &str, n: u64) -> String { let function_signature = format!("{function}(uint256)"); - let hash = Keccak256::digest(function_signature.as_bytes()); + let hash = keccak_hash(function_signature.as_bytes()); let function_selector = &hash[..4]; // Encode argument n (uint256, padded to 32 bytes) From d229c53bd29ba6e8574e9e9904e45be2c523054c Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 10 Nov 2025 10:59:29 +0100 Subject: [PATCH 23/40] fix --- crates/l2/common/src/merkle_tree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/l2/common/src/merkle_tree.rs b/crates/l2/common/src/merkle_tree.rs index 95939298418..f3af818952e 100644 --- a/crates/l2/common/src/merkle_tree.rs +++ b/crates/l2/common/src/merkle_tree.rs @@ -36,7 +36,7 @@ impl IsMerkleTreeBackend for TreeData { }; let mut data = [0u8; 64]; data[..32].copy_from_slice(left); - data[64..].copy_from_slice(right); + data[32..].copy_from_slice(right); keccak_hash(data) } } From 7d48b577d7469ba2a316883b75e90dd6575f670a Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 10 Nov 2025 11:02:43 +0100 Subject: [PATCH 24/40] changelog --- CHANGELOG.md | 4 ++++ crates/l2/common/src/merkle_tree.rs | 18 +++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d439fbe30bd..40535f38f4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Perf +### 2025-11-10 + +- Replace sha3 keccak to an assembly 
version using ffi [#5247](https://github.com/lambdaclass/ethrex/pull/5247) + ### 2025-11-03 - Avoid unnecessary hash validations [#5167](https://github.com/lambdaclass/ethrex/pull/5167) diff --git a/crates/l2/common/src/merkle_tree.rs b/crates/l2/common/src/merkle_tree.rs index f3af818952e..ab86ff1e3da 100644 --- a/crates/l2/common/src/merkle_tree.rs +++ b/crates/l2/common/src/merkle_tree.rs @@ -1,5 +1,5 @@ use ethrex_common::H256; -use ethrex_crypto::keccak::keccak_hash; +use ethrex_crypto::keccak::{Keccak256}; use lambdaworks_crypto::merkle_tree::{merkle::MerkleTree, traits::IsMerkleTreeBackend}; // We use a newtype wrapper around `H256` because Rust's orphan rule @@ -29,15 +29,15 @@ impl IsMerkleTreeBackend for TreeData { /// /// Source: https://github.com/OpenZeppelin/openzeppelin-contracts/blob/1a87de932664d9b905612f4d9d1655fd27a41722/contracts/utils/cryptography/MerkleProof.sol#L114-L128 fn hash_new_parent(child_1: &Self::Node, child_2: &Self::Node) -> Self::Node { - let (left, right) = if child_1 < child_2 { - (child_1, child_2) + let mut hasher = Keccak256::new(); + if child_1 < child_2 { + hasher.update(child_1); + hasher.update(child_2); } else { - (child_2, child_1) - }; - let mut data = [0u8; 64]; - data[..32].copy_from_slice(left); - data[32..].copy_from_slice(right); - keccak_hash(data) + hasher.update(child_2); + hasher.update(child_1); + } + hasher.finalize().into() } } From 7527510bc9f72bb4a34e314e775641c465a9a708 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 10 Nov 2025 11:06:31 +0100 Subject: [PATCH 25/40] fmt --- crates/l2/common/src/merkle_tree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/l2/common/src/merkle_tree.rs b/crates/l2/common/src/merkle_tree.rs index ab86ff1e3da..d5d21af5c71 100644 --- a/crates/l2/common/src/merkle_tree.rs +++ b/crates/l2/common/src/merkle_tree.rs @@ -1,5 +1,5 @@ use ethrex_common::H256; -use ethrex_crypto::keccak::{Keccak256}; +use ethrex_crypto::keccak::Keccak256; use 
lambdaworks_crypto::merkle_tree::{merkle::MerkleTree, traits::IsMerkleTreeBackend}; // We use a newtype wrapper around `H256` because Rust's orphan rule From 93be439f3c3f7dce76f7b7b8b2a87ff4304eea31 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 10 Nov 2025 11:39:02 +0100 Subject: [PATCH 26/40] lint --- crates/l2/common/src/merkle_tree.rs | 2 +- .../prover/src/guest_program/src/risc0/Cargo.lock | 13 ++++++------- .../l2/prover/src/guest_program/src/sp1/Cargo.lock | 13 ++++++------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/crates/l2/common/src/merkle_tree.rs b/crates/l2/common/src/merkle_tree.rs index d5d21af5c71..aadfb3ee78f 100644 --- a/crates/l2/common/src/merkle_tree.rs +++ b/crates/l2/common/src/merkle_tree.rs @@ -37,7 +37,7 @@ impl IsMerkleTreeBackend for TreeData { hasher.update(child_2); hasher.update(child_1); } - hasher.finalize().into() + hasher.finalize() } } diff --git a/crates/l2/prover/src/guest_program/src/risc0/Cargo.lock b/crates/l2/prover/src/guest_program/src/risc0/Cargo.lock index d88f52ad3bb..8aa336b9635 100644 --- a/crates/l2/prover/src/guest_program/src/risc0/Cargo.lock +++ b/crates/l2/prover/src/guest_program/src/risc0/Cargo.lock @@ -1187,6 +1187,7 @@ version = "6.0.0" dependencies = [ "bytes", "ethrex-common", + "ethrex-crypto", "ethrex-metrics", "ethrex-rlp", "ethrex-storage", @@ -1195,7 +1196,6 @@ dependencies = [ "hex", "rustc-hash", "secp256k1", - "sha3", "thiserror", "tokio", "tokio-util", @@ -1224,7 +1224,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "sha3", "thiserror", "tinyvec", "tracing", @@ -1238,6 +1237,7 @@ dependencies = [ "c-kzg", "kzg-rs", "thiserror", + "tiny-keccak", ] [[package]] @@ -1247,6 +1247,7 @@ dependencies = [ "bytes", "ethereum-types", "ethrex-common", + "ethrex-crypto", "ethrex-rlp", "ethrex-storage", "ethrex-trie", @@ -1257,7 +1258,6 @@ dependencies = [ "secp256k1", "serde", "serde_with", - "sha3", "thiserror", ] @@ -1287,7 +1287,6 @@ dependencies = [ "serde", 
"serde_json", "sha2", - "sha3", "strum", "substrate-bn", "thiserror", @@ -1328,6 +1327,7 @@ dependencies = [ "bytes", "ethereum-types", "ethrex-common", + "ethrex-crypto", "ethrex-rlp", "ethrex-trie", "hex", @@ -1336,7 +1336,6 @@ dependencies = [ "rustc-hash", "serde", "serde_json", - "sha3", "thiserror", "tracing", ] @@ -1357,13 +1356,13 @@ dependencies = [ "crossbeam", "digest", "ethereum-types", + "ethrex-crypto", "ethrex-rlp", "ethrex-threadpool", "hex", "lazy_static", "serde", "serde_json", - "sha3", "smallvec", "thiserror", "tracing", @@ -1379,6 +1378,7 @@ dependencies = [ "dyn-clone", "ethereum-types", "ethrex-common", + "ethrex-crypto", "ethrex-levm", "ethrex-rlp", "ethrex-trie", @@ -1386,7 +1386,6 @@ dependencies = [ "lazy_static", "rkyv", "serde", - "sha3", "thiserror", "tracing", ] diff --git a/crates/l2/prover/src/guest_program/src/sp1/Cargo.lock b/crates/l2/prover/src/guest_program/src/sp1/Cargo.lock index 0cc858d5400..7a1a9e1d784 100644 --- a/crates/l2/prover/src/guest_program/src/sp1/Cargo.lock +++ b/crates/l2/prover/src/guest_program/src/sp1/Cargo.lock @@ -966,6 +966,7 @@ version = "6.0.0" dependencies = [ "bytes", "ethrex-common", + "ethrex-crypto", "ethrex-metrics", "ethrex-rlp", "ethrex-storage", @@ -974,7 +975,6 @@ dependencies = [ "hex", "rustc-hash", "secp256k1", - "sha3", "thiserror", "tokio", "tokio-util", @@ -1003,7 +1003,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "sha3", "thiserror", "tinyvec", "tracing", @@ -1016,6 +1015,7 @@ version = "6.0.0" dependencies = [ "kzg-rs", "thiserror", + "tiny-keccak", ] [[package]] @@ -1025,6 +1025,7 @@ dependencies = [ "bytes", "ethereum-types", "ethrex-common", + "ethrex-crypto", "ethrex-rlp", "ethrex-storage", "ethrex-trie", @@ -1035,7 +1036,6 @@ dependencies = [ "secp256k1", "serde", "serde_with", - "sha3", "thiserror", ] @@ -1065,7 +1065,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "sha3", "strum", "substrate-bn", "thiserror", @@ -1106,6 +1105,7 @@ dependencies = [ "bytes", 
"ethereum-types", "ethrex-common", + "ethrex-crypto", "ethrex-rlp", "ethrex-trie", "hex", @@ -1114,7 +1114,6 @@ dependencies = [ "rustc-hash", "serde", "serde_json", - "sha3", "thiserror", "tracing", ] @@ -1135,13 +1134,13 @@ dependencies = [ "crossbeam", "digest", "ethereum-types", + "ethrex-crypto", "ethrex-rlp", "ethrex-threadpool", "hex", "lazy_static", "serde", "serde_json", - "sha3", "smallvec", "thiserror", "tracing", @@ -1157,6 +1156,7 @@ dependencies = [ "dyn-clone", "ethereum-types", "ethrex-common", + "ethrex-crypto", "ethrex-levm", "ethrex-rlp", "ethrex-trie", @@ -1164,7 +1164,6 @@ dependencies = [ "lazy_static", "rkyv", "serde", - "sha3", "thiserror", "tracing", ] From 9114c5cbf9d505a391a1071012be17c95887f672 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 10 Nov 2025 11:55:11 +0100 Subject: [PATCH 27/40] use update finalize --- crates/blockchain/payload.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/crates/blockchain/payload.rs b/crates/blockchain/payload.rs index eb41381a894..685eeb596b8 100644 --- a/crates/blockchain/payload.rs +++ b/crates/blockchain/payload.rs @@ -18,7 +18,7 @@ use ethrex_common::{ }, }; -use ethrex_crypto::keccak::keccak_hash; +use ethrex_crypto::keccak::{Keccak256, keccak_hash}; use ethrex_vm::{Evm, EvmError}; use ethrex_rlp::encode::RLPEncode; @@ -98,19 +98,18 @@ pub enum BuildPayloadArgsError { impl BuildPayloadArgs { /// Computes an 8-byte identifier by hashing the components of the payload arguments. 
pub fn id(&self) -> Result { - let mut serialized = Vec::with_capacity(1024); - serialized.extend_from_slice(self.parent.as_bytes()); - serialized.extend_from_slice(&self.timestamp.to_be_bytes()); - serialized.extend_from_slice(self.random.as_bytes()); - serialized.extend_from_slice(self.fee_recipient.as_bytes()); + let mut hasher = Keccak256::new(); + hasher.update(self.parent); + hasher.update(self.timestamp.to_be_bytes()); + hasher.update(self.random); + hasher.update(self.fee_recipient); if let Some(withdrawals) = &self.withdrawals { - withdrawals.encode(&mut serialized); + hasher.update(withdrawals.encode_to_vec()); } if let Some(beacon_root) = self.beacon_root { - serialized.extend_from_slice(beacon_root.as_bytes()); + hasher.update(beacon_root); } - let mut hashed = keccak_hash(serialized); - let res = &mut hashed[..8]; + let res = &mut hasher.finalize()[..8]; res[0] = self.version; Ok(u64::from_be_bytes(res.try_into().map_err(|_| { BuildPayloadArgsError::FailedToConvertPayload From 51b7f970f3e7d3629f8b9762b449b1e7023093e3 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 10 Nov 2025 15:42:50 +0100 Subject: [PATCH 28/40] lint --- crates/blockchain/payload.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/blockchain/payload.rs b/crates/blockchain/payload.rs index 685eeb596b8..326580789fa 100644 --- a/crates/blockchain/payload.rs +++ b/crates/blockchain/payload.rs @@ -18,7 +18,7 @@ use ethrex_common::{ }, }; -use ethrex_crypto::keccak::{Keccak256, keccak_hash}; +use ethrex_crypto::keccak::{Keccak256}; use ethrex_vm::{Evm, EvmError}; use ethrex_rlp::encode::RLPEncode; From c3bcdd8516acd42e2db6d25fc3d5dab83cbe4282 Mon Sep 17 00:00:00 2001 From: Javier Chatruc Date: Mon, 10 Nov 2025 18:41:27 -0300 Subject: [PATCH 29/40] fmt --- crates/blockchain/payload.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/blockchain/payload.rs b/crates/blockchain/payload.rs index d14d29e3fb6..5abf4f8ade7 100644 --- 
a/crates/blockchain/payload.rs +++ b/crates/blockchain/payload.rs @@ -18,7 +18,7 @@ use ethrex_common::{ }, }; -use ethrex_crypto::keccak::{Keccak256}; +use ethrex_crypto::keccak::Keccak256; use ethrex_vm::{Evm, EvmError}; use ethrex_rlp::encode::RLPEncode; From e33eb2cb763f070529f47753e69882d103180a1d Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Tue, 11 Nov 2025 12:34:53 -0300 Subject: [PATCH 30/40] clarify x86 choice --- crates/common/crypto/keccak/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/common/crypto/keccak/README.md b/crates/common/crypto/keccak/README.md index 5fbd2630205..3d269d88885 100644 --- a/crates/common/crypto/keccak/README.md +++ b/crates/common/crypto/keccak/README.md @@ -15,7 +15,8 @@ The module exposes a single function: ```rust pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32]; ``` -There are no feature flags. If building for `x86_64`, it will link an optimized assembly implementation. Because it uses generic `x86_64` code, no fallback is needed. If building for `ARMv8`, it will link an optimized implementation using on `sha3` instructions and a fallback one using generic `ARMv8` instructions. This detection is performed at runtime, i.e. dynamic dispatch. +There are no feature flags. If building for `x86_64`, it will link an optimized assembly implementation. Because it uses generic `x86_64` code, no fallback is needed. The choice for the generic version was made due to the AVX2 version being ~40% slower than the generic one. +If building for `ARMv8`, it will link an optimized implementation using on `sha3` instructions and a fallback one using generic `ARMv8` instructions. This detection is performed at runtime, i.e. dynamic dispatch. For other architectures, it falls back to `tiny_keccak`. This is specially necessary for proving, as the ZKVMs are RISC-V based, but they are not guaranteed to support all of its extensions. We may revisit adding assembly versions for them at a later time. 
## Code Generation From 2c6cd32d785b63f234629114325035b19308cab5 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Tue, 11 Nov 2025 13:10:01 -0300 Subject: [PATCH 31/40] document choices better --- crates/common/crypto/keccak/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/common/crypto/keccak/README.md b/crates/common/crypto/keccak/README.md index 3d269d88885..9c893b5d870 100644 --- a/crates/common/crypto/keccak/README.md +++ b/crates/common/crypto/keccak/README.md @@ -15,8 +15,9 @@ The module exposes a single function: ```rust pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32]; ``` -There are no feature flags. If building for `x86_64`, it will link an optimized assembly implementation. Because it uses generic `x86_64` code, no fallback is needed. The choice for the generic version was made due to the AVX2 version being ~40% slower than the generic one. -If building for `ARMv8`, it will link an optimized implementation using on `sha3` instructions and a fallback one using generic `ARMv8` instructions. This detection is performed at runtime, i.e. dynamic dispatch. +There are no feature flags. If building for `x86_64`, it will link an optimized assembly implementation. Because it uses generic `x86_64` code, no fallback is needed. +If building for `ARMv8`, it will link an optimized implementation using generic `ARMv8` instructions. +In both cases we chose the baseline instruction sets. This was not due to compatibility, which can be handled with dynamic dispatch, but because in the case of `ARMv8` using specialized `SHA3` instructions showed no improvement, and in `x86_64` using `AVX2` actually showed a regression of 30% in throughput. For other architectures, it falls back to `tiny_keccak`. This is specially necessary for proving, as the ZKVMs are RISC-V based, but they are not guaranteed to support all of its extensions. We may revisit adding assembly versions for them at a later time. 
## Code Generation From e0f944c465f4aced7f0ed8de9116928b6073421c Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Tue, 11 Nov 2025 13:22:31 -0300 Subject: [PATCH 32/40] remove outdated instruction --- crates/common/crypto/keccak/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/common/crypto/keccak/README.md b/crates/common/crypto/keccak/README.md index 9c893b5d870..9181432db7a 100644 --- a/crates/common/crypto/keccak/README.md +++ b/crates/common/crypto/keccak/README.md @@ -27,7 +27,6 @@ The implementation is currently rather manual: ```shell $ cd cryptogams/arm $ ./keccak-1600-armv8.pl linux64 keccak1600-armv8.s -$ ./keccak-1600-armv8.pl linux64+sha3 keccak1600-armv8.s $ cd ../x86_64 $ ./keccak1600-x86_64.pl linux64 keccak1600-x86_64.s ``` From 965501cd4eeed56154917ccb0a79a3059bb7fe14 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Tue, 11 Nov 2025 13:44:23 -0300 Subject: [PATCH 33/40] fix instructions --- crates/common/crypto/keccak/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/crypto/keccak/README.md b/crates/common/crypto/keccak/README.md index 9181432db7a..710fb39a61e 100644 --- a/crates/common/crypto/keccak/README.md +++ b/crates/common/crypto/keccak/README.md @@ -30,7 +30,7 @@ $ ./keccak-1600-armv8.pl linux64 keccak1600-armv8.s $ cd ../x86_64 $ ./keccak1600-x86_64.pl linux64 keccak1600-x86_64.s ``` -- With the code generated, we manually copy the functions defined to the matching `naked_asm` blocks inside the module. +- The x86 can be directly imported by the Rust compiler with the current options, but the ARM code requires a few changes, commented at the top of the `keccak1600-armv8.s` file. 
## Copyright Notice From 6746c0ec531b63491e00b38fbd7db300a0868191 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Tue, 11 Nov 2025 13:47:39 -0300 Subject: [PATCH 34/40] update docs --- crates/common/crypto/keccak/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/common/crypto/keccak/README.md b/crates/common/crypto/keccak/README.md index 710fb39a61e..9873dff4a63 100644 --- a/crates/common/crypto/keccak/README.md +++ b/crates/common/crypto/keccak/README.md @@ -11,9 +11,16 @@ The code is adapted from the output of the scripts written by the [cryptogams](h The goal of this module is to have an efficient implementation of Keccak256 for Ethrex, reusing audited code as much as possible, while keeping complexity as low as possible. To achieve low complexity, we leave explicitly out of scope implementing `Digest`, having implementations for all variants of CPUs (we keep a selected subset of those provided by _Cryptogams_) and compile-time translation of source files. -The module exposes a single function: +The module exposes only the following: ```rust pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32]; +struct Keccak256; +impl Keccak256 { + fn new() -> Self; + fn update(&self, impl AsRef<[u8]>) -> Self; + fn finalize(self) -> [u8; 32]; +} +impl Default for Keccak256; ``` There are no feature flags. If building for `x86_64`, it will link an optimized assembly implementation. Because it uses generic `x86_64` code, no fallback is needed. If building for `ARMv8`, it will link an optimized implementation using generic `ARMv8` instructions. 
From 411d5ce3f44602f65d44dfab99107d0b0acc8eb3 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Tue, 11 Nov 2025 13:48:51 -0300 Subject: [PATCH 35/40] remove armv8+sha3 --- .../crypto/keccak/keccak1600-armv8-sha3.s | 842 ------------------ 1 file changed, 842 deletions(-) delete mode 100644 crates/common/crypto/keccak/keccak1600-armv8-sha3.s diff --git a/crates/common/crypto/keccak/keccak1600-armv8-sha3.s b/crates/common/crypto/keccak/keccak1600-armv8-sha3.s deleted file mode 100644 index b6a60da5e92..00000000000 --- a/crates/common/crypto/keccak/keccak1600-armv8-sha3.s +++ /dev/null @@ -1,842 +0,0 @@ -.arch armv8.2-a+sha3 -.text - -.align 8 // strategic alignment and padding that allows to use - // address value as loop termination condition... -.quad 0,0,0,0,0,0,0,0 -.type iotas,%object -iotas: -.quad 0x0000000000000001 -.quad 0x0000000000008082 -.quad 0x800000000000808a -.quad 0x8000000080008000 -.quad 0x000000000000808b -.quad 0x0000000080000001 -.quad 0x8000000080008081 -.quad 0x8000000000008009 -.quad 0x000000000000008a -.quad 0x0000000000000088 -.quad 0x0000000080008009 -.quad 0x000000008000000a -.Liotas12: -.quad 0x000000008000808b -.quad 0x800000000000008b -.quad 0x8000000000008089 -.quad 0x8000000000008003 -.quad 0x8000000000008002 -.quad 0x8000000000000080 -.quad 0x000000000000800a -.quad 0x800000008000000a -.quad 0x8000000080008081 -.quad 0x8000000000008080 -.quad 0x0000000080000001 -.quad 0x8000000080008008 -.size iotas,.-iotas -.type KeccakF1600_int,%function -.align 5 -KeccakF1600_int: -.inst 0xd503233f // paciasp - stp x28,x30,[sp,#16] // stack is pre-allocated - b .Loop -.align 4 -.Loop: - ////////////////////////////////////////// Theta - eor x26,x0,x5 - stp x4,x9,[sp,#0] // offload pair... 
- eor x27,x1,x6 - eor x28,x2,x7 - eor x30,x3,x8 - eor x4,x4,x9 - eor x26,x26,x10 - eor x27,x27,x11 - eor x28,x28,x12 - eor x30,x30,x13 - eor x4,x4,x14 - eor x26,x26,x15 - eor x27,x27,x16 - eor x28,x28,x17 - eor x30,x30,x25 - eor x4,x4,x19 - eor x26,x26,x20 - eor x28,x28,x22 - eor x27,x27,x21 - eor x30,x30,x23 - eor x4,x4,x24 - - eor x9,x26,x28,ror#63 - - eor x1,x1,x9 - eor x6,x6,x9 - eor x11,x11,x9 - eor x16,x16,x9 - eor x21,x21,x9 - - eor x9,x27,x30,ror#63 - eor x28,x28,x4,ror#63 - eor x30,x30,x26,ror#63 - eor x4,x4,x27,ror#63 - - eor x27, x2,x9 // mov x27,x2 - eor x7,x7,x9 - eor x12,x12,x9 - eor x17,x17,x9 - eor x22,x22,x9 - - eor x0,x0,x4 - eor x5,x5,x4 - eor x10,x10,x4 - eor x15,x15,x4 - eor x20,x20,x4 - ldp x4,x9,[sp,#0] // re-load offloaded data - eor x26, x3,x28 // mov x26,x3 - eor x8,x8,x28 - eor x13,x13,x28 - eor x25,x25,x28 - eor x23,x23,x28 - - eor x28, x4,x30 // mov x28,x4 - eor x9,x9,x30 - eor x14,x14,x30 - eor x19,x19,x30 - eor x24,x24,x30 - - ////////////////////////////////////////// Rho+Pi - mov x30,x1 - ror x1,x6,#64-44 - //mov x27,x2 - ror x2,x12,#64-43 - //mov x26,x3 - ror x3,x25,#64-21 // ? - //mov x28,x4 - ror x4,x24,#64-14 // ? - - ror x6,x9,#64-20 // ? - ror x12,x13,#64-25 // ? - ror x25,x17,#64-15 - ror x24,x21,#64-2 // ? - - ror x9,x22,#64-61 - ror x13,x19,#64-8 - ror x17,x11,#64-10 - ror x21,x8,#64-55 - - ror x22,x14,#64-39 - ror x19,x23,#64-56 - ror x11,x7,#64-6 // ? - ror x8,x16,#64-45 - - ror x14,x20,#64-18 - ror x23,x15,#64-41 - ror x7,x10,#64-3 - ror x16,x5,#64-36 // ? - - ror x5,x26,#64-28 // ? - ror x10,x30,#64-1 - ror x15,x28,#64-27 // ? - ror x20,x27,#64-62 // ? - - ////////////////////////////////////////// Chi+Iota - bic x26,x2,x1 - bic x27,x3,x2 - bic x28,x0,x4 - bic x30,x1,x0 - eor x0,x0,x26 - bic x26,x4,x3 - eor x1,x1,x27 - ldr x27,[sp,#16] - eor x3,x3,x28 - eor x4,x4,x30 - eor x2,x2,x26 - ldr x30,[x27],#8 // Iota[i++] - - bic x26,x7,x6 - tst x27,#255 // are we done? 
- str x27,[sp,#16] - bic x27,x8,x7 - bic x28,x5,x9 - eor x0,x0,x30 // A[0][0] ^= Iota - bic x30,x6,x5 - eor x5,x5,x26 - bic x26,x9,x8 - eor x6,x6,x27 - eor x8,x8,x28 - eor x9,x9,x30 - eor x7,x7,x26 - - bic x26,x12,x11 - bic x27,x13,x12 - bic x28,x10,x14 - bic x30,x11,x10 - eor x10,x10,x26 - bic x26,x14,x13 - eor x11,x11,x27 - eor x13,x13,x28 - eor x14,x14,x30 - eor x12,x12,x26 - - bic x26,x17,x16 - bic x27,x25,x17 - bic x28,x15,x19 - bic x30,x16,x15 - eor x15,x15,x26 - bic x26,x19,x25 - eor x16,x16,x27 - eor x25,x25,x28 - eor x19,x19,x30 - eor x17,x17,x26 - - bic x26,x22,x21 - bic x27,x23,x22 - bic x28,x20,x24 - bic x30,x21,x20 - eor x20,x20,x26 - bic x26,x24,x23 - eor x21,x21,x27 - eor x23,x23,x28 - eor x24,x24,x30 - eor x22,x22,x26 - - bne .Loop - - ldr x30,[sp,#16+__SIZEOF_POINTER__] -.inst 0xd50323bf // autiasp - ret -.size KeccakF1600_int,.-KeccakF1600_int - -.type KeccakF1600,%function -.align 5 -KeccakF1600: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]! 
- add x29,sp,#0 - stp x19,x20,[sp,#2*__SIZEOF_POINTER__] - stp x21,x22,[sp,#4*__SIZEOF_POINTER__] - stp x23,x24,[sp,#6*__SIZEOF_POINTER__] - stp x25,x26,[sp,#8*__SIZEOF_POINTER__] - stp x27,x28,[sp,#10*__SIZEOF_POINTER__] - sub sp,sp,#16+4*__SIZEOF_POINTER__ - - str x0,[sp,#16+2*__SIZEOF_POINTER__] // offload argument - mov x26,x0 - ldp x0,x1,[x0,#16*0] - ldp x2,x3,[x26,#16*1] - ldp x4,x5,[x26,#16*2] - ldp x6,x7,[x26,#16*3] - ldp x8,x9,[x26,#16*4] - ldp x10,x11,[x26,#16*5] - ldp x12,x13,[x26,#16*6] - ldp x14,x15,[x26,#16*7] - ldp x16,x17,[x26,#16*8] - ldp x25,x19,[x26,#16*9] - ldp x20,x21,[x26,#16*10] - ldp x22,x23,[x26,#16*11] - ldr x24,[x26,#16*12] - - adr x28,iotas - bl KeccakF1600_int - - ldr x26,[sp,#16+2*__SIZEOF_POINTER__] - stp x0,x1,[x26,#16*0] - stp x2,x3,[x26,#16*1] - stp x4,x5,[x26,#16*2] - stp x6,x7,[x26,#16*3] - stp x8,x9,[x26,#16*4] - stp x10,x11,[x26,#16*5] - stp x12,x13,[x26,#16*6] - stp x14,x15,[x26,#16*7] - stp x16,x17,[x26,#16*8] - stp x25,x19,[x26,#16*9] - stp x20,x21,[x26,#16*10] - stp x22,x23,[x26,#16*11] - str x24,[x26,#16*12] - - ldp x19,x20,[x29,#2*__SIZEOF_POINTER__] - add sp,sp,#16+4*__SIZEOF_POINTER__ - ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] - ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] - ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] - ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] - ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ -.inst 0xd50323bf // autiasp - ret -.size KeccakF1600,.-KeccakF1600 - -.globl SHA3_absorb -.type SHA3_absorb,%function -.align 5 -SHA3_absorb: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]! 
- add x29,sp,#0 - stp x19,x20,[sp,#2*__SIZEOF_POINTER__] - stp x21,x22,[sp,#4*__SIZEOF_POINTER__] - stp x23,x24,[sp,#6*__SIZEOF_POINTER__] - stp x25,x26,[sp,#8*__SIZEOF_POINTER__] - stp x27,x28,[sp,#10*__SIZEOF_POINTER__] - sub sp,sp,#16+4*__SIZEOF_POINTER__+16 - - stp x0,x1,[sp,#16+2*__SIZEOF_POINTER__] // offload arguments - stp x2,x3,[sp,#16+4*__SIZEOF_POINTER__] - - mov x26,x0 // uint64_t A[5][5] - mov x27,x1 // const void *inp - mov x28,x2 // size_t len - mov x30,x3 // size_t bsz - ldp x0,x1,[x26,#16*0] - ldp x2,x3,[x26,#16*1] - ldp x4,x5,[x26,#16*2] - ldp x6,x7,[x26,#16*3] - ldp x8,x9,[x26,#16*4] - ldp x10,x11,[x26,#16*5] - ldp x12,x13,[x26,#16*6] - ldp x14,x15,[x26,#16*7] - ldp x16,x17,[x26,#16*8] - ldp x25,x19,[x26,#16*9] - ldp x20,x21,[x26,#16*10] - ldp x22,x23,[x26,#16*11] - ldr x24,[x26,#16*12] - b .Loop_absorb - -.align 4 -.Loop_absorb: - subs x26,x28,x30 // len - bsz - blo .Labsorbed - - str x26,[sp,#16+4*__SIZEOF_POINTER__] // save len - bsz - cmp x30,#104 - ldr x26,[x27,#0] // A[0][0] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x0,x0,x26 - ldr x26,[x27,#8] // A[0][1] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x1,x1,x26 - ldr x26,[x27,#16] // A[0][2] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x2,x2,x26 - ldr x26,[x27,#24] // A[0][3] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x3,x3,x26 - ldr x26,[x27,#32] // A[0][4] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x4,x4,x26 - ldr x26,[x27,#40] // A[1][0] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x5,x5,x26 - ldr x26,[x27,#48] // A[1][1] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x6,x6,x26 - ldr x26,[x27,#56] // A[1][2] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x7,x7,x26 - ldr x26,[x27,#64] // A[1][3] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x8,x8,x26 - blo .Lprocess_block - - ldr x26,[x27,#72] // A[1][4] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 
-#endif - eor x9,x9,x26 - ldr x26,[x27,#80] // A[2][0] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x10,x10,x26 - ldr x26,[x27,#88] // A[2][1] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x11,x11,x26 - ldr x26,[x27,#96] // A[2][2] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x12,x12,x26 - beq .Lprocess_block - - cmp x30,#144 - ldr x26,[x27,#104] // A[2][3] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x13,x13,x26 - ldr x26,[x27,#112] // A[2][4] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x14,x14,x26 - ldr x26,[x27,#120] // A[3][0] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x15,x15,x26 - ldr x26,[x27,#128] // A[3][1] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x16,x16,x26 - blo .Lprocess_block - - ldr x26,[x27,#136] // A[3][2] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x17,x17,x26 - beq .Lprocess_block - - ldr x26,[x27,#144] // A[3][3] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x25,x25,x26 - ldr x26,[x27,#152] // A[3][4] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x19,x19,x26 - ldr x26,[x27,#160] // A[4][0] ^= *inp++ -#ifdef __AARCH64EB__ - rev x26,x26 -#endif - eor x20,x20,x26 - -.Lprocess_block: - add x27,x27,x30 - str x27,[sp,#16+3*__SIZEOF_POINTER__] // save inp - - adr x28,iotas - bl KeccakF1600_int - - ldr x27,[sp,#16+3*__SIZEOF_POINTER__] // restore arguments - ldp x28,x30,[sp,#16+4*__SIZEOF_POINTER__] - b .Loop_absorb - -.align 4 -.Labsorbed: - ldr x27,[sp,#16+2*__SIZEOF_POINTER__] - stp x0,x1,[x27,#16*0] - stp x2,x3,[x27,#16*1] - stp x4,x5,[x27,#16*2] - stp x6,x7,[x27,#16*3] - stp x8,x9,[x27,#16*4] - stp x10,x11,[x27,#16*5] - stp x12,x13,[x27,#16*6] - stp x14,x15,[x27,#16*7] - stp x16,x17,[x27,#16*8] - stp x25,x19,[x27,#16*9] - stp x20,x21,[x27,#16*10] - stp x22,x23,[x27,#16*11] - str x24,[x27,#16*12] - - mov x0,x28 // return value - ldp x19,x20,[x29,#2*__SIZEOF_POINTER__] - add 
sp,sp,#16+4*__SIZEOF_POINTER__+16 - ldp x21,x22,[x29,#4*__SIZEOF_POINTER__] - ldp x23,x24,[x29,#6*__SIZEOF_POINTER__] - ldp x25,x26,[x29,#8*__SIZEOF_POINTER__] - ldp x27,x28,[x29,#10*__SIZEOF_POINTER__] - ldp x29,x30,[sp],#16*__SIZEOF_POINTER__ -.inst 0xd50323bf // autiasp - ret -.size SHA3_absorb,.-SHA3_absorb -.globl SHA3_squeeze -.type SHA3_squeeze,%function -.align 5 -SHA3_squeeze: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-6*__SIZEOF_POINTER__]! - add x29,sp,#0 - stp x19,x20,[sp,#2*__SIZEOF_POINTER__] - stp x21,x22,[sp,#4*__SIZEOF_POINTER__] - - mov x19,x0 // put aside arguments - mov x20,x1 - mov x21,x2 - mov x22,x3 - -.Loop_squeeze: - ldr x4,[x0],#8 - cmp x21,#8 - blo .Lsqueeze_tail -#ifdef __AARCH64EB__ - rev x4,x4 -#endif - str x4,[x20],#8 - subs x21,x21,#8 - beq .Lsqueeze_done - - subs x3,x3,#8 - bhi .Loop_squeeze - - mov x0,x19 - bl KeccakF1600 - mov x0,x19 - mov x3,x22 - b .Loop_squeeze - -.align 4 -.Lsqueeze_tail: - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - lsr x4,x4,#8 - subs x21,x21,#1 - beq .Lsqueeze_done - strb w4,[x20],#1 - -.Lsqueeze_done: - ldp x19,x20,[sp,#2*__SIZEOF_POINTER__] - ldp x21,x22,[sp,#4*__SIZEOF_POINTER__] - ldp x29,x30,[sp],#6*__SIZEOF_POINTER__ -.inst 0xd50323bf // autiasp - ret -.size SHA3_squeeze,.-SHA3_squeeze -.type KeccakF1600_ce,%function -.align 5 -KeccakF1600_ce: -.Loop_ce: - ////////////////////////////////////////////////// Theta - eor3 v25.16b,v20.16b,v15.16b,v10.16b - eor3 v26.16b,v21.16b,v16.16b,v11.16b - eor3 v27.16b,v22.16b,v17.16b,v12.16b - eor3 v28.16b,v23.16b,v18.16b,v13.16b - eor3 v29.16b,v24.16b,v19.16b,v14.16b - eor3 v25.16b,v25.16b, v5.16b,v0.16b - eor3 
v26.16b,v26.16b, v6.16b,v1.16b - eor3 v27.16b,v27.16b, v7.16b,v2.16b - eor3 v28.16b,v28.16b, v8.16b,v3.16b - eor3 v29.16b,v29.16b, v9.16b,v4.16b - - rax1 v30.2d,v25.2d,v27.2d // D[1] - rax1 v31.2d,v26.2d,v28.2d // D[2] - rax1 v27.2d,v27.2d,v29.2d // D[3] - rax1 v28.2d,v28.2d,v25.2d // D[4] - rax1 v29.2d,v29.2d,v26.2d // D[0] - - ////////////////////////////////////////////////// Theta+Rho+Pi - xar v25.2d, v1.2d,v30.2d,#64-1 // C[0]=A[2][0] - - xar v1.2d,v6.2d,v30.2d,#64-44 - xar v6.2d,v9.2d,v28.2d,#64-20 - xar v9.2d,v22.2d,v31.2d,#64-61 - xar v22.2d,v14.2d,v28.2d,#64-39 - xar v14.2d,v20.2d,v29.2d,#64-18 - - xar v26.2d, v2.2d,v31.2d,#64-62 // C[1]=A[4][0] - - xar v2.2d,v12.2d,v31.2d,#64-43 - xar v12.2d,v13.2d,v27.2d,#64-25 - xar v13.2d,v19.2d,v28.2d,#64-8 - xar v19.2d,v23.2d,v27.2d,#64-56 - xar v23.2d,v15.2d,v29.2d,#64-41 - - xar v15.2d,v4.2d,v28.2d,#64-27 - - xar v28.2d, v24.2d,v28.2d,#64-14 // D[4]=A[0][4] - xar v24.2d,v21.2d,v30.2d,#64-2 - xar v8.2d,v8.2d,v27.2d,#64-55 // A[1][3]=A[4][1] - xar v4.2d,v16.2d,v30.2d,#64-45 // A[0][4]=A[1][3] - xar v16.2d,v5.2d,v29.2d,#64-36 - - xar v5.2d,v3.2d,v27.2d,#64-28 - - eor v0.16b,v0.16b,v29.16b - - xar v27.2d, v18.2d,v27.2d,#64-21 // D[3]=A[0][3] - xar v3.2d,v17.2d,v31.2d,#64-15 // A[0][3]=A[3][3] - xar v30.2d, v11.2d,v30.2d,#64-10 // D[1]=A[3][2] - xar v31.2d, v7.2d,v31.2d,#64-6 // D[2]=A[2][1] - xar v29.2d, v10.2d,v29.2d,#64-3 // D[0]=A[1][2] - - ////////////////////////////////////////////////// Chi+Iota - bcax v20.16b,v26.16b, v22.16b,v8.16b // A[1][3]=A[4][1] - bcax v21.16b,v8.16b,v23.16b,v22.16b // A[1][3]=A[4][1] - bcax v22.16b,v22.16b,v24.16b,v23.16b - bcax v23.16b,v23.16b,v26.16b, v24.16b - bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1] - - ld1r {v26.2d},[x10],#8 - - bcax v17.16b,v30.16b, v19.16b,v3.16b // A[0][3]=A[3][3] - bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3] - bcax v19.16b,v19.16b,v16.16b,v15.16b - bcax v15.16b,v15.16b,v30.16b, v16.16b - bcax v16.16b,v16.16b,v3.16b,v30.16b // 
A[0][3]=A[3][3] - - bcax v10.16b,v25.16b, v12.16b,v31.16b - bcax v11.16b,v31.16b, v13.16b,v12.16b - bcax v12.16b,v12.16b,v14.16b,v13.16b - bcax v13.16b,v13.16b,v25.16b, v14.16b - bcax v14.16b,v14.16b,v31.16b, v25.16b - - bcax v7.16b,v29.16b, v9.16b,v4.16b // A[0][4]=A[1][3] - bcax v8.16b,v4.16b,v5.16b,v9.16b // A[0][4]=A[1][3] - bcax v9.16b,v9.16b,v6.16b,v5.16b - bcax v5.16b,v5.16b,v29.16b, v6.16b - bcax v6.16b,v6.16b,v4.16b,v29.16b // A[0][4]=A[1][3] - - bcax v3.16b,v27.16b, v0.16b,v28.16b - bcax v4.16b,v28.16b, v1.16b,v0.16b - bcax v0.16b,v0.16b,v2.16b,v1.16b - bcax v1.16b,v1.16b,v27.16b, v2.16b - bcax v2.16b,v2.16b,v28.16b, v27.16b - - eor v0.16b,v0.16b,v26.16b - - tst x10,#255 - bne .Loop_ce - - ret -.size KeccakF1600_ce,.-KeccakF1600_ce - -.type KeccakF1600_cext,%function -.align 5 -KeccakF1600_cext: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! - add x29,sp,#0 - stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement - stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] - ldp d0,d1,[x0,#8*0] - ldp d2,d3,[x0,#8*2] - ldp d4,d5,[x0,#8*4] - ldp d6,d7,[x0,#8*6] - ldp d8,d9,[x0,#8*8] - ldp d10,d11,[x0,#8*10] - ldp d12,d13,[x0,#8*12] - ldp d14,d15,[x0,#8*14] - ldp d16,d17,[x0,#8*16] - ldp d18,d19,[x0,#8*18] - ldp d20,d21,[x0,#8*20] - ldp d22,d23,[x0,#8*22] - ldr d24,[x0,#8*24] - adr x10,iotas - bl KeccakF1600_ce - ldr x30,[sp,#__SIZEOF_POINTER__] - stp d0,d1,[x0,#8*0] - stp d2,d3,[x0,#8*2] - stp d4,d5,[x0,#8*4] - stp d6,d7,[x0,#8*6] - stp d8,d9,[x0,#8*8] - stp d10,d11,[x0,#8*10] - stp d12,d13,[x0,#8*12] - stp d14,d15,[x0,#8*14] - stp d16,d17,[x0,#8*16] - stp d18,d19,[x0,#8*18] - stp d20,d21,[x0,#8*20] - stp d22,d23,[x0,#8*22] - str d24,[x0,#8*24] - - ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] - ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] - ldr 
x29,[sp],#2*__SIZEOF_POINTER__+64 -.inst 0xd50323bf // autiasp - ret -.size KeccakF1600_cext,.-KeccakF1600_cext -.globl SHA3_absorb_cext -.type SHA3_absorb_cext,%function -.align 5 -SHA3_absorb_cext: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*__SIZEOF_POINTER__-64]! - add x29,sp,#0 - stp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] // per ABI requirement - stp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - stp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - stp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] - ldp d0,d1,[x0,#8*0] - ldp d2,d3,[x0,#8*2] - ldp d4,d5,[x0,#8*4] - ldp d6,d7,[x0,#8*6] - ldp d8,d9,[x0,#8*8] - ldp d10,d11,[x0,#8*10] - ldp d12,d13,[x0,#8*12] - ldp d14,d15,[x0,#8*14] - ldp d16,d17,[x0,#8*16] - ldp d18,d19,[x0,#8*18] - ldp d20,d21,[x0,#8*20] - ldp d22,d23,[x0,#8*22] - ldr d24,[x0,#8*24] - b .Loop_absorb_ce - -.align 4 -.Loop_absorb_ce: - subs x2,x2,x3 // len - bsz - blo .Labsorbed_ce - - cmp x3,#104 - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 - eor v0.16b,v0.16b,v27.16b - eor v1.16b,v1.16b,v28.16b - eor v2.16b,v2.16b,v29.16b - eor v3.16b,v3.16b,v30.16b - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 - eor v4.16b,v4.16b,v27.16b - eor v5.16b,v5.16b,v28.16b - eor v6.16b,v6.16b,v29.16b - eor v7.16b,v7.16b,v30.16b - ld1 {v31.8b},[x1],#8 // A[1][4] ^= *inp++ - eor v8.16b,v8.16b,v31.16b - blo .Lprocess_block_ce - - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 - eor v9.16b,v9.16b,v27.16b - eor v10.16b,v10.16b,v28.16b - eor v11.16b,v11.16b,v29.16b - eor v12.16b,v12.16b,v30.16b - beq .Lprocess_block_ce - - cmp x3,#144 - ld1 {v27.8b,v28.8b,v29.8b,v30.8b},[x1],#32 - eor v13.16b,v13.16b,v27.16b - eor v14.16b,v14.16b,v28.16b - eor v15.16b,v15.16b,v29.16b - eor v16.16b,v16.16b,v30.16b - blo .Lprocess_block_ce - - ld1 {v31.8b},[x1],#8 // A[3][3] ^= *inp++ - eor v17.16b,v17.16b,v31.16b - beq .Lprocess_block_ce - - ld1 {v28.8b,v29.8b,v30.8b},[x1],#24 - eor v18.16b,v18.16b,v28.16b - eor v19.16b,v19.16b,v29.16b - eor v20.16b,v20.16b,v30.16b - -.Lprocess_block_ce: - adr x10,iotas - bl 
KeccakF1600_ce - - b .Loop_absorb_ce - -.align 4 -.Labsorbed_ce: - stp d0,d1,[x0,#8*0] - stp d2,d3,[x0,#8*2] - stp d4,d5,[x0,#8*4] - stp d6,d7,[x0,#8*6] - stp d8,d9,[x0,#8*8] - stp d10,d11,[x0,#8*10] - stp d12,d13,[x0,#8*12] - stp d14,d15,[x0,#8*14] - stp d16,d17,[x0,#8*16] - stp d18,d19,[x0,#8*18] - stp d20,d21,[x0,#8*20] - stp d22,d23,[x0,#8*22] - str d24,[x0,#8*24] - add x0,x2,x3 // return value - - ldp d8,d9,[sp,#2*__SIZEOF_POINTER__+0] - ldp d10,d11,[sp,#2*__SIZEOF_POINTER__+16] - ldp d12,d13,[sp,#2*__SIZEOF_POINTER__+32] - ldp d14,d15,[sp,#2*__SIZEOF_POINTER__+48] - ldp x29,x30,[sp],#2*__SIZEOF_POINTER__+64 -.inst 0xd50323bf // autiasp - ret -.size SHA3_absorb_cext,.-SHA3_absorb_cext -.globl SHA3_squeeze_cext -.type SHA3_squeeze_cext,%function -.align 5 -SHA3_squeeze_cext: -.inst 0xd503233f // paciasp - stp x29,x30,[sp,#-2*__SIZEOF_POINTER__]! - add x29,sp,#0 - mov x9,x0 - mov x10,x3 - -.Loop_squeeze_ce: - ldr x4,[x9],#8 - cmp x2,#8 - blo .Lsqueeze_tail_ce -#ifdef __AARCH64EB__ - rev x4,x4 -#endif - str x4,[x1],#8 - beq .Lsqueeze_done_ce - - sub x2,x2,#8 - subs x10,x10,#8 - bhi .Loop_squeeze_ce - - bl KeccakF1600_cext - ldr x30,[sp,#__SIZEOF_POINTER__] - mov x9,x0 - mov x10,x3 - b .Loop_squeeze_ce - -.align 4 -.Lsqueeze_tail_ce: - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 - lsr x4,x4,#8 - subs x2,x2,#1 - beq .Lsqueeze_done_ce - strb w4,[x1],#1 - -.Lsqueeze_done_ce: - ldr x29,[sp],#2*__SIZEOF_POINTER__ -.inst 0xd50323bf // autiasp - ret -.size SHA3_squeeze_cext,.-SHA3_squeeze_cext -.byte 
75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 -.align 2 From 49f1a02e4a6c8ff76c195e8da2db73ac616d6334 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Tue, 11 Nov 2025 15:47:20 -0300 Subject: [PATCH 36/40] rewrite keccak_hash in terms of update and finalize --- crates/common/crypto/keccak/mod.rs | 39 +++--------------------------- 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index 4227cc4d561..8e9204f02ac 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -20,42 +20,9 @@ mod imp { } pub fn keccak_hash(data: impl AsRef<[u8]>) -> [u8; 32] { - let mut state = State::default(); - let mut tail_buf = [0; BLOCK_SIZE]; - let mut hash_buf = [0; 32]; - - let tail_len; - match data.as_ref() { - [] => tail_len = 0, - data if data.len() < BLOCK_SIZE => unsafe { - tail_len = data.len(); - tail_buf.get_unchecked_mut(..tail_len).copy_from_slice(data); - }, - data => unsafe { - tail_len = SHA3_absorb(&mut state, data.as_ptr(), data.len(), BLOCK_SIZE); - if tail_len != 0 { - let tail_data = data.get_unchecked(data.len() - tail_len..); - tail_buf - .get_unchecked_mut(..tail_len) - .copy_from_slice(tail_data); - } - }, - } - - unsafe { - *tail_buf.get_unchecked_mut(tail_len) = 0x01; - *tail_buf.get_unchecked_mut(BLOCK_SIZE - 1) |= 0x80; - - SHA3_absorb(&mut state, tail_buf.as_ptr(), tail_buf.len(), BLOCK_SIZE); - SHA3_squeeze( - &mut state, - hash_buf.as_mut_ptr(), - hash_buf.len(), - BLOCK_SIZE, - ); - } - - hash_buf + let mut state = Keccak256::new(); + state.update(data); + state.finalize() } #[derive(Clone)] From 374502d14ce7c40aa8e88176fadcf8b89869c3eb Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Tue, 11 Nov 2025 16:12:55 -0300 Subject: [PATCH 37/40] remove unused function 
--- crates/common/utils.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/crates/common/utils.rs b/crates/common/utils.rs index 2da129c2df0..ab0009c0e9b 100644 --- a/crates/common/utils.rs +++ b/crates/common/utils.rs @@ -79,19 +79,6 @@ pub fn truncate_array(data: [u8; N]) -> [u8; M] res } -/// Splits an array in two at position M. -/// Fails compilation if N != M + L. -pub fn split_array( - data: [u8; N], -) -> ([u8; M], [u8; L]) { - const { assert!(N == M + L) }; - let mut before = [0u8; M]; - let mut after = [0u8; L]; - before.copy_from_slice(&data[..M]); - after.copy_from_slice(&data[M..]); - (before, after) -} - #[cfg(test)] mod test { use ethereum_types::U256; From e82e8093db93dce0272e3cc62da221999c6a35de Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Tue, 11 Nov 2025 16:14:50 -0300 Subject: [PATCH 38/40] remove stale comment --- crates/networking/p2p/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index f026745a83e..a3ac7a756ca 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -38,7 +38,6 @@ rocksdb = { workspace = true, optional = true } prometheus = "0.14.0" tokio-stream = "0.1.17" -# Used for incremental hashing due to MAC, intentionally not from workspace. 
serde_json = "1.0.117" From 6bac6e3ec4cec2cf26891c268b33316423a4dae1 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Tue, 11 Nov 2025 17:30:30 -0300 Subject: [PATCH 39/40] refactor new --- crates/common/crypto/keccak/mod.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/common/crypto/keccak/mod.rs b/crates/common/crypto/keccak/mod.rs index 8e9204f02ac..f3d5e9c06f8 100644 --- a/crates/common/crypto/keccak/mod.rs +++ b/crates/common/crypto/keccak/mod.rs @@ -45,11 +45,7 @@ mod imp { impl Keccak256 { #[inline] pub fn new() -> Self { - Self { - state: State::default(), - tail_buf: [0; BLOCK_SIZE], - tail_len: 0, - } + Self::default() } #[inline] From cdf6afd2e764fc4f555b18e1b1a8e04d52abf241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Rodr=C3=ADguez=20Chatruc?= <49622509+jrchatruc@users.noreply.github.com> Date: Tue, 11 Nov 2025 17:36:46 -0300 Subject: [PATCH 40/40] Update crates/common/crypto/keccak/README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tomás Grüner <47506558+MegaRedHand@users.noreply.github.com> --- crates/common/crypto/keccak/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/crypto/keccak/README.md b/crates/common/crypto/keccak/README.md index 9873dff4a63..6bb78ea1524 100644 --- a/crates/common/crypto/keccak/README.md +++ b/crates/common/crypto/keccak/README.md @@ -33,7 +33,7 @@ The implementation is currently rather manual: - Code is generated by running the scripts in the _Cryptogams_ project (currently at commit `680f98c1765a7cb89c193db169ed048599f92186`), as follows: ```shell $ cd cryptogams/arm -$ ./keccak-1600-armv8.pl linux64 keccak1600-armv8.s +$ ./keccak1600-armv8.pl linux64 keccak1600-armv8.s $ cd ../x86_64 $ ./keccak1600-x86_64.pl linux64 keccak1600-x86_64.s ```