From 418abdf5d0651bbf8e33397142df5eb2a0e91d3e Mon Sep 17 00:00:00 2001 From: Alexander Yastrebov Date: Thu, 6 Feb 2025 16:59:19 +0100 Subject: [PATCH 1/2] field: add benchmark for Element.Bytes --- field/fe_bench_test.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/field/fe_bench_test.go b/field/fe_bench_test.go index 84fdf05..fb80ca8 100644 --- a/field/fe_bench_test.go +++ b/field/fe_bench_test.go @@ -47,3 +47,11 @@ func BenchmarkMult32(b *testing.B) { x.Mult32(x, 0xaa42aa42) } } + +func BenchmarkBytes(b *testing.B) { + x := new(Element).One() + b.ResetTimer() + for i := 0; i < b.N; i++ { + x.Bytes() + } +} From c8d4b226c4cebf4a097af29b78d72b3340ce76c7 Mon Sep 17 00:00:00 2001 From: Alexander Yastrebov Date: Thu, 6 Feb 2025 17:00:32 +0100 Subject: [PATCH 2/2] field: speed up Element.Bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Write bytes in 64-bit chunks made from adjacent limbs. goos: linux goarch: amd64 pkg: filippo.io/edwards25519/field cpu: Intel(R) Core(TM) i5-8350U CPU @ 1.70GHz │ HEAD~1 │ HEAD │ │ sec/op │ sec/op vs base │ Bytes-8 60.31n ± 1% 13.67n ± 2% -77.34% (p=0.000 n=10) │ HEAD~1 │ HEAD │ │ B/op │ B/op vs base │ Bytes-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal │ HEAD~1 │ HEAD │ │ allocs/op │ allocs/op vs base │ Bytes-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal --- field/fe.go | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/field/fe.go b/field/fe.go index 6390754..6260b44 100644 --- a/field/fe.go +++ b/field/fe.go @@ -232,18 +232,22 @@ func (v *Element) bytes(out *[32]byte) []byte { t := *v t.reduce() - var buf [8]byte - for i, l := range [5]uint64{t.l0, t.l1, t.l2, t.l3, t.l4} { - bitsOffset := i * 51 - binary.LittleEndian.PutUint64(buf[:], l<<uint(bitsOffset%8)) - for i, bb := range buf { - off := bitsOffset/8 + i - if off >= len(out) { - break - } - out[off] |= bb - } - } + // Pack five 51-bit limbs into four 64-bit words: + // + // 255 204 153 102 51 0 + // 
├──l4──┼──l3──┼──l2──┼──l1──┼──l0──┤ + // ├───u3───┼───u2───┼───u1───┼───u0───┤ + // 256 192 128 64 0 + + u0 := t.l1<<51 | t.l0 + u1 := t.l2<<(102-64) | t.l1>>(64-51) + u2 := t.l3<<(153-128) | t.l2>>(128-102) + u3 := t.l4<<(204-192) | t.l3>>(192-153) + + binary.LittleEndian.PutUint64(out[0*8:], u0) + binary.LittleEndian.PutUint64(out[1*8:], u1) + binary.LittleEndian.PutUint64(out[2*8:], u2) + binary.LittleEndian.PutUint64(out[3*8:], u3) return out[:] }