Skip to content

Commit 5b562e6

Browse files
committed
pkg/micro-ecc: fix on GCC 15.2.0 for AVR
- add a workaround for a (presumably) compiler bug in the GCC 15.2.0 register allocation code by manually placing a function argument into the Y register. - add the missing `"memory"` clobber to the inline asm implementations. With GCC 15.2.0 the `vli_mmod_fast_secp256r1()` function would not compute correctly without the `"memory"` clobber.
1 parent 5196bac commit 5b562e6

File tree

2 files changed

+204
-0
lines changed

2 files changed

+204
-0
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
From 08db99075ca9e82ed136df20bfc9855623c9b361 Mon Sep 17 00:00:00 2001
2+
From: Marian Buschsieweke <[email protected]>
3+
Date: Sun, 30 Nov 2025 11:29:21 +0100
4+
Subject: [PATCH] asm_avr.inc: fix compilation on GCC 15.2.0
5+
6+
Compilation with GCC 15.2.0 failed with:
7+
8+
asm_avr.inc:915:5: error: cannot find a register in class 'POINTER_Y_REGS' while reloading 'asm'
9+
10+
This works around the issue by manually placing `right` into the Y
11+
register before the inline assembly.
12+
---
13+
asm_avr.inc | 9 +++++++--
14+
1 file changed, 7 insertions(+), 2 deletions(-)
15+
16+
diff --git a/asm_avr.inc b/asm_avr.inc
17+
index c14bf55..6259d36 100644
18+
--- a/asm_avr.inc
19+
+++ b/asm_avr.inc
20+
@@ -905,13 +905,18 @@ uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
21+
const uECC_word_t *left,
22+
const uECC_word_t *right,
23+
wordcount_t num_words) {
24+
- volatile uECC_word_t *r = result;
25+
uint8_t r0 = 0;
26+
uint8_t r1 = 0;
27+
uint8_t r2 = 0;
28+
uint8_t zero = 0;
29+
uint8_t k, i;
30+
31+
+ /* manually allocate right to register Y to work around bug in GCC 15.2 */
32+
+ register uint8_t yl asm("r28");
33+
+ register uint8_t yh asm("r29");
34+
+ yl = (uint8_t)(unsigned)right;
35+
+ yh = (uint8_t)(((unsigned)right) >> 8);
36+
+
37+
__asm__ volatile (
38+
"ldi %[k], 1 \n\t" /* k = 1; k < num_words; ++k */
39+
40+
@@ -986,7 +991,7 @@ uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
41+
"st z+, %[r0] \n\t" /* Store last result byte. */
42+
"eor r1, r1 \n\t" /* fix r1 to be 0 again */
43+
44+
- : "+z" (result), "+x" (left), "+y" (right),
45+
+ : "+z" (result), "+x" (left), "+r" (yl), "+r" (yh),
46+
[r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
47+
[zero] "+r" (zero), [num] "+r" (num_words),
48+
[k] "=&r" (k), [i] "=&r" (i)
49+
--
50+
2.52.0
51+
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
From 03aa4f6a52e7ecedce09e0336739530261796a90 Mon Sep 17 00:00:00 2001
2+
From: Marian Buschsieweke <[email protected]>
3+
Date: Sun, 30 Nov 2025 20:13:22 +0100
4+
Subject: [PATCH] asm_avr.inc: add missing memory clobber
5+
6+
It seems that older versions of GCC did not optimize memory accesses
7+
across function calls. That made function calls an implicit memory
8+
barrier, even when the function implementation was part of the current
9+
compilation unit and the compiler could infer that a function did not
10+
touch memory.
11+
12+
With inline assembly, the compiler assumes the clobber list to be
13+
exhaustive. If no memory clobber is given and a function does not touch
14+
memory except for the inline assembly, the compiler may assume that
15+
a function does not change memory contents and optimize accordingly.
16+
17+
Adding the required memory clobbers fixes incorrect computations of
18+
`vli_mmod_fast_secp256r1()` on AVR GCC 15.2.0.
19+
---
20+
asm_avr.inc | 30 +++++++++++++++---------------
21+
1 file changed, 15 insertions(+), 15 deletions(-)
22+
23+
diff --git a/asm_avr.inc b/asm_avr.inc
24+
index c14bf55..d600858 100644
25+
--- a/asm_avr.inc
26+
+++ b/asm_avr.inc
27+
@@ -42,9 +42,9 @@ uECC_VLI_API void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words) {
28+
"1: \n\t"
29+
: "+x" (v)
30+
: [num] "r" (num_words)
31+
- :
32+
+ : "memory"
33+
#if (uECC_MAX_WORDS != uECC_MIN_WORDS)
34+
- "r30", "r31", "cc"
35+
+ , "r30", "r31", "cc"
36+
#endif
37+
);
38+
}
39+
@@ -67,7 +67,7 @@ uECC_VLI_API void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordco
40+
"1: \n\t"
41+
: "+x" (d), "+y" (src)
42+
: [num] "r" ((uint8_t)(num_words * 2))
43+
- : "r0"
44+
+ : "r0", "memory"
45+
#if (uECC_MAX_WORDS != uECC_MIN_WORDS)
46+
, "r30", "r31", "cc"
47+
#endif
48+
@@ -102,10 +102,10 @@ uECC_VLI_API void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words) {
49+
: "+x" (v)
50+
#if (uECC_MAX_WORDS != uECC_MIN_WORDS)
51+
: [num] "r" (num_words), [jump] "r" ((uint8_t)(3 * (num_words - 1)))
52+
- : "r0", "r30", "r31", "cc"
53+
+ : "r0", "r30", "r31", "cc", "memory"
54+
#else
55+
: [num] "r" (num_words)
56+
- : "r0", "cc"
57+
+ : "r0", "cc", "memory"
58+
#endif
59+
);
60+
}
61+
@@ -152,7 +152,7 @@ uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
62+
: "+x" (left), "+y" (right),
63+
[clb] "=&r" (carry), [rb] "=&r" (right_byte)
64+
: [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2))
65+
- : "r30", "r31", "cc"
66+
+ : "r30", "r31", "cc", "memory"
67+
);
68+
return carry;
69+
}
70+
@@ -199,7 +199,7 @@ uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
71+
: "+x" (left), "+y" (right),
72+
[clb] "=&r" (carry), [rb] "=&r" (right_byte)
73+
: [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2))
74+
- : "r30", "r31", "cc"
75+
+ : "r30", "r31", "cc", "memory"
76+
);
77+
return carry;
78+
}
79+
@@ -256,7 +256,7 @@ uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
80+
: "r" (r18)
81+
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
82+
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r19", "r20",
83+
- "r21", "r22", "r23", "r24", "r25", "cc"
84+
+ "r21", "r22", "r23", "r24", "r25", "cc", "memory"
85+
);
86+
}
87+
#define asm_mult 1
88+
@@ -308,7 +308,7 @@ uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
89+
: "r" (r20)
90+
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
91+
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19",
92+
- "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc"
93+
+ "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
94+
);
95+
}
96+
#define asm_square 1
97+
@@ -487,7 +487,7 @@ static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) {
98+
99+
: "+x" (product), [carry] "+r" (carry)
100+
: "y" (result)
101+
- : "r0", "r18", "r19", "r30", "r31", "cc"
102+
+ : "r0", "r18", "r19", "r30", "r31", "cc", "memory"
103+
);
104+
105+
if (carry > 0) {
106+
@@ -812,7 +812,7 @@ static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) {
107+
108+
: "+x" (product), [carry] "+r" (carry)
109+
: "y" (result)
110+
- : "r0", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r30", "r31", "cc"
111+
+ : "r0", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r30", "r31", "cc", "memory"
112+
);
113+
114+
if (carry > 0) {
115+
@@ -859,7 +859,7 @@ uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
116+
: "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words),
117+
[carry] "+r" (carry), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
118+
:
119+
- : "cc"
120+
+ : "cc", "memory"
121+
);
122+
return carry;
123+
}
124+
@@ -892,7 +892,7 @@ uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
125+
: "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words),
126+
[borrow] "+r" (borrow), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
127+
:
128+
- : "cc"
129+
+ : "cc", "memory"
130+
);
131+
return borrow;
132+
}
133+
@@ -991,7 +991,7 @@ uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
134+
[zero] "+r" (zero), [num] "+r" (num_words),
135+
[k] "=&r" (k), [i] "=&r" (i)
136+
:
137+
- : "r0", "cc"
138+
+ : "r0", "cc", "memory"
139+
);
140+
}
141+
#define asm_mult 1
142+
@@ -1080,7 +1080,7 @@ uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
143+
[k] "=&a" (k)
144+
: [orig] "r" (left), [max] "r" ((uint8_t)(2 * num_words)),
145+
[num] "r" (num_words)
146+
- : "r0", "r26", "r27", "r30", "r31", "cc"
147+
+ : "r0", "r26", "r27", "r30", "r31", "cc", "memory"
148+
);
149+
}
150+
#define asm_square 1
151+
--
152+
2.52.0
153+

0 commit comments

Comments
 (0)