|
| 1 | +From 03aa4f6a52e7ecedce09e0336739530261796a90 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Marian Buschsieweke < [email protected]> |
| 3 | +Date: Sun, 30 Nov 2025 20:13:22 +0100 |
| 4 | +Subject: [PATCH] asm_avr.inc: add missing memory clobber |
| 5 | + |
| 6 | +It seems that older versions of GCC did not optimize memory accesses |
| 7 | +across function calls. That made function calls an implicit memory |
| 8 | +barrier, even when the function implementation was part of the current |
| 9 | +compilation unit and the compiler could infer that the function did |
| 10 | +not touch memory. |
| 11 | + |
| 12 | +With inline assembly, the compiler assumes the clobber list to be |
| 13 | +exhaustive. If no memory clobber is given and a function does not touch |
| 14 | +memory except through its inline assembly, the compiler may assume that |
| 15 | +the function does not change memory contents and optimize accordingly. |
| 16 | + |
| 17 | +Adding the required memory clobbers fixes incorrect computations of |
| 18 | +`vli_mmod_fast_secp256r1()` on AVR GCC 15.2.0. |
| 19 | +--- |
| 20 | + asm_avr.inc | 30 +++++++++++++++--------------- |
| 21 | + 1 file changed, 15 insertions(+), 15 deletions(-) |
| 22 | + |
| 23 | +diff --git a/asm_avr.inc b/asm_avr.inc |
| 24 | +index c14bf55..d600858 100644 |
| 25 | +--- a/asm_avr.inc |
| 26 | ++++ b/asm_avr.inc |
| 27 | +@@ -42,9 +42,9 @@ uECC_VLI_API void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words) { |
| 28 | + "1: \n\t" |
| 29 | + : "+x" (v) |
| 30 | + : [num] "r" (num_words) |
| 31 | +- : |
| 32 | ++ : "memory" |
| 33 | + #if (uECC_MAX_WORDS != uECC_MIN_WORDS) |
| 34 | +- "r30", "r31", "cc" |
| 35 | ++ , "r30", "r31", "cc" |
| 36 | + #endif |
| 37 | + ); |
| 38 | + } |
| 39 | +@@ -67,7 +67,7 @@ uECC_VLI_API void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordco |
| 40 | + "1: \n\t" |
| 41 | + : "+x" (d), "+y" (src) |
| 42 | + : [num] "r" ((uint8_t)(num_words * 2)) |
| 43 | +- : "r0" |
| 44 | ++ : "r0", "memory" |
| 45 | + #if (uECC_MAX_WORDS != uECC_MIN_WORDS) |
| 46 | + , "r30", "r31", "cc" |
| 47 | + #endif |
| 48 | +@@ -102,10 +102,10 @@ uECC_VLI_API void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words) { |
| 49 | + : "+x" (v) |
| 50 | + #if (uECC_MAX_WORDS != uECC_MIN_WORDS) |
| 51 | + : [num] "r" (num_words), [jump] "r" ((uint8_t)(3 * (num_words - 1))) |
| 52 | +- : "r0", "r30", "r31", "cc" |
| 53 | ++ : "r0", "r30", "r31", "cc", "memory" |
| 54 | + #else |
| 55 | + : [num] "r" (num_words) |
| 56 | +- : "r0", "cc" |
| 57 | ++ : "r0", "cc", "memory" |
| 58 | + #endif |
| 59 | + ); |
| 60 | + } |
| 61 | +@@ -152,7 +152,7 @@ uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, |
| 62 | + : "+x" (left), "+y" (right), |
| 63 | + [clb] "=&r" (carry), [rb] "=&r" (right_byte) |
| 64 | + : [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2)) |
| 65 | +- : "r30", "r31", "cc" |
| 66 | ++ : "r30", "r31", "cc", "memory" |
| 67 | + ); |
| 68 | + return carry; |
| 69 | + } |
| 70 | +@@ -199,7 +199,7 @@ uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, |
| 71 | + : "+x" (left), "+y" (right), |
| 72 | + [clb] "=&r" (carry), [rb] "=&r" (right_byte) |
| 73 | + : [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2)) |
| 74 | +- : "r30", "r31", "cc" |
| 75 | ++ : "r30", "r31", "cc", "memory" |
| 76 | + ); |
| 77 | + return carry; |
| 78 | + } |
| 79 | +@@ -256,7 +256,7 @@ uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, |
| 80 | + : "r" (r18) |
| 81 | + : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", |
| 82 | + "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r19", "r20", |
| 83 | +- "r21", "r22", "r23", "r24", "r25", "cc" |
| 84 | ++ "r21", "r22", "r23", "r24", "r25", "cc", "memory" |
| 85 | + ); |
| 86 | + } |
| 87 | + #define asm_mult 1 |
| 88 | +@@ -308,7 +308,7 @@ uECC_VLI_API void uECC_vli_square(uECC_word_t *result, |
| 89 | + : "r" (r20) |
| 90 | + : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", |
| 91 | + "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", |
| 92 | +- "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc" |
| 93 | ++ "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory" |
| 94 | + ); |
| 95 | + } |
| 96 | + #define asm_square 1 |
| 97 | +@@ -487,7 +487,7 @@ static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) { |
| 98 | + |
| 99 | + : "+x" (product), [carry] "+r" (carry) |
| 100 | + : "y" (result) |
| 101 | +- : "r0", "r18", "r19", "r30", "r31", "cc" |
| 102 | ++ : "r0", "r18", "r19", "r30", "r31", "cc", "memory" |
| 103 | + ); |
| 104 | + |
| 105 | + if (carry > 0) { |
| 106 | +@@ -812,7 +812,7 @@ static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) { |
| 107 | + |
| 108 | + : "+x" (product), [carry] "+r" (carry) |
| 109 | + : "y" (result) |
| 110 | +- : "r0", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r30", "r31", "cc" |
| 111 | ++ : "r0", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r30", "r31", "cc", "memory" |
| 112 | + ); |
| 113 | + |
| 114 | + if (carry > 0) { |
| 115 | +@@ -859,7 +859,7 @@ uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, |
| 116 | + : "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words), |
| 117 | + [carry] "+r" (carry), [left] "=&r" (left_byte), [right] "=&r" (right_byte) |
| 118 | + : |
| 119 | +- : "cc" |
| 120 | ++ : "cc", "memory" |
| 121 | + ); |
| 122 | + return carry; |
| 123 | + } |
| 124 | +@@ -892,7 +892,7 @@ uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, |
| 125 | + : "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words), |
| 126 | + [borrow] "+r" (borrow), [left] "=&r" (left_byte), [right] "=&r" (right_byte) |
| 127 | + : |
| 128 | +- : "cc" |
| 129 | ++ : "cc", "memory" |
| 130 | + ); |
| 131 | + return borrow; |
| 132 | + } |
| 133 | +@@ -991,7 +991,7 @@ uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, |
| 134 | + [zero] "+r" (zero), [num] "+r" (num_words), |
| 135 | + [k] "=&r" (k), [i] "=&r" (i) |
| 136 | + : |
| 137 | +- : "r0", "cc" |
| 138 | ++ : "r0", "cc", "memory" |
| 139 | + ); |
| 140 | + } |
| 141 | + #define asm_mult 1 |
| 142 | +@@ -1080,7 +1080,7 @@ uECC_VLI_API void uECC_vli_square(uECC_word_t *result, |
| 143 | + [k] "=&a" (k) |
| 144 | + : [orig] "r" (left), [max] "r" ((uint8_t)(2 * num_words)), |
| 145 | + [num] "r" (num_words) |
| 146 | +- : "r0", "r26", "r27", "r30", "r31", "cc" |
| 147 | ++ : "r0", "r26", "r27", "r30", "r31", "cc", "memory" |
| 148 | + ); |
| 149 | + } |
| 150 | + #define asm_square 1 |
| 151 | +-- |
| 152 | +2.52.0 |
| 153 | + |
0 commit comments