Skip to content

Commit b1d0619

Browse files
jfechermichaeljkleinTomAFrench
authored
fix: Handle multi-byte utf8 characters in formatter (#6118)
# Description

## Problem\*

Resolves #6108

## Summary\*

## Additional Context

## Documentation\*

Check one:
- [x] No documentation needed.
- [ ] Documentation included in this PR.
- [ ] **[For Experimental Features]** Documentation to be submitted in a separate PR.

# PR Checklist\*

- [x] I have tested the changes locally.
- [x] I have formatted the changes with [Prettier](https://prettier.io/) and/or `cargo fmt` on default settings.

---------

Co-authored-by: Michael Klein <[email protected]>
Co-authored-by: Michael J Klein <[email protected]>
Co-authored-by: TomAFrench <[email protected]>
1 parent 5b1c896 commit b1d0619

11 files changed

Lines changed: 135 additions & 39 deletions

File tree

.github/workflows/test-js-packages.yml

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ jobs:
183183
with:
184184
name: acvm-js
185185
path: ./acvm-repo/acvm_js
186-
186+
187187
- name: Set up test environment
188188
uses: ./.github/actions/setup
189189

@@ -230,13 +230,13 @@ jobs:
230230
steps:
231231
- name: Checkout
232232
uses: actions/checkout@v4
233-
233+
234234
- name: Download nargo binary
235235
uses: actions/download-artifact@v4
236236
with:
237237
name: nargo
238238
path: ./nargo
239-
239+
240240
- name: Download artifact
241241
uses: actions/download-artifact@v4
242242
with:
@@ -248,7 +248,7 @@ jobs:
248248
with:
249249
name: noirc_abi_wasm
250250
path: ./tooling/noirc_abi_wasm
251-
251+
252252
- name: Set nargo on PATH
253253
run: |
254254
nargo_binary="${{ github.workspace }}/nargo/nargo"
@@ -336,13 +336,13 @@ jobs:
336336
with:
337337
name: acvm-js
338338
path: ./acvm-repo/acvm_js
339-
339+
340340
- name: Download noirc_abi package artifact
341341
uses: actions/download-artifact@v4
342342
with:
343343
name: noirc_abi_wasm
344344
path: ./tooling/noirc_abi_wasm
345-
345+
346346
- name: Set nargo on PATH
347347
run: |
348348
nargo_binary="${{ github.workspace }}/nargo/nargo"
@@ -468,7 +468,7 @@ jobs:
468468
working-directory: ./compiler/integration-tests
469469
run: |
470470
yarn test:browser
471-
471+
472472
test-examples:
473473
name: Example scripts
474474
runs-on: ubuntu-latest
@@ -509,6 +509,59 @@ jobs:
509509
working-directory: ./examples/codegen_verifier
510510
run: ./test.sh
511511

512+
external-repo-checks:
513+
needs: [build-nargo]
514+
runs-on: ubuntu-latest
515+
# Only run when 'run-external-checks' label is present
516+
if: contains(github.event.pull_request.labels.*.name, 'run-external-checks')
517+
timeout-minutes: 30
518+
strategy:
519+
fail-fast: false
520+
matrix:
521+
project:
522+
# Disabled as these are currently failing with many visibility errors
523+
# - { repo: AztecProtocol/aztec-nr, path: ./ }
524+
# - { repo: AztecProtocol/aztec-packages, path: ./noir-projects/noir-contracts }
525+
# Disabled as aztec-packages requires a setup-step in order to generate a `Nargo.toml`
526+
#- { repo: AztecProtocol/aztec-packages, path: ./noir-projects/noir-protocol-circuits }
527+
- { repo: zac-williamson/noir-edwards, path: ./, ref: 037e44b2ee8557c51f6aef9bb9d63ea9e32722d1 }
528+
# TODO: Enable these once they're passing against master again.
529+
# - { repo: zac-williamson/noir-bignum, path: ./, ref: 030c2acce1e6b97c44a3bbbf3429ed96f20d72d3 }
530+
# - { repo: vlayer-xyz/monorepo, path: ./, ref: ee46af88c025863872234eb05d890e1e447907cb }
531+
# - { repo: hashcloak/noir-bigint, path: ./, ref: 940ddba3a5201b508e7b37a2ef643551afcf5ed8 }
532+
name: Check external repo - ${{ matrix.project.repo }}
533+
steps:
534+
- name: Checkout
535+
uses: actions/checkout@v4
536+
with:
537+
repository: ${{ matrix.project.repo }}
538+
path: test-repo
539+
ref: ${{ matrix.project.ref }}
540+
541+
- name: Download nargo binary
542+
uses: actions/download-artifact@v4
543+
with:
544+
name: nargo
545+
path: ./nargo
546+
547+
- name: Set nargo on PATH
548+
run: |
549+
nargo_binary="${{ github.workspace }}/nargo/nargo"
550+
chmod +x $nargo_binary
551+
echo "$(dirname $nargo_binary)" >> $GITHUB_PATH
552+
export PATH="$PATH:$(dirname $nargo_binary)"
553+
nargo -V
554+
555+
- name: Remove requirements on compiler version
556+
working-directory: ./test-repo
557+
run: |
558+
# Github actions seems to not expand "**" in globs by default.
559+
shopt -s globstar
560+
sed -i '/^compiler_version/d' ./**/Nargo.toml
561+
- name: Run nargo check
562+
working-directory: ./test-repo/${{ matrix.project.path }}
563+
run: nargo check
564+
512565
# This is a job which depends on all test jobs and reports the overall status.
513566
# This allows us to add/remove test jobs without having to update the required workflows.
514567
tests-end:
@@ -526,7 +579,7 @@ jobs:
526579
- test-integration-node
527580
- test-integration-browser
528581
- test-examples
529-
582+
530583
steps:
531584
- name: Report overall success
532585
run: |

compiler/noirc_frontend/src/lexer/errors.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ pub enum LexerErrorKind {
3434
InvalidEscape { escaped: char, span: Span },
3535
#[error("Invalid quote delimiter `{delimiter}`, valid delimiters are `{{`, `[`, and `(`")]
3636
InvalidQuoteDelimiter { delimiter: SpannedToken },
37+
#[error("Non-ASCII characters are invalid in comments")]
38+
NonAsciiComment { span: Span },
3739
#[error("Expected `{end_delim}` to close this {start_delim}")]
3840
UnclosedQuote { start_delim: SpannedToken, end_delim: Token },
3941
}
@@ -65,6 +67,7 @@ impl LexerErrorKind {
6567
LexerErrorKind::UnterminatedStringLiteral { span } => *span,
6668
LexerErrorKind::InvalidEscape { span, .. } => *span,
6769
LexerErrorKind::InvalidQuoteDelimiter { delimiter } => delimiter.to_span(),
70+
LexerErrorKind::NonAsciiComment { span, .. } => *span,
6871
LexerErrorKind::UnclosedQuote { start_delim, .. } => start_delim.to_span(),
6972
}
7073
}
@@ -124,6 +127,9 @@ impl LexerErrorKind {
124127
LexerErrorKind::InvalidQuoteDelimiter { delimiter } => {
125128
(format!("Invalid quote delimiter `{delimiter}`"), "Valid delimiters are `{`, `[`, and `(`".to_string(), delimiter.to_span())
126129
},
130+
LexerErrorKind::NonAsciiComment { span } => {
131+
("Non-ASCII character in comment".to_string(), "Invalid comment character: only ASCII is currently supported.".to_string(), *span)
132+
}
127133
LexerErrorKind::UnclosedQuote { start_delim, end_delim } => {
128134
("Unclosed `quote` expression".to_string(), format!("Expected a `{end_delim}` to close this `{start_delim}`"), start_delim.to_span())
129135
}

compiler/noirc_frontend/src/lexer/lexer.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,11 @@ impl<'a> Lexer<'a> {
606606
};
607607
let comment = self.eat_while(None, |ch| ch != '\n');
608608

609+
if !comment.is_ascii() {
610+
let span = Span::from(start..self.position);
611+
return Err(LexerErrorKind::NonAsciiComment { span });
612+
}
613+
609614
if doc_style.is_none() && self.skip_comments {
610615
return self.next_token();
611616
}
@@ -651,6 +656,11 @@ impl<'a> Lexer<'a> {
651656
}
652657

653658
if depth == 0 {
659+
if !content.is_ascii() {
660+
let span = Span::from(start..self.position);
661+
return Err(LexerErrorKind::NonAsciiComment { span });
662+
}
663+
654664
if doc_style.is_none() && self.skip_comments {
655665
return self.next_token();
656666
}
@@ -1331,6 +1341,7 @@ mod tests {
13311341

13321342
Err(LexerErrorKind::InvalidIntegerLiteral { .. })
13331343
| Err(LexerErrorKind::UnexpectedCharacter { .. })
1344+
| Err(LexerErrorKind::NonAsciiComment { .. })
13341345
| Err(LexerErrorKind::UnterminatedBlockComment { .. }) => {
13351346
expected_token_found = true;
13361347
}
@@ -1389,4 +1400,17 @@ mod tests {
13891400
}
13901401
}
13911402
}
1403+
1404+
#[test]
1405+
fn test_non_ascii_comments() {
1406+
let cases = vec!["// 🙂", "// schön", "/* in the middle 🙂 of a comment */"];
1407+
1408+
for source in cases {
1409+
let mut lexer = Lexer::new(source);
1410+
assert!(
1411+
lexer.any(|token| matches!(token, Err(LexerErrorKind::NonAsciiComment { .. }))),
1412+
"Expected NonAsciiComment error"
1413+
);
1414+
}
1415+
}
13921416
}

noir_stdlib/src/collections/map.nr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::default::Default;
44
use crate::hash::{Hash, Hasher, BuildHasher};
55
use crate::collections::bounded_vec::BoundedVec;
66

7-
// We use load factor α_max = 0.75.
7+
// We use load factor alpha_max = 0.75.
88
// Upon exceeding it, assert will fail in order to inform the user
99
// about performance degradation, so that they can adjust the capacity.
1010
global MAX_LOAD_FACTOR_NUMERATOR = 3;
@@ -624,7 +624,7 @@ impl<K, V, let N: u32, B> HashMap<K, V, N, B> {
624624
(hash + (attempt + attempt * attempt) / 2) % N
625625
}
626626

627-
// Amount of elements in the table in relation to available slots exceeds α_max.
627+
// Amount of elements in the table in relation to available slots exceeds alpha_max.
628628
// To avoid a comparatively more expensive division operation
629629
// we conduct cross-multiplication instead.
630630
// n / m >= MAX_LOAD_FACTOR_NUMERATOR / MAX_LOAD_FACTOR_DEN0MINATOR

noir_stdlib/src/ec/mod.nr

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@
33
// ========
44
// The following three elliptic curve representations are admissible:
55
mod tecurve; // Twisted Edwards curves
6-
mod swcurve; // Elliptic curves in Short Weierstraß form
6+
mod swcurve; // Elliptic curves in Short Weierstrass form
77
mod montcurve; // Montgomery curves
88
mod consts; // Commonly used curve presets
99
//
1010
// Note that Twisted Edwards and Montgomery curves are (birationally) equivalent, so that
11-
// they may be freely converted between one another, whereas Short Weierstraß curves are
11+
// they may be freely converted between one another, whereas Short Weierstrass curves are
1212
// more general. Diagrammatically:
1313
//
14-
// tecurve == montcurve swcurve
14+
// tecurve == montcurve `subset` swcurve
1515
//
1616
// Each module is further divided into two submodules, 'affine' and 'curvegroup', depending
1717
// on the preferred coordinate representation. Affine coordinates are none other than the usual
@@ -47,7 +47,7 @@ mod consts; // Commonly used curve presets
4747
// coordinates by calling the `into_group` (resp. `into_affine`) method on them. Finally,
4848
// Points may be freely mapped between their respective Twisted Edwards and Montgomery
4949
// representations by calling the `into_montcurve` or `into_tecurve` methods. For mappings
50-
// between Twisted Edwards/Montgomery curves and Short Weierstraß curves, see the Curve section
50+
// between Twisted Edwards/Montgomery curves and Short Weierstrass curves, see the Curve section
5151
// below, as the underlying mappings are those of curves rather than ambient spaces.
5252
// As a rule, Points in affine (or CurveGroup) coordinates are mapped to Points in affine
5353
// (resp. CurveGroup) coordinates.
@@ -91,21 +91,21 @@ mod consts; // Commonly used curve presets
9191
// Curve configurations may also be converted between different curve representations by calling the `into_swcurve`,
9292
// `into_montcurve` and `into_tecurve` methods subject to the relation between the curve representations mentioned
9393
// above. Note that it is possible to map Points from a Twisted Edwards/Montgomery curve to the corresponding
94-
// Short Weierstraß representation and back, and the methods to do so are exposed as `map_into_swcurve` and
94+
// Short Weierstrass representation and back, and the methods to do so are exposed as `map_into_swcurve` and
9595
// `map_from_swcurve`, which each take one argument, the point to be mapped.
9696
//
9797
// Curve maps
9898
// ==========
9999
// There are a few different ways of mapping Field elements to elliptic curves. Here we provide the simplified
100100
// Shallue-van de Woestijne-Ulas and Elligator 2 methods, the former being applicable to all curve types
101-
// provided above subject to the constraint that the coefficients of the corresponding Short Weierstraß curve satisfies
101+
// provided above subject to the constraint that the coefficients of the corresponding Short Weierstrass curve satisfy
102102
// a*b != 0 and the latter being applicable to Montgomery and Twisted Edwards curves subject to the constraint that
103103
// the coefficients of the corresponding Montgomery curve satisfy j*k != 0 and (j^2 - 4)/k^2 is non-square.
104104
//
105105
// The simplified Shallue-van de Woestijne-Ulas method is exposed as the method `swu_map` on the Curve configuration and
106106
// depends on two parameters, a Field element z != -1 for which g(x) - z is irreducible over Field and g(b/(z*a)) is
107107
// square, where g(x) = x^3 + a*x + b is the right-hand side of the defining equation of the corresponding Short
108-
// Weierstraß curve, and a Field element u to be mapped onto the curve. For example, in the case of bjj_affine above,
108+
// Weierstrass curve, and a Field element u to be mapped onto the curve. For example, in the case of bjj_affine above,
109109
// it may be determined using the scripts provided at <https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve> that z = 5.
110110
//
111111
// The Elligator 2 method is exposed as the method `elligator2_map` on the Curve configurations of Montgomery and

noir_stdlib/src/ec/montcurve.nr

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ mod affine {
145145
TECurve::new((j + 2) / k, (j - 2) / k, gen.into_tecurve())
146146
}
147147

148-
// Conversion to equivalent Short Weierstraß curve
148+
// Conversion to equivalent Short Weierstrass curve
149149
pub fn into_swcurve(self) -> SWCurve {
150150
let j = self.j;
151151
let k = self.k;
@@ -155,7 +155,7 @@ mod affine {
155155
SWCurve::new(a0, b0, self.map_into_swcurve(self.gen))
156156
}
157157

158-
// Point mapping into equivalent Short Weierstraß curve
158+
// Point mapping into equivalent Short Weierstrass curve
159159
pub fn map_into_swcurve(self, p: Point) -> SWPoint {
160160
if p.is_zero() {
161161
SWPoint::zero()
@@ -164,7 +164,7 @@ mod affine {
164164
}
165165
}
166166

167-
// Point mapping from equivalent Short Weierstraß curve
167+
// Point mapping from equivalent Short Weierstrass curve
168168
fn map_from_swcurve(self, p: SWPoint) -> Point {
169169
let SWPoint {x, y, infty} = p;
170170
let j = self.j;
@@ -347,7 +347,7 @@ mod curvegroup {
347347
TECurve::new((j + 2) / k, (j - 2) / k, gen.into_tecurve())
348348
}
349349

350-
// Conversion to equivalent Short Weierstraß curve
350+
// Conversion to equivalent Short Weierstrass curve
351351
fn into_swcurve(self) -> SWCurve {
352352
let j = self.j;
353353
let k = self.k;
@@ -357,12 +357,12 @@ mod curvegroup {
357357
SWCurve::new(a0, b0, self.map_into_swcurve(self.gen))
358358
}
359359

360-
// Point mapping into equivalent Short Weierstraß curve
360+
// Point mapping into equivalent Short Weierstrass curve
361361
pub fn map_into_swcurve(self, p: Point) -> SWPoint {
362362
self.into_affine().map_into_swcurve(p.into_affine()).into_group()
363363
}
364364

365-
// Point mapping from equivalent Short Weierstraß curve
365+
// Point mapping from equivalent Short Weierstrass curve
366366
fn map_from_swcurve(self, p: SWPoint) -> Point {
367367
self.into_affine().map_from_swcurve(p.into_affine()).into_group()
368368
}

noir_stdlib/src/ec/swcurve.nr

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
mod affine {
2-
// Affine representation of Short Weierstraß curves
2+
// Affine representation of Short Weierstrass curves
33
// Points are represented by two-dimensional Cartesian coordinates.
44
// Group operations are implemented in terms of those in CurveGroup (in this case, Jacobian) coordinates
55
// for reasons of efficiency, cf. <https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates>.
@@ -10,7 +10,7 @@ mod affine {
1010
use crate::cmp::Eq;
1111

1212
// Curve specification
13-
pub struct Curve { // Short Weierstraß curve
13+
pub struct Curve { // Short Weierstrass curve
1414
// Coefficients in defining equation y^2 = x^3 + ax + b
1515
a: Field,
1616
b: Field,
@@ -187,14 +187,14 @@ mod affine {
187187
}
188188

189189
mod curvegroup {
190-
// CurveGroup representation of Weierstraß curves
190+
// CurveGroup representation of Weierstrass curves
191191
// Points are represented by three-dimensional Jacobian coordinates.
192192
// See <https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates> for details.
193193
use crate::ec::swcurve::affine;
194194
use crate::cmp::Eq;
195195

196196
// Curve specification
197-
pub struct Curve { // Short Weierstraß curve
197+
pub struct Curve { // Short Weierstrass curve
198198
// Coefficients in defining equation y^2 = x^3 + axz^4 + bz^6
199199
a: Field,
200200
b: Field,

0 commit comments

Comments
 (0)