Skip to content

Commit 447b05c

Browse files
committed
fix(parser): Report errors for -_1
This also fixes some double-reporting of errors and adjusts where errors are shown for `1__2`. Fixes #1103.
1 parent 5a7b742 commit 447b05c

9 files changed

Lines changed: 68 additions & 103 deletions

File tree

crates/toml/tests/snapshots/invalid/float/exp-double-us.stderr

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,3 @@ TOML parse error at line 1, column 19
33
1 | exp-double-us = 1e__23
44
| ^
55
`_` may only go between digits, expected nothing
6-
7-
---
8-
TOML parse error at line 1, column 20
9-
|
10-
1 | exp-double-us = 1e__23
11-
| ^
12-
`_` may only go between digits, expected nothing
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
TOML parse error at line 1, column 15
1+
TOML parse error at line 1, column 14
22
|
33
1 | double-us = 1__23
4-
| ^
4+
| ^
55
`_` may only go between digits, expected nothing

crates/toml/tests/snapshots/invalid/integer/us-after-bin.stderr

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,3 @@ TOML parse error at line 1, column 18
33
1 | us-after-bin = 0b_1
44
| ^
55
`_` may only go between digits, expected nothing
6-
7-
---
8-
TOML parse error at line 1, column 18
9-
|
10-
1 | us-after-bin = 0b_1
11-
| ^
12-
`_` may only go between digits, expected nothing

crates/toml/tests/snapshots/invalid/integer/us-after-hex.stderr

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,3 @@ TOML parse error at line 1, column 18
33
1 | us-after-hex = 0x_1
44
| ^
55
`_` may only go between digits, expected nothing
6-
7-
---
8-
TOML parse error at line 1, column 18
9-
|
10-
1 | us-after-hex = 0x_1
11-
| ^
12-
`_` may only go between digits, expected nothing

crates/toml/tests/snapshots/invalid/integer/us-after-oct.stderr

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,3 @@ TOML parse error at line 1, column 18
33
1 | us-after-oct = 0o_1
44
| ^
55
`_` may only go between digits, expected nothing
6-
7-
---
8-
TOML parse error at line 1, column 18
9-
|
10-
1 | us-after-oct = 0o_1
11-
| ^
12-
`_` may only go between digits, expected nothing
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
TOML parse error at line 1, column 15
1+
TOML parse error at line 1, column 14
22
|
33
1 | double-us = 1__23
4-
| ^
4+
| ^
55
`_` may only go between digits, expected nothing

crates/toml_parser/src/decoder/scalar.rs

Lines changed: 36 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,7 @@ fn ensure_radixed_value(
583583
}
584584

585585
fn decode_float_or_integer<'i>(
586-
stream: &'i str,
586+
mut stream: &'i str,
587587
raw: Raw<'i>,
588588
kind: ScalarKind,
589589
output: &mut dyn StringBuilder<'i>,
@@ -593,84 +593,55 @@ fn decode_float_or_integer<'i>(
593593

594594
let underscore = "_";
595595

596-
if has_underscore(stream) {
597-
if stream.starts_with(underscore) {
598-
let start = stream.offset_from(&raw.as_str());
599-
let end = start + underscore.len();
600-
error.report_error(
601-
ParseError::new("`_` may only go between digits")
602-
.with_context(Span::new_unchecked(0, raw.len()))
603-
.with_expected(&[])
604-
.with_unexpected(Span::new_unchecked(start, end)),
605-
);
606-
}
607-
if 1 < stream.len() && stream.ends_with(underscore) {
608-
let end = raw.len();
609-
let start = end - underscore.len();
610-
error.report_error(
611-
ParseError::new("`_` may only go between digits")
612-
.with_context(Span::new_unchecked(0, raw.len()))
613-
.with_expected(&[])
614-
.with_unexpected(Span::new_unchecked(start, end)),
615-
);
616-
}
617-
618-
for part in stream.split(underscore) {
619-
let part_start = part.offset_from(&raw.as_str());
620-
let part_end = part_start + part.len();
621-
622-
if 0 < part_start {
623-
let first = part.as_bytes().first().copied().unwrap_or(b'0');
624-
if !is_any_digit(first, kind) {
625-
let start = part_start - underscore.len();
626-
let end = part_start;
627-
debug_assert_eq!(&raw.as_str()[start..end], underscore);
628-
error.report_error(
629-
ParseError::new("`_` may only go between digits")
630-
.with_context(Span::new_unchecked(0, raw.len()))
631-
.with_expected(&[])
632-
.with_unexpected(Span::new_unchecked(start, end)),
633-
);
596+
let stream_start = stream.offset_from(&raw.as_str());
597+
while !stream.is_empty() {
598+
let sep_pos = stream.find_slice(underscore);
599+
let sep_start = sep_pos
600+
.clone()
601+
.map(|r| r.start)
602+
.unwrap_or_else(|| stream.len());
603+
604+
let part_start = stream.offset_from(&raw.as_str());
605+
let part_end = part_start + sep_start;
606+
let part = stream.next_slice(sep_start);
607+
608+
if sep_pos.is_some() {
609+
let _ = stream.next_slice(underscore.len());
610+
611+
let mut is_invalid_sep = false;
612+
if let Some(last_pos) = sep_start.checked_sub(1) {
613+
let last_byte = raw.as_bytes()[part_start + last_pos];
614+
if !is_any_digit(last_byte, kind) {
615+
is_invalid_sep = true;
634616
}
617+
} else if part_start == stream_start {
618+
is_invalid_sep = true;
635619
}
636-
if 1 < part.len() && part_end < raw.len() {
637-
let last = part.as_bytes().last().copied().unwrap_or(b'0');
638-
if !is_any_digit(last, kind) {
639-
let start = part_end;
640-
let end = start + underscore.len();
641-
debug_assert_eq!(&raw.as_str()[start..end], underscore);
642-
error.report_error(
643-
ParseError::new("`_` may only go between digits")
644-
.with_context(Span::new_unchecked(0, raw.len()))
645-
.with_expected(&[])
646-
.with_unexpected(Span::new_unchecked(start, end)),
647-
);
620+
621+
if let Some(next_byte) = stream.as_bytes().first() {
622+
if !is_any_digit(*next_byte, kind) {
623+
is_invalid_sep = true;
648624
}
625+
} else if stream.is_empty() {
626+
is_invalid_sep = true;
649627
}
650628

651-
if part.is_empty() && part_start != 0 && part_end != raw.len() {
652-
let start = part_start;
653-
let end = start + 1;
629+
if is_invalid_sep {
630+
let start = part_end;
631+
let end = start + underscore.len();
654632
error.report_error(
655633
ParseError::new("`_` may only go between digits")
656634
.with_context(Span::new_unchecked(0, raw.len()))
657635
.with_expected(&[])
658-
.with_unexpected(Span::new_unchecked(start, end)),
659-
);
660-
}
661-
662-
if !part.is_empty() && !output.push_str(part) {
663-
error.report_error(
664-
ParseError::new(ALLOCATION_ERROR)
665-
.with_unexpected(Span::new_unchecked(part_start, part_end)),
636+
.with_unexpected(Span::new_unchecked(end - underscore.len(), end)),
666637
);
667638
}
668639
}
669-
} else {
670-
if !output.push_str(stream) {
640+
641+
if !part.is_empty() && !output.push_str(part) {
671642
error.report_error(
672643
ParseError::new(ALLOCATION_ERROR)
673-
.with_unexpected(Span::new_unchecked(0, raw.len())),
644+
.with_unexpected(Span::new_unchecked(part_start, part_end)),
674645
);
675646
}
676647
}
@@ -694,10 +665,6 @@ fn is_dec_integer_digit(b: u8) -> bool {
694665
(b'0'..=b'9').contains_token(b)
695666
}
696667

697-
fn has_underscore(raw: &str) -> bool {
698-
raw.as_bytes().find_slice(b'_').is_some()
699-
}
700-
701668
fn is_float(raw: &str) -> bool {
702669
raw.as_bytes().find_slice((b'.', b'e', b'E')).is_some()
703670
}

crates/toml_parser/tests/snapshots/testsuite__parse_value__integer_neg_sep.txt

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,18 @@ EventResults {
77
span: 0..3,
88
},
99
],
10-
errors: [],
10+
errors: [
11+
ParseError {
12+
context: Some(
13+
0..3,
14+
),
15+
description: "`_` may only go between digits",
16+
expected: Some(
17+
[],
18+
),
19+
unexpected: Some(
20+
1..2,
21+
),
22+
},
23+
],
1124
}

crates/toml_parser/tests/snapshots/testsuite__parse_value__integer_pos_sep.txt

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,18 @@ EventResults {
77
span: 0..3,
88
},
99
],
10-
errors: [],
10+
errors: [
11+
ParseError {
12+
context: Some(
13+
0..3,
14+
),
15+
description: "`_` may only go between digits",
16+
expected: Some(
17+
[],
18+
),
19+
unexpected: Some(
20+
1..2,
21+
),
22+
},
23+
],
1124
}

0 commit comments

Comments
 (0)