Skip to content

Commit 6a218c8

Browse files
committed
Add feature: non-ASCII identifiers
The feature `non_ascii_idents` gives Rune feature parity with Rust in supporting Unicode identifiers. Refer to: rust-lang/rfcs#2457
1 parent dac879c commit 6a218c8

3 files changed

Lines changed: 49 additions & 2 deletions

File tree

crates/rune/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ disable-io = ["alloc"]
3131
fmt = ["alloc", "syntree"]
3232
# Standard-library support: enables `alloc` and forwards `std` to each listed dependency.
std = ["alloc", "num/std", "serde/std", "rune-core/std", "rune-alloc/std", "musli/std", "once_cell/std", "anyhow/std"]
3333
alloc = ["anyhow", "rune-alloc/alloc", "rune-core/alloc", "once_cell/alloc", "serde/alloc"]
34+
# Opt-in support for non-ASCII (Unicode) identifiers; pulls in the optional `unicode-ident` dependency.
non_ascii_idents = ["dep:unicode-ident"]
3435

3536
[dependencies]
3637
rune-macros = { version = "=0.14.0", path = "../rune-macros" }
@@ -76,6 +77,7 @@ sha2 = { version = "0.10.6", optional = true }
7677
base64 = { version = "0.21.0", optional = true }
7778
rand = { version = "0.8.5", optional = true }
7879
memchr = "2.7.4"
80+
unicode-ident = { version = "1.0.12", optional = true }
7981

8082
[dev-dependencies]
8183
tokio = { version = "1.28.1", features = ["full"] }

crates/rune/src/parse/lexer.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,24 @@ use crate::ast::Span;
77
use crate::compile::{self, ErrorKind};
88
use crate::SourceId;
99

10+
#[cfg(feature = "non_ascii_idents")]
11+
use unicode_ident::{is_xid_continue as is_ident_continue, is_xid_start};
12+
13+
/// Returns `true` when `c` may begin an identifier.
///
/// Unicode-aware variant, compiled only when the `non_ascii_idents` feature
/// is enabled. Accepts `_` or any character for which
/// `unicode_ident::is_xid_start` returns `true`.
#[cfg(feature = "non_ascii_idents")]
14+
fn is_ident_start(c: char) -> bool {
15+
// `_` is tested explicitly; presumably XID_Start does not include it
// (see UAX #31) - confirm against the `unicode-ident` documentation.
c == '_' || is_xid_start(c)
16+
}
17+
18+
/// Returns `true` when `c` may begin an identifier.
///
/// ASCII-only variant, compiled when the `non_ascii_idents` feature is
/// disabled. Accepts `_` and the ASCII letters `a`-`z` / `A`-`Z`; digits and
/// every other character are rejected.
#[cfg(not(feature = "non_ascii_idents"))]
19+
fn is_ident_start(c: char) -> bool {
20+
matches!(c, '_' | 'a'..='z' | 'A'..='Z')
21+
}
22+
23+
/// Returns `true` when `c` may appear after the first character of an
/// identifier.
///
/// ASCII-only variant, compiled when the `non_ascii_idents` feature is
/// disabled. Accepts the ASCII letters `a`-`z` / `A`-`Z`, `_`, and the digits
/// `0`-`9`. With the feature enabled, this name instead refers to
/// `unicode_ident::is_xid_continue` via the `use ... as` import.
#[cfg(not(feature = "non_ascii_idents"))]
24+
fn is_ident_continue(c: char) -> bool {
25+
matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '0'..='9')
26+
}
27+
1028
/// Lexer for the rune language.
1129
#[derive(Debug)]
1230
pub struct Lexer<'a> {
@@ -159,7 +177,7 @@ impl<'a> Lexer<'a> {
159177

160178
fn next_ident(&mut self, start: usize) -> compile::Result<Option<ast::Token>> {
161179
while let Some(c) = self.iter.peek() {
162-
if !matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '0'..='9') {
180+
if !is_ident_continue(c) {
163181
break;
164182
}
165183

@@ -842,7 +860,7 @@ impl<'a> Lexer<'a> {
842860
'@' => ast::Kind::At,
843861
'$' => ast::Kind::Dollar,
844862
'~' => ast::Kind::Tilde,
845-
'_' | 'a'..='z' | 'A'..='Z' => {
863+
c if is_ident_start(c) => {
846864
return self.next_ident(start);
847865
}
848866
'0'..='9' => {

scripts/non_ascii_idents.rn

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Test non-ASCII identifiers (feature `non_ascii_idents`).
2+
3+
// Creating a variable.
4+
let 另一個世界 = "三體世界";
5+
6+
// Reference to a variable.
7+
let 世界 = 另一個世界;
8+
9+
// In template interpolation.
10+
let 高論 = `你好,${世界}。`;
11+
12+
// In string formatting.
13+
println!("我對{另一個世界}說話:「{}」", 高論);
14+
15+
// Compatibility check for alphanumeric characters and underscore.
16+
let _ = ();
17+
let aB_1 = ();
18+
let Ab_2 = ();
19+
let __甲_乙_丙_丁__ = ();
20+
21+
// Naming functions and function arguments.
22+
// Both the function name and its two parameters are non-ASCII identifiers.
// The body is a single template string that interpolates both arguments; it
// is the tail expression, so it is the value the function evaluates to.
fn 口號(蟲子, 主) {
23+
`消除${蟲子}暴政,世界屬於${主}!`
24+
}
25+
26+
// Function call.
27+
println!("我們的口號是:「{}」", 口號("人類", "三體"));

0 commit comments

Comments
 (0)