From e715c1649e151054e5fb462e37262cf06a2216fb Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Wed, 4 Dec 2024 17:30:14 -0300 Subject: [PATCH 01/13] fix: honor escape characters in format strings --- compiler/noirc_frontend/src/lexer/lexer.rs | 93 +++++++++++++++++----- 1 file changed, 73 insertions(+), 20 deletions(-) diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index 68dc142ff10..cb899a94037 100644 --- a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -409,6 +409,24 @@ impl<'a> Lexer<'a> { fn eat_string_literal(&mut self) -> SpannedTokenResult { let start = self.position; + let string = self.eat_string(start)?; + let str_literal_token = Token::Str(string); + let end = self.position; + Ok(str_literal_token.into_span(start, end)) + } + + // This differs from `eat_string_literal` in that we want the leading `f` to be captured in the Span + fn eat_fmt_string(&mut self) -> SpannedTokenResult { + let start = self.position; + self.next_char(); + + let string = self.eat_string(start)?; + let str_literal_token = Token::FmtStr(string); + let end = self.position; + Ok(str_literal_token.into_span(start, end)) + } + + fn eat_string(&mut self, start: u32) -> Result { let mut string = String::new(); while let Some(next) = self.next_char() { @@ -436,26 +454,7 @@ impl<'a> Lexer<'a> { string.push(char); } - let str_literal_token = Token::Str(string); - - let end = self.position; - Ok(str_literal_token.into_span(start, end)) - } - - // This differs from `eat_string_literal` in that we want the leading `f` to be captured in the Span - fn eat_fmt_string(&mut self) -> SpannedTokenResult { - let start = self.position; - - self.next_char(); - - let str_literal = self.eat_while(None, |ch| ch != '"'); - - let str_literal_token = Token::FmtStr(str_literal); - - self.next_char(); // Advance past the closing quote - - let end = self.position; - Ok(str_literal_token.into_span(start, end)) + Ok(string) } fn eat_format_string_or_alpha_numeric(&mut self) -> SpannedTokenResult { @@ -962,6 +961,60 @@ mod tests { } } + #[test] + fn test_eat_string_literal_with_escapes() { + let input = "let _word = \"hello\\n\\t\""; + + let expected = vec![ + Token::Keyword(Keyword::Let), + Token::Ident("_word".to_string()), + Token::Assign, + Token::Str("hello\n\t".to_string()), + ]; + let mut lexer = Lexer::new(input); + + for token in expected.into_iter() { + let got = lexer.next_token().unwrap(); + assert_eq!(got, token); + } + } + + #[test] + fn test_eat_fmt_string_literal() { + let input = "let _word = f\"hello\""; + + let expected = vec![ + Token::Keyword(Keyword::Let), + Token::Ident("_word".to_string()), + Token::Assign, + Token::FmtStr("hello".to_string()), + ]; + let mut lexer = Lexer::new(input); + + for token in expected.into_iter() { + let got = lexer.next_token().unwrap(); + assert_eq!(got, token); + } + } + + #[test] + fn test_eat_fmt_string_literal_with_escapes() { + let input = "let _word = f\"hello\\n\\t\""; + + let expected = vec![ + Token::Keyword(Keyword::Let), + Token::Ident("_word".to_string()), + Token::Assign, + Token::FmtStr("hello\n\t".to_string()), + ]; + let mut lexer = Lexer::new(input); + + for token in expected.into_iter() { + let got = lexer.next_token().unwrap(); + assert_eq!(got, token); + } + } + #[test] fn test_eat_integer_literals() { let test_cases: Vec<(&str, Token)> = vec![ From d1b1f5f127da7d73f0128d951950b474e6b8f183 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Thu, 5 Dec 2024 07:28:00 -0300 Subject: [PATCH 02/13] '{{' and '}}' represent '{' and '}' in format strings --- compiler/noirc_frontend/src/lexer/lexer.rs | 52 +++++++++++++++++----- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index cb899a94037..63615ed4db6 100644 --- a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -409,24 +409,42 @@ impl<'a> Lexer<'a> { fn eat_string_literal(&mut self) -> SpannedTokenResult { let start = self.position; - let string = self.eat_string(start)?; + let mut string = String::new(); + + while let Some(next) = self.next_char() { + let char = match next { + '"' => break, + '\\' => match self.next_char() { + Some('r') => '\r', + Some('n') => '\n', + Some('t') => '\t', + Some('0') => '\0', + Some('"') => '"', + Some('\\') => '\\', + Some(escaped) => { + let span = Span::inclusive(start, self.position); + return Err(LexerErrorKind::InvalidEscape { escaped, span }); + } + None => { + let span = Span::inclusive(start, self.position); + return Err(LexerErrorKind::UnterminatedStringLiteral { span }); + } + }, + other => other, + }; + + string.push(char); + } + let str_literal_token = Token::Str(string); let end = self.position; Ok(str_literal_token.into_span(start, end)) } - // This differs from `eat_string_literal` in that we want the leading `f` to be captured in the Span fn eat_fmt_string(&mut self) -> SpannedTokenResult { let start = self.position; self.next_char(); - let string = self.eat_string(start)?; - let str_literal_token = Token::FmtStr(string); - let end = self.position; - Ok(str_literal_token.into_span(start, end)) - } - - fn eat_string(&mut self, start: u32) -> Result { let mut string = String::new(); while let Some(next) = self.next_char() { @@ -448,13 +466,23 @@ impl<'a> Lexer<'a> { return Err(LexerErrorKind::UnterminatedStringLiteral { span }); } }, + '{' if self.peek_char_is('{') => { + self.next_char(); + '{' + } + '}' if self.peek_char_is('}') => { + self.next_char(); + '}' + } other => other, }; string.push(char); } - Ok(string) + let str_literal_token = Token::FmtStr(string); + let end = self.position; + Ok(str_literal_token.into_span(start, end)) } fn eat_format_string_or_alpha_numeric(&mut self) -> SpannedTokenResult { @@ -999,13 +1027,13 @@ mod tests { #[test] fn test_eat_fmt_string_literal_with_escapes() { - let input = "let _word = f\"hello\\n\\t\""; + let input = "let _word = f\"hello\\n\\t{{x}}\""; let expected = vec![ Token::Keyword(Keyword::Let), Token::Ident("_word".to_string()), Token::Assign, - Token::FmtStr("hello\n\t".to_string()), + Token::FmtStr("hello\n\t{x}".to_string()), ]; let mut lexer = Lexer::new(input); From 4d97159b8a1c08fa485c2993f25da724127d9204 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Thu, 5 Dec 2024 12:17:23 -0300 Subject: [PATCH 03/13] Lex format string fragments --- Cargo.lock | 1 - .../noirc_evaluator/src/ssa/ssa_gen/mod.rs | 20 +- compiler/noirc_frontend/Cargo.toml | 1 - compiler/noirc_frontend/src/ast/expression.rs | 16 +- compiler/noirc_frontend/src/ast/visitor.rs | 6 +- .../src/elaborator/expressions.rs | 86 +++--- .../src/hir/comptime/hir_to_display_ast.rs | 4 +- .../src/hir/comptime/interpreter.rs | 28 +- .../src/hir/resolution/errors.rs | 7 - compiler/noirc_frontend/src/hir_def/expr.rs | 4 +- compiler/noirc_frontend/src/lexer/errors.rs | 14 + compiler/noirc_frontend/src/lexer/lexer.rs | 265 ++++++++++++++---- compiler/noirc_frontend/src/lexer/token.rs | 24 +- .../src/monomorphization/ast.rs | 4 +- .../src/monomorphization/mod.rs | 7 +- .../src/monomorphization/printer.rs | 6 +- compiler/noirc_frontend/src/parser/parser.rs | 6 +- .../src/parser/parser/expression.rs | 14 +- compiler/noirc_frontend/src/tests.rs | 14 +- noir_stdlib/src/collections/map.nr | 12 +- noir_stdlib/src/collections/umap.nr | 12 +- .../comptime_fmt_strings/src/main.nr | 2 +- .../execution_success/hashmap/src/main.nr | 5 +- .../execution_success/uhashmap/src/main.nr | 3 +- 24 files changed, 377 insertions(+), 184 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2f19ed704b2..14066a0e37e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3177,7 +3177,6 @@ dependencies = [ "proptest", "proptest-derive 0.5.0", "rangemap", - "regex", "rustc-hash", "serde", "serde_json", diff --git a/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs b/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs index d28236bd360..d66c47ac725 100644 --- a/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs @@ -2,6 +2,7 @@ pub(crate) mod context; mod program; mod value; +use noirc_frontend::token::FmtStringFragment; pub(crate) use program::Ssa; use context::SharedContext; @@ -232,10 +233,25 @@ impl<'a> FunctionContext<'a> { Ok(self.builder.numeric_constant(*value as u128, Type::bool()).into()) } ast::Literal::Str(string) => Ok(self.codegen_string(string)), - ast::Literal::FmtStr(string, number_of_fields, fields) => { + ast::Literal::FmtStr(fragments, number_of_fields, fields) => { + let mut string = String::new(); + for fragment in fragments { + match fragment { + FmtStringFragment::String(value) => { + let value = value.replace('{', "{{").replace('}', "}}"); + string.push_str(&value); + } + FmtStringFragment::Interpolation(value, _span) => { + string.push('{'); + string.push_str(value); + string.push('}'); + } + } + } + // A caller needs multiple pieces of information to make use of a format string // The message string, the number of fields to be formatted, and the fields themselves - let string = self.codegen_string(string); + let string = self.codegen_string(&string); let field_count = self.builder.length_constant(*number_of_fields as u128); let fields = self.codegen_expression(fields)?; diff --git a/compiler/noirc_frontend/Cargo.toml b/compiler/noirc_frontend/Cargo.toml index 5d1520af54f..5f8f02689c8 100644 --- a/compiler/noirc_frontend/Cargo.toml +++ b/compiler/noirc_frontend/Cargo.toml @@ -25,7 +25,6 @@ num-bigint.workspace = true num-traits.workspace = true rustc-hash = "1.1.0" small-ord-set = "0.1.3" -regex = "1.9.1" cfg-if.workspace = true tracing.workspace = true petgraph = "0.6" diff --git a/compiler/noirc_frontend/src/ast/expression.rs b/compiler/noirc_frontend/src/ast/expression.rs index 2c8a9b6508d..2e4dae777e8 100644 --- a/compiler/noirc_frontend/src/ast/expression.rs +++ b/compiler/noirc_frontend/src/ast/expression.rs @@ -10,7 +10,7 @@ use crate::ast::{ use crate::node_interner::{ ExprId, InternedExpressionKind, InternedStatementKind, QuotedTypeId, StructId, }; -use crate::token::{Attributes, FunctionAttribute, Token, Tokens}; +use crate::token::{Attributes, FmtStringFragment, FunctionAttribute, Token, Tokens}; use crate::{Kind, Type}; use acvm::{acir::AcirField, FieldElement}; use iter_extended::vecmap; @@ -210,8 +210,8 @@ impl ExpressionKind { ExpressionKind::Literal(Literal::RawStr(contents, hashes)) } - pub fn format_string(contents: String) -> ExpressionKind { - ExpressionKind::Literal(Literal::FmtStr(contents)) + pub fn format_string(fragments: Vec) -> ExpressionKind { + ExpressionKind::Literal(Literal::FmtStr(fragments)) } pub fn constructor( @@ -434,7 +434,7 @@ pub enum Literal { Integer(FieldElement, /*sign*/ bool), // false for positive integer and true for negative Str(String), RawStr(String, u8), - FmtStr(String), + FmtStr(Vec), Unit, } @@ -669,7 +669,13 @@ impl Display for Literal { std::iter::once('#').cycle().take(*num_hashes as usize).collect(); write!(f, "r{hashes}\"{string}\"{hashes}") } - Literal::FmtStr(string) => write!(f, "f\"{string}\""), + Literal::FmtStr(fragments) => { + write!(f, "f\"")?; + for fragment in fragments { + fragment.fmt(f)?; + } + write!(f, "\"") + } Literal::Unit => write!(f, "()"), } } diff --git a/compiler/noirc_frontend/src/ast/visitor.rs b/compiler/noirc_frontend/src/ast/visitor.rs index f149c998eca..74c455e3135 100644 --- a/compiler/noirc_frontend/src/ast/visitor.rs +++ b/compiler/noirc_frontend/src/ast/visitor.rs @@ -16,7 +16,7 @@ use crate::{ InternedUnresolvedTypeData, QuotedTypeId, }, parser::{Item, ItemKind, ParsedSubModule}, - token::{MetaAttribute, SecondaryAttribute, Tokens}, + token::{FmtStringFragment, MetaAttribute, SecondaryAttribute, Tokens}, ParsedModule, QuotedType, }; @@ -172,7 +172,7 @@ pub trait Visitor { fn visit_literal_raw_str(&mut self, _: &str, _: u8) {} - fn visit_literal_fmt_str(&mut self, _: &str) {} + fn visit_literal_fmt_str(&mut self, _: &[FmtStringFragment]) {} fn visit_literal_unit(&mut self) {} @@ -900,7 +900,7 @@ impl Literal { Literal::Integer(value, negative) => visitor.visit_literal_integer(*value, *negative), Literal::Str(str) => visitor.visit_literal_str(str), Literal::RawStr(str, length) => visitor.visit_literal_raw_str(str, *length), - Literal::FmtStr(str) => visitor.visit_literal_fmt_str(str), + Literal::FmtStr(fragments) => visitor.visit_literal_fmt_str(fragments), Literal::Unit => visitor.visit_literal_unit(), } } diff --git a/compiler/noirc_frontend/src/elaborator/expressions.rs b/compiler/noirc_frontend/src/elaborator/expressions.rs index f801c1817ef..f65dfe9a539 100644 --- a/compiler/noirc_frontend/src/elaborator/expressions.rs +++ b/compiler/noirc_frontend/src/elaborator/expressions.rs @@ -1,7 +1,6 @@ use acvm::{AcirField, FieldElement}; use iter_extended::vecmap; use noirc_errors::{Location, Span}; -use regex::Regex; use rustc_hash::FxHashSet as HashSet; use crate::{ @@ -29,7 +28,7 @@ use crate::{ traits::{ResolvedTraitBound, TraitConstraint}, }, node_interner::{DefinitionKind, ExprId, FuncId, InternedStatementKind, TraitMethodId}, - token::Tokens, + token::{FmtStringFragment, Tokens}, Kind, QuotedType, Shared, StructType, Type, }; @@ -167,7 +166,7 @@ impl<'context> Elaborator<'context> { let len = Type::Constant(str.len().into(), Kind::u32()); (Lit(HirLiteral::Str(str)), Type::String(Box::new(len))) } - Literal::FmtStr(str) => self.elaborate_fmt_string(str, span), + Literal::FmtStr(fragments) => self.elaborate_fmt_string(fragments), Literal::Array(array_literal) => { self.elaborate_array_literal(array_literal, span, true) } @@ -234,53 +233,54 @@ impl<'context> Elaborator<'context> { (HirExpression::Literal(constructor(expr)), typ) } - fn elaborate_fmt_string(&mut self, str: String, call_expr_span: Span) -> (HirExpression, Type) { - let re = Regex::new(r"\{([a-zA-Z0-9_]+)\}") - .expect("ICE: an invalid regex pattern was used for checking format strings"); - + fn elaborate_fmt_string(&mut self, fragments: Vec) -> (HirExpression, Type) { let mut fmt_str_idents = Vec::new(); let mut capture_types = Vec::new(); + let mut len = 0; - for field in re.find_iter(&str) { - let matched_str = field.as_str(); - let ident_name = &matched_str[1..(matched_str.len() - 1)]; - - let scope_tree = self.scopes.current_scope_tree(); - let variable = scope_tree.find(ident_name); - - let hir_ident = if let Some((old_value, _)) = variable { - old_value.num_times_used += 1; - old_value.ident.clone() - } else if let Ok((definition_id, _)) = - self.lookup_global(Path::from_single(ident_name.to_string(), call_expr_span)) - { - HirIdent::non_trait_method(definition_id, Location::new(call_expr_span, self.file)) - } else if ident_name.parse::().is_ok() { - self.push_err(ResolverError::NumericConstantInFormatString { - name: ident_name.to_owned(), - span: call_expr_span, - }); - continue; - } else { - self.push_err(ResolverError::VariableNotDeclared { - name: ident_name.to_owned(), - span: call_expr_span, - }); - continue; - }; + for fragment in &fragments { + match fragment { + FmtStringFragment::String(string) => { + len += string.len(); + } + FmtStringFragment::Interpolation(ident_name, string_span) => { + len += ident_name.len() + 2; // + 2 for '{' + '}' + + let scope_tree = self.scopes.current_scope_tree(); + let variable = scope_tree.find(ident_name); + + let hir_ident = if let Some((old_value, _)) = variable { + old_value.num_times_used += 1; + old_value.ident.clone() + } else if let Ok((definition_id, _)) = + self.lookup_global(Path::from_single(ident_name.to_string(), *string_span)) + { + HirIdent::non_trait_method( + definition_id, + Location::new(*string_span, self.file), + ) + } else { + self.push_err(ResolverError::VariableNotDeclared { + name: ident_name.to_owned(), + span: *string_span, + }); + continue; + }; - let hir_expr = HirExpression::Ident(hir_ident.clone(), None); - let expr_id = self.interner.push_expr(hir_expr); - self.interner.push_expr_location(expr_id, call_expr_span, self.file); - let typ = self.type_check_variable(hir_ident, expr_id, None); - self.interner.push_expr_type(expr_id, typ.clone()); - capture_types.push(typ); - fmt_str_idents.push(expr_id); + let hir_expr = HirExpression::Ident(hir_ident.clone(), None); + let expr_id = self.interner.push_expr(hir_expr); + self.interner.push_expr_location(expr_id, *string_span, self.file); + let typ = self.type_check_variable(hir_ident, expr_id, None); + self.interner.push_expr_type(expr_id, typ.clone()); + capture_types.push(typ); + fmt_str_idents.push(expr_id); + } + } } - let len = Type::Constant(str.len().into(), Kind::u32()); + let len = Type::Constant(len.into(), Kind::u32()); let typ = Type::FmtString(Box::new(len), Box::new(Type::Tuple(capture_types))); - (HirExpression::Literal(HirLiteral::FmtStr(str, fmt_str_idents)), typ) + (HirExpression::Literal(HirLiteral::FmtStr(fragments, fmt_str_idents)), typ) } fn elaborate_prefix(&mut self, prefix: PrefixExpression, span: Span) -> (ExprId, Type) { diff --git a/compiler/noirc_frontend/src/hir/comptime/hir_to_display_ast.rs b/compiler/noirc_frontend/src/hir/comptime/hir_to_display_ast.rs index 5540a199cec..e2ef7266498 100644 --- a/compiler/noirc_frontend/src/hir/comptime/hir_to_display_ast.rs +++ b/compiler/noirc_frontend/src/hir/comptime/hir_to_display_ast.rs @@ -121,9 +121,9 @@ impl HirExpression { HirExpression::Literal(HirLiteral::Str(string)) => { ExpressionKind::Literal(Literal::Str(string.clone())) } - HirExpression::Literal(HirLiteral::FmtStr(string, _exprs)) => { + HirExpression::Literal(HirLiteral::FmtStr(fragments, _exprs)) => { // TODO: Is throwing away the exprs here valid? - ExpressionKind::Literal(Literal::FmtStr(string.clone())) + ExpressionKind::Literal(Literal::FmtStr(fragments.clone())) } HirExpression::Literal(HirLiteral::Unit) => ExpressionKind::Literal(Literal::Unit), HirExpression::Block(expr) => ExpressionKind::Block(expr.to_display_ast(interner)), diff --git a/compiler/noirc_frontend/src/hir/comptime/interpreter.rs b/compiler/noirc_frontend/src/hir/comptime/interpreter.rs index 49fd86b73bb..40fdc5c3d7c 100644 --- a/compiler/noirc_frontend/src/hir/comptime/interpreter.rs +++ b/compiler/noirc_frontend/src/hir/comptime/interpreter.rs @@ -20,7 +20,7 @@ use crate::monomorphization::{ perform_impl_bindings, perform_instantiation_bindings, resolve_trait_method, undo_instantiation_bindings, }; -use crate::token::Tokens; +use crate::token::{FmtStringFragment, Tokens}; use crate::TypeVariable; use crate::{ hir_def::{ @@ -623,8 +623,8 @@ impl<'local, 'interner> Interpreter<'local, 'interner> { self.evaluate_integer(value, is_negative, id) } HirLiteral::Str(string) => Ok(Value::String(Rc::new(string))), - HirLiteral::FmtStr(string, captures) => { - self.evaluate_format_string(string, captures, id) + HirLiteral::FmtStr(fragments, captures) => { + self.evaluate_format_string(fragments, captures, id) } HirLiteral::Array(array) => self.evaluate_array(array, id), HirLiteral::Slice(array) => self.evaluate_slice(array, id), @@ -633,7 +633,7 @@ impl<'local, 'interner> Interpreter<'local, 'interner> { fn evaluate_format_string( &mut self, - string: String, + fragments: Vec, captures: Vec, id: ExprId, ) -> IResult { @@ -644,13 +644,12 @@ impl<'local, 'interner> Interpreter<'local, 'interner> { let mut values: VecDeque<_> = captures.into_iter().map(|capture| self.evaluate(capture)).collect::>()?; - for character in string.chars() { - match character { - '\\' => escaped = true, - '{' if !escaped => consuming = true, - '}' if !escaped && consuming => { - consuming = false; - + for fragment in fragments { + match fragment { + FmtStringFragment::String(string) => { + result.push_str(&string); + } + FmtStringFragment::Interpolation(_, span) => { if let Some(value) = values.pop_front() { // When interpolating a quoted value inside a format string, we don't include the // surrounding `quote {` ... `}` as if we are unquoting the quoted value inside the string. @@ -665,13 +664,10 @@ impl<'local, 'interner> Interpreter<'local, 'interner> { } else { result.push_str(&value.display(self.elaborator.interner).to_string()); } + } else { + // TODO: should we panic here? } } - other if !consuming => { - escaped = false; - result.push(other); - } - _ => (), } } diff --git a/compiler/noirc_frontend/src/hir/resolution/errors.rs b/compiler/noirc_frontend/src/hir/resolution/errors.rs index 5c8e0a1b53e..774836f8992 100644 --- a/compiler/noirc_frontend/src/hir/resolution/errors.rs +++ b/compiler/noirc_frontend/src/hir/resolution/errors.rs @@ -77,8 +77,6 @@ pub enum ResolverError { MutableReferenceToImmutableVariable { variable: String, span: Span }, #[error("Mutable references to array indices are unsupported")] MutableReferenceToArrayElement { span: Span }, - #[error("Numeric constants should be printed without formatting braces")] - NumericConstantInFormatString { name: String, span: Span }, #[error("Closure environment must be a tuple or unit type")] InvalidClosureEnvironment { typ: Type, span: Span }, #[error("Nested slices, i.e. slices within an array or slice, are not supported")] @@ -378,11 +376,6 @@ impl<'a> From<&'a ResolverError> for Diagnostic { ResolverError::MutableReferenceToArrayElement { span } => { Diagnostic::simple_error("Mutable references to array elements are currently unsupported".into(), "Try storing the element in a fresh variable first".into(), *span) }, - ResolverError::NumericConstantInFormatString { name, span } => Diagnostic::simple_error( - format!("cannot find `{name}` in this scope "), - "Numeric constants should be printed without formatting braces".to_string(), - *span, - ), ResolverError::InvalidClosureEnvironment { span, typ } => Diagnostic::simple_error( format!("{typ} is not a valid closure environment type"), "Closure environment must be a tuple or unit type".to_string(), *span), diff --git a/compiler/noirc_frontend/src/hir_def/expr.rs b/compiler/noirc_frontend/src/hir_def/expr.rs index 5d3fe632a74..7d90b40b918 100644 --- a/compiler/noirc_frontend/src/hir_def/expr.rs +++ b/compiler/noirc_frontend/src/hir_def/expr.rs @@ -7,7 +7,7 @@ use crate::hir::type_check::generics::TraitGenerics; use crate::node_interner::{ DefinitionId, DefinitionKind, ExprId, FuncId, NodeInterner, StmtId, TraitMethodId, }; -use crate::token::Tokens; +use crate::token::{FmtStringFragment, Tokens}; use crate::Shared; use super::stmt::HirPattern; @@ -114,7 +114,7 @@ pub enum HirLiteral { Bool(bool), Integer(FieldElement, bool), //true for negative integer and false for positive Str(String), - FmtStr(String, Vec), + FmtStr(Vec, Vec), Unit, } diff --git a/compiler/noirc_frontend/src/lexer/errors.rs b/compiler/noirc_frontend/src/lexer/errors.rs index 8d799ef35d1..10a265eb8c5 100644 --- a/compiler/noirc_frontend/src/lexer/errors.rs +++ b/compiler/noirc_frontend/src/lexer/errors.rs @@ -30,6 +30,8 @@ pub enum LexerErrorKind { UnterminatedBlockComment { span: Span }, #[error("Unterminated string literal")] UnterminatedStringLiteral { span: Span }, + #[error("Invalid format string: expected '}}', found {found:?}")] + InvalidFormatString { found: char, span: Span }, #[error( "'\\{escaped}' is not a valid escape sequence. Use '\\' for a literal backslash character." )] @@ -68,6 +70,7 @@ impl LexerErrorKind { LexerErrorKind::LogicalAnd { span } => *span, LexerErrorKind::UnterminatedBlockComment { span } => *span, LexerErrorKind::UnterminatedStringLiteral { span } => *span, + LexerErrorKind::InvalidFormatString { span, .. } => *span, LexerErrorKind::InvalidEscape { span, .. } => *span, LexerErrorKind::InvalidQuoteDelimiter { delimiter } => delimiter.to_span(), LexerErrorKind::NonAsciiComment { span, .. } => *span, @@ -130,6 +133,17 @@ impl LexerErrorKind { LexerErrorKind::UnterminatedBlockComment { span } => ("Unterminated block comment".to_string(), "Unterminated block comment".to_string(), *span), LexerErrorKind::UnterminatedStringLiteral { span } => ("Unterminated string literal".to_string(), "Unterminated string literal".to_string(), *span), + LexerErrorKind::InvalidFormatString { found, span } => { + ( + format!("Invalid format string: expected '}}', found {found:?}"), + if found == &'.' { + "Field access isn't supported in format strings".to_string() + } else { + "If you intended to print '{', you can escape it using '{{'".to_string() + }, + *span, + ) + } LexerErrorKind::InvalidEscape { escaped, span } => (format!("'\\{escaped}' is not a valid escape sequence. Use '\\' for a literal backslash character."), "Invalid escape sequence".to_string(), *span), LexerErrorKind::InvalidQuoteDelimiter { delimiter } => { diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index 63615ed4db6..50ed55a8a9c 100644 --- a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -2,7 +2,7 @@ use crate::token::DocStyle; use super::{ errors::LexerErrorKind, - token::{IntType, Keyword, SpannedToken, Token, Tokens}, + token::{FmtStringFragment, IntType, Keyword, SpannedToken, Token, Tokens}, }; use acvm::{AcirField, FieldElement}; use noirc_errors::{Position, Span}; @@ -411,29 +411,34 @@ impl<'a> Lexer<'a> { let start = self.position; let mut string = String::new(); - while let Some(next) = self.next_char() { - let char = match next { - '"' => break, - '\\' => match self.next_char() { - Some('r') => '\r', - Some('n') => '\n', - Some('t') => '\t', - Some('0') => '\0', - Some('"') => '"', - Some('\\') => '\\', - Some(escaped) => { - let span = Span::inclusive(start, self.position); - return Err(LexerErrorKind::InvalidEscape { escaped, span }); - } - None => { - let span = Span::inclusive(start, self.position); - return Err(LexerErrorKind::UnterminatedStringLiteral { span }); - } - }, - other => other, - }; + loop { + if let Some(next) = self.next_char() { + let char = match next { + '"' => break, + '\\' => match self.next_char() { + Some('r') => '\r', + Some('n') => '\n', + Some('t') => '\t', + Some('0') => '\0', + Some('"') => '"', + Some('\\') => '\\', + Some(escaped) => { + let span = Span::inclusive(start, self.position); + return Err(LexerErrorKind::InvalidEscape { escaped, span }); + } + None => { + let span = Span::inclusive(start, self.position); + return Err(LexerErrorKind::UnterminatedStringLiteral { span }); + } + }, + other => other, + }; - string.push(char); + string.push(char); + } else { + let span = Span::inclusive(start, self.position); + return Err(LexerErrorKind::UnterminatedStringLiteral { span }); + } } let str_literal_token = Token::Str(string); @@ -445,44 +450,110 @@ impl<'a> Lexer<'a> { let start = self.position; self.next_char(); - let mut string = String::new(); + let mut fragments = Vec::new(); + + loop { + // String fragment until '{' or '"' + let mut string = String::new(); + let mut found_curly = false; + + loop { + if let Some(next) = self.next_char() { + let char = match next { + '"' => break, + '\\' => match self.next_char() { + Some('r') => '\r', + Some('n') => '\n', + Some('t') => '\t', + Some('0') => '\0', + Some('"') => '"', + Some('\\') => '\\', + Some(escaped) => { + let span = Span::inclusive(start, self.position); + return Err(LexerErrorKind::InvalidEscape { escaped, span }); + } + None => { + let span = Span::inclusive(start, self.position); + return Err(LexerErrorKind::UnterminatedStringLiteral { span }); + } + }, + '{' if self.peek_char_is('{') => { + self.next_char(); + '{' + } + '}' if self.peek_char_is('}') => { + self.next_char(); + '}' + } + '{' => { + found_curly = true; + break; + } + other => other, + }; + + string.push(char); + } else { + let span = Span::inclusive(start, self.position); + return Err(LexerErrorKind::UnterminatedStringLiteral { span }); + } + } - while let Some(next) = self.next_char() { - let char = match next { - '"' => break, - '\\' => match self.next_char() { - Some('r') => '\r', - Some('n') => '\n', - Some('t') => '\t', - Some('0') => '\0', - Some('"') => '"', - Some('\\') => '\\', - Some(escaped) => { - let span = Span::inclusive(start, self.position); - return Err(LexerErrorKind::InvalidEscape { escaped, span }); + if !string.is_empty() { + fragments.push(FmtStringFragment::String(string)); + } + + if !found_curly { + break; + } + + // Interpolation fragment until '}' or '"' + let mut string = String::new(); + let interpolation_start = self.position; + let mut first_char = true; + while let Some(next) = self.next_char() { + let char = match next { + '}' => { + break; } - None => { - let span = Span::inclusive(start, self.position); - return Err(LexerErrorKind::UnterminatedStringLiteral { span }); + other => { + let is_valid_char = if first_char { + other.is_ascii_alphabetic() || other == '_' + } else { + other.is_ascii_alphanumeric() || other == '_' + }; + if !is_valid_char { + let error_position = self.position; + + // Keep consuming chars until we find the closing double quote + // (unless we bumped into a double quote now, in which case we are done) + if other != '"' { + while let Some(next) = self.next_char() { + if next == '\'' && self.peek_char_is('"') { + self.next_char(); + } else if next == '"' { + break; + } + } + } + + let span = Span::inclusive(error_position, error_position); + return Err(LexerErrorKind::InvalidFormatString { found: other, span }); + } + first_char = false; + other } - }, - '{' if self.peek_char_is('{') => { - self.next_char(); - '{' - } - '}' if self.peek_char_is('}') => { - self.next_char(); - '}' - } - other => other, - }; + }; + string.push(char); + } - string.push(char); + let interpolation_span = Span::from(interpolation_start..self.position); + fragments.push(FmtStringFragment::Interpolation(string, interpolation_span)); } - let str_literal_token = Token::FmtStr(string); + let token = Token::FmtStr(fragments); let end = self.position; - Ok(str_literal_token.into_span(start, end)) + Ok(token.into_span(start, end)) } fn eat_format_string_or_alpha_numeric(&mut self) -> SpannedTokenResult { @@ -1008,14 +1079,24 @@ mod tests { } #[test] - fn test_eat_fmt_string_literal() { + fn test_eat_string_literal_missing_double_quote() { + let input = "\"hello"; + let mut lexer = Lexer::new(input); + assert!(matches!( + lexer.next_token(), + Err(LexerErrorKind::UnterminatedStringLiteral { .. }) + )); + } + + #[test] + fn test_eat_fmt_string_literal_without_interpolations() { let input = "let _word = f\"hello\""; let expected = vec![ Token::Keyword(Keyword::Let), Token::Ident("_word".to_string()), Token::Assign, - Token::FmtStr("hello".to_string()), + Token::FmtStr(vec![FmtStringFragment::String("hello".to_string())]), ]; let mut lexer = Lexer::new(input); @@ -1026,14 +1107,14 @@ mod tests { } #[test] - fn test_eat_fmt_string_literal_with_escapes() { + fn test_eat_fmt_string_literal_with_escapes_without_interpolations() { let input = "let _word = f\"hello\\n\\t{{x}}\""; let expected = vec![ Token::Keyword(Keyword::Let), Token::Ident("_word".to_string()), Token::Assign, - Token::FmtStr("hello\n\t{x}".to_string()), + Token::FmtStr(vec![FmtStringFragment::String("hello\n\t{x}".to_string())]), ]; let mut lexer = Lexer::new(input); @@ -1043,6 +1124,63 @@ mod tests { } } + #[test] + fn test_eat_fmt_string_literal_with_interpolations() { + let input = "let _word = f\"hello {world} and {_another} {vAr_123}\""; + + let expected = vec![ + Token::Keyword(Keyword::Let), + Token::Ident("_word".to_string()), + Token::Assign, + Token::FmtStr(vec![ + FmtStringFragment::String("hello ".to_string()), + FmtStringFragment::Interpolation("world".to_string(), Span::from(20..26)), + FmtStringFragment::String(" and ".to_string()), + FmtStringFragment::Interpolation("_another".to_string(), Span::from(32..41)), + FmtStringFragment::String(" ".to_string()), + FmtStringFragment::Interpolation("vAr_123".to_string(), Span::from(43..51)), + ]), + ]; + let mut lexer = Lexer::new(input); + + for token in expected.into_iter() { + let got = lexer.next_token().unwrap().into_token(); + assert_eq!(got, token); + } + } + + #[test] + fn test_eat_fmt_string_literal_missing_double_quote() { + let input = "f\"hello"; + let mut lexer = Lexer::new(input); + assert!(matches!( + lexer.next_token(), + Err(LexerErrorKind::UnterminatedStringLiteral { .. }) + )); + } + + #[test] + fn test_eat_fmt_string_literal_invalid_char_in_interpolation() { + let input = "f\"hello {foo.bar}\" true"; + let mut lexer = Lexer::new(input); + assert!(matches!(lexer.next_token(), Err(LexerErrorKind::InvalidFormatString { .. }))); + + // Make sure the lexer went past the ending double quote for better recovery + let token = lexer.next_token().unwrap().into_token(); + assert!(matches!(token, Token::Bool(true))); + } + + #[test] + fn test_eat_fmt_string_literal_double_quote_inside_interpolation() { + let input = "f\"hello {world\" true"; + let mut lexer = Lexer::new(input); + assert!(matches!(lexer.next_token(), Err(LexerErrorKind::InvalidFormatString { .. }))); + + // Make sure the lexer stopped parsing the string literal when it found \" inside the interpolation + let token = lexer.next_token().unwrap().into_token(); + assert!(matches!(token, Token::Bool(true))); + } + #[test] fn test_eat_integer_literals() { let test_cases: Vec<(&str, Token)> = vec![ @@ -1232,7 +1370,7 @@ mod tests { format!("let s = r#####\"{s}\"#####;"), ], ), - (Some(Token::FmtStr("".to_string())), vec![format!("assert(x == y, f\"{s}\");")]), + (Some(Token::FmtStr(vec![])), vec![format!("assert(x == y, f\"{s}\");")]), // expected token not found // (Some(Token::LineComment("".to_string(), None)), vec![ (None, vec![format!("//{s}"), format!("// {s}")]), @@ -1277,11 +1415,16 @@ mod tests { Err(LexerErrorKind::InvalidIntegerLiteral { .. }) | Err(LexerErrorKind::UnexpectedCharacter { .. }) | Err(LexerErrorKind::NonAsciiComment { .. }) - | Err(LexerErrorKind::UnterminatedBlockComment { .. }) => { + | Err(LexerErrorKind::UnterminatedBlockComment { .. }) + | Err(LexerErrorKind::UnterminatedStringLiteral { .. }) + | Err(LexerErrorKind::InvalidFormatString { .. }) => { expected_token_found = true; } Err(err) => { - panic!("Unexpected lexer error found: {:?}", err) + panic!( + "Unexpected lexer error found {:?} for input string {:?}", + err, blns_program_str + ) } } } diff --git a/compiler/noirc_frontend/src/lexer/token.rs b/compiler/noirc_frontend/src/lexer/token.rs index 836161c7c9f..07d36b4d140 100644 --- a/compiler/noirc_frontend/src/lexer/token.rs +++ b/compiler/noirc_frontend/src/lexer/token.rs @@ -25,7 +25,7 @@ pub enum BorrowedToken<'input> { Str(&'input str), /// the u8 is the number of hashes, i.e. r###.. RawStr(&'input str, u8), - FmtStr(&'input str), + FmtStr(&'input [FmtStringFragment]), Keyword(Keyword), IntType(IntType), AttributeStart { @@ -136,7 +136,7 @@ pub enum Token { Str(String), /// the u8 is the number of hashes, i.e. r###.. RawStr(String, u8), - FmtStr(String), + FmtStr(Vec), Keyword(Keyword), IntType(IntType), AttributeStart { @@ -312,6 +312,26 @@ pub fn token_to_borrowed_token(token: &Token) -> BorrowedToken<'_> { } } +#[derive(Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)] +pub enum FmtStringFragment { + String(String), + Interpolation(String, Span), +} + +impl Display for FmtStringFragment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FmtStringFragment::String(string) => { + let string = string.replace('{', "{{").replace('}', "}}"); + write!(f, "{}", string) + } + FmtStringFragment::Interpolation(string, _span) => { + write!(f, "{{{}}}", string) + } + } + } +} + #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)] pub enum DocStyle { Outer, diff --git a/compiler/noirc_frontend/src/monomorphization/ast.rs b/compiler/noirc_frontend/src/monomorphization/ast.rs index 8f6817dc15d..fcd6c66a93e 100644 --- a/compiler/noirc_frontend/src/monomorphization/ast.rs +++ b/compiler/noirc_frontend/src/monomorphization/ast.rs @@ -7,11 +7,11 @@ use noirc_errors::{ Location, }; -use crate::hir_def::function::FunctionSignature; use crate::{ ast::{BinaryOpKind, IntegerBitSize, Signedness, Visibility}, token::{Attributes, FunctionAttribute}, }; +use crate::{hir_def::function::FunctionSignature, token::FmtStringFragment}; use serde::{Deserialize, Serialize}; use super::HirType; @@ -106,7 +106,7 @@ pub enum Literal { Bool(bool), Unit, Str(String), - FmtStr(String, u64, Box), + FmtStr(Vec, u64, Box), } #[derive(Debug, Clone, Hash)] diff --git a/compiler/noirc_frontend/src/monomorphization/mod.rs b/compiler/noirc_frontend/src/monomorphization/mod.rs index 050f844146a..84acbafdc27 100644 --- a/compiler/noirc_frontend/src/monomorphization/mod.rs +++ b/compiler/noirc_frontend/src/monomorphization/mod.rs @@ -12,6 +12,7 @@ use crate::ast::{FunctionKind, IntegerBitSize, Signedness, UnaryOp, Visibility}; use crate::hir::comptime::InterpreterError; use crate::hir::type_check::{NoMatchingImplFoundError, TypeCheckError}; use crate::node_interner::{ExprId, ImplSearchErrorKind}; +use crate::token::FmtStringFragment; use crate::{ debug::DebugInstrumenter, hir_def::{ @@ -417,10 +418,10 @@ impl<'interner> Monomorphizer<'interner> { let expr = match self.interner.expression(&expr) { HirExpression::Ident(ident, generics) => self.ident(ident, expr, generics)?, HirExpression::Literal(HirLiteral::Str(contents)) => Literal(Str(contents)), - HirExpression::Literal(HirLiteral::FmtStr(contents, idents)) => { + HirExpression::Literal(HirLiteral::FmtStr(fragments, idents)) => { let fields = try_vecmap(idents, |ident| self.expr(ident))?; Literal(FmtStr( - contents, + fragments, fields.len() as u64, Box::new(ast::Expression::Tuple(fields)), )) @@ -1846,7 +1847,7 @@ impl<'interner> Monomorphizer<'interner> { _ => unreachable!("ICE: format string fields should be structured in a tuple, but got a {zeroed_tuple}"), }; ast::Expression::Literal(ast::Literal::FmtStr( - "\0".repeat(*length as usize), + vec![FmtStringFragment::String("\0".repeat(*length as usize))], fields_len, Box::new(zeroed_tuple), )) diff --git a/compiler/noirc_frontend/src/monomorphization/printer.rs b/compiler/noirc_frontend/src/monomorphization/printer.rs index b6421b26a03..9c1072a4117 100644 --- a/compiler/noirc_frontend/src/monomorphization/printer.rs +++ b/compiler/noirc_frontend/src/monomorphization/printer.rs @@ -105,9 +105,11 @@ impl AstPrinter { super::ast::Literal::Integer(x, _, _, _) => x.fmt(f), super::ast::Literal::Bool(x) => x.fmt(f), super::ast::Literal::Str(s) => s.fmt(f), - super::ast::Literal::FmtStr(s, _, _) => { + super::ast::Literal::FmtStr(fragments, _, _) => { write!(f, "f\"")?; - s.fmt(f)?; + for fragment in fragments { + fragment.fmt(f)?; + } write!(f, "\"") } super::ast::Literal::Unit => { diff --git a/compiler/noirc_frontend/src/parser/parser.rs b/compiler/noirc_frontend/src/parser/parser.rs index c2f7b781873..aba1d345a39 100644 --- a/compiler/noirc_frontend/src/parser/parser.rs +++ b/compiler/noirc_frontend/src/parser/parser.rs @@ -5,7 +5,7 @@ use noirc_errors::Span; use crate::{ ast::{Ident, ItemVisibility}, lexer::{Lexer, SpannedTokenResult}, - token::{IntType, Keyword, SpannedToken, Token, TokenKind, Tokens}, + token::{FmtStringFragment, IntType, Keyword, SpannedToken, Token, TokenKind, Tokens}, }; use super::{labels::ParsingRuleLabel, ParsedModule, ParserError, ParserErrorReason}; @@ -294,11 +294,11 @@ impl<'a> Parser<'a> { } } - fn eat_fmt_str(&mut self) -> Option { + fn eat_fmt_str(&mut self) -> Option> { if matches!(self.token.token(), Token::FmtStr(..)) { let token = self.bump(); match token.into_token() { - Token::FmtStr(string) => Some(string), + Token::FmtStr(fragments) => Some(fragments), _ => unreachable!(), } } else { diff --git a/compiler/noirc_frontend/src/parser/parser/expression.rs b/compiler/noirc_frontend/src/parser/parser/expression.rs index e1ecc972eeb..fdd21627a0e 100644 --- a/compiler/noirc_frontend/src/parser/parser/expression.rs +++ b/compiler/noirc_frontend/src/parser/parser/expression.rs @@ -577,7 +577,7 @@ impl<'a> Parser<'a> { /// BlockExpression = Block fn parse_literal(&mut self) -> Option { if let Some(bool) = self.eat_bool() { - return Some(ExpressionKind::Literal(Literal::Bool(bool))); + return Some(ExpressionKind::boolean(bool)); } if let Some(int) = self.eat_int() { @@ -585,15 +585,15 @@ impl<'a> Parser<'a> { } if let Some(string) = self.eat_str() { - return Some(ExpressionKind::Literal(Literal::Str(string))); + return Some(ExpressionKind::string(string)); } if let Some((string, n)) = self.eat_raw_str() { - return Some(ExpressionKind::Literal(Literal::RawStr(string, n))); + return Some(ExpressionKind::raw_string(string, n)); } - if let Some(string) = self.eat_fmt_str() { - return Some(ExpressionKind::Literal(Literal::FmtStr(string))); + if let Some(fragments) = self.eat_fmt_str() { + return Some(ExpressionKind::format_string(fragments)); } if let Some(tokens) = self.eat_quote() { @@ -865,10 +865,10 @@ mod tests { fn parses_fmt_str() { let src = "f\"hello\""; let expr = parse_expression_no_errors(src); - let ExpressionKind::Literal(Literal::FmtStr(string)) = expr.kind else { + let ExpressionKind::Literal(Literal::FmtStr(fragments)) = expr.kind else { panic!("Expected format string literal"); }; - assert_eq!(string, "hello"); + assert_eq!(fragments[0].to_string(), "hello"); } #[test] diff --git a/compiler/noirc_frontend/src/tests.rs b/compiler/noirc_frontend/src/tests.rs index cba29d58ea3..8ddf1b571e6 100644 --- a/compiler/noirc_frontend/src/tests.rs +++ b/compiler/noirc_frontend/src/tests.rs @@ -1209,8 +1209,6 @@ fn resolve_fmt_strings() { let string = f"this is i: {i}"; println(string); - println(f"I want to print {0}"); - let new_val = 10; println(f"random_string{new_val}{new_val}"); } @@ -1220,7 +1218,7 @@ fn resolve_fmt_strings() { "#; let errors = get_program_errors(src); - assert!(errors.len() == 5, "Expected 5 errors, got: {:?}", errors); + assert!(errors.len() == 3, "Expected 5 errors, got: {:?}", errors); for (err, _file_id) in errors { match &err { @@ -1229,21 +1227,13 @@ fn resolve_fmt_strings() { }) => { assert_eq!(name, "i"); } - CompilationError::ResolverError(ResolverError::NumericConstantInFormatString { - name, - .. - }) => { - assert_eq!(name, "0"); - } CompilationError::TypeError(TypeCheckError::UnusedResultError { expr_type: _, expr_span, }) => { let a = src.get(expr_span.start() as usize..expr_span.end() as usize).unwrap(); assert!( - a == "println(string)" - || a == "println(f\"I want to print {0}\")" - || a == "println(f\"random_string{new_val}{new_val}\")" + a == "println(string)" || a == "println(f\"random_string{new_val}{new_val}\")" ); } _ => unimplemented!(), diff --git a/noir_stdlib/src/collections/map.nr b/noir_stdlib/src/collections/map.nr index bcce08faab4..eae21b3b411 100644 --- a/noir_stdlib/src/collections/map.nr +++ b/noir_stdlib/src/collections/map.nr @@ -201,7 +201,9 @@ impl HashMap { } } - let msg = f"Amount of valid elements should have been {self._len} times, but got {entries.len()}."; + let self_len = self._len; + let entries_len = entries.len(); + let msg = f"Amount of valid elements should have been {self_len} times, but got {entries_len}."; assert(entries.len() == self._len, msg); entries @@ -236,8 +238,10 @@ impl HashMap { } } + let self_len = self._len; + let keys_len = keys.len(); let msg = - f"Amount of valid elements should have been {self._len} times, but got {keys.len()}."; + f"Amount of valid elements should have been {self_len} times, but got {keys_len}."; assert(keys.len() == self._len, msg); keys @@ -271,8 +275,10 @@ impl HashMap { } } + let self_len = self._len; + let values_len = values.len(); let msg = - f"Amount of valid elements should have been {self._len} times, but got {values.len()}."; + f"Amount of valid elements should have been {self_len} times, but got {values_len}."; assert(values.len() == self._len, msg); values diff --git a/noir_stdlib/src/collections/umap.nr b/noir_stdlib/src/collections/umap.nr index 3e074551e9d..00f3da7d2a0 100644 --- a/noir_stdlib/src/collections/umap.nr +++ b/noir_stdlib/src/collections/umap.nr @@ -138,7 +138,9 @@ impl UHashMap { } } - let msg = f"Amount of valid elements should have been {self._len} times, but got {entries.len()}."; + let self_len = self._len; + let entries_len = entries.len(); + let msg = f"Amount of valid elements should have been {self_len} times, but got {entries_len}."; assert(entries.len() == self._len, msg); entries @@ -158,8 +160,10 @@ impl UHashMap { } } + let self_len = self._len; + let keys_len = keys.len(); let msg = - f"Amount of valid elements should have been {self._len} times, but got {keys.len()}."; + f"Amount of valid elements should have been {self_len} times, but got {keys_len}."; assert(keys.len() == self._len, msg); keys @@ -179,8 +183,10 @@ impl UHashMap { } } + let self_len = self._len; + let values_len = values.len(); let msg = - f"Amount of valid elements should have been {self._len} times, but got {values.len()}."; + f"Amount of valid elements should have been {self_len} times, but got {values_len}."; assert(values.len() == self._len, msg); values diff --git a/test_programs/compile_success_empty/comptime_fmt_strings/src/main.nr b/test_programs/compile_success_empty/comptime_fmt_strings/src/main.nr index ca337c822d8..8cdd15aaa0e 100644 --- a/test_programs/compile_success_empty/comptime_fmt_strings/src/main.nr +++ b/test_programs/compile_success_empty/comptime_fmt_strings/src/main.nr @@ -6,7 +6,7 @@ fn main() { // Can't print these at compile-time here since printing to stdout while // compiling breaks the test runner. - let s1 = f"x is {x}, fake interpolation: \{y}, y is {y}"; + let s1 = f"x is {x}, fake interpolation: {{y}}, y is {y}"; let s2 = std::mem::zeroed::>(); (s1, s2) }; diff --git a/test_programs/execution_success/hashmap/src/main.nr b/test_programs/execution_success/hashmap/src/main.nr index cfd4e4a9136..aab531ea559 100644 --- a/test_programs/execution_success/hashmap/src/main.nr +++ b/test_programs/execution_success/hashmap/src/main.nr @@ -104,10 +104,11 @@ fn test_insert_and_methods(input: [Entry; HASHMAP_LEN]) { hashmap.insert(entry.key, entry.value); } - assert(hashmap.len() == HASHMAP_LEN, "hashmap.len() does not match input lenght."); + assert(hashmap.len() == HASHMAP_LEN, "hashmap.len() does not match input length."); for entry in input { - assert(hashmap.contains_key(entry.key), f"Not found inserted key {entry.key}."); + let entry_key = entry.key; + assert(hashmap.contains_key(entry.key), f"Not found inserted key {entry_key}."); } hashmap.clear(); diff --git a/test_programs/execution_success/uhashmap/src/main.nr b/test_programs/execution_success/uhashmap/src/main.nr index b56a4fe1747..689ba9d4a04 100644 --- a/test_programs/execution_success/uhashmap/src/main.nr +++ b/test_programs/execution_success/uhashmap/src/main.nr @@ -104,7 +104,8 @@ unconstrained fn test_insert_and_methods(input: [Entry; HASHMAP_LEN]) { assert(hashmap.len() == HASHMAP_LEN, "hashmap.len() does not match input length."); for entry in input { - assert(hashmap.contains_key(entry.key), f"Not found inserted key {entry.key}."); + let entry_key = entry.key; + assert(hashmap.contains_key(entry.key), f"Not found inserted key {entry_key}."); } hashmap.clear(); From 0ab0caab13091eb77ba7a12977bcd39085a9d528 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Thu, 5 Dec 2024 12:22:06 -0300 Subject: [PATCH 04/13] Error on unmatched '}' --- compiler/noirc_frontend/src/lexer/errors.rs | 26 +++++++++++++------- compiler/noirc_frontend/src/lexer/lexer.rs | 27 +++++++++++++++++++++ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/compiler/noirc_frontend/src/lexer/errors.rs b/compiler/noirc_frontend/src/lexer/errors.rs index 10a265eb8c5..bc132420183 100644 --- a/compiler/noirc_frontend/src/lexer/errors.rs +++ b/compiler/noirc_frontend/src/lexer/errors.rs @@ -134,15 +134,23 @@ impl LexerErrorKind { LexerErrorKind::UnterminatedStringLiteral { span } => ("Unterminated string literal".to_string(), "Unterminated string literal".to_string(), *span), LexerErrorKind::InvalidFormatString { found, span } => { - ( - format!("Invalid format string: expected '}}', found {found:?}"), - if found == &'.' { - "Field access isn't supported in format strings".to_string() - } else { - "If you intended to print '{', you can escape it using '{{'".to_string() - }, - *span, - ) + if found == &'}' { + ( + "Invalid format string: unmatched '}}' found".to_string(), + "If you intended to print '}', you can escape it using '}}'".to_string(), + *span, + ) + } else { + ( + format!("Invalid format string: expected '}}', found {found:?}"), + if found == &'.' { + "Field access isn't supported in format strings".to_string() + } else { + "If you intended to print '{', you can escape it using '{{'".to_string() + }, + *span, + ) + } } LexerErrorKind::InvalidEscape { escaped, span } => (format!("'\\{escaped}' is not a valid escape sequence. Use '\\' for a literal backslash character."), "Invalid escape sequence".to_string(), *span), diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index 50ed55a8a9c..0acf3ad27bf 100644 --- a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -485,6 +485,22 @@ impl<'a> Lexer<'a> { self.next_char(); '}' } + '}' => { + let error_position = self.position; + + // Keep consuming chars until we find the closing double quote + // (unless we bumped into a double quote now, in which case we are done) + while let Some(next) = self.next_char() { + if next == '\'' && self.peek_char_is('"') { + self.next_char(); + } else if next == '"' { + break; + } + } + + let span = Span::inclusive(error_position, error_position); + return Err(LexerErrorKind::InvalidFormatString { found: '}', span }); + } '{' => { found_curly = true; break; @@ -1181,6 +1197,17 @@ mod tests { assert!(matches!(token, Token::Bool(true))); } + #[test] + fn test_eat_fmt_string_literal_unmatched_closing_curly() { + let input = "f\"hello }\" true"; + let mut lexer = Lexer::new(input); + assert!(matches!(lexer.next_token(), Err(LexerErrorKind::InvalidFormatString { .. }))); + + // Make sure the lexer went past the ending double quote for better recovery + let token = lexer.next_token().unwrap().into_token(); + assert!(matches!(token, Token::Bool(true))); + } + #[test] fn test_eat_integer_literals() { let test_cases: Vec<(&str, Token)> = vec![ From c76cd82e983012afdc6d0f9876746f0447dbfa9b Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Thu, 5 Dec 2024 13:57:00 -0300 Subject: [PATCH 05/13] Print oracle understands `{{` and `}}` --- .../noirc_evaluator/src/ssa/ssa_gen/mod.rs | 1 + compiler/noirc_frontend/src/ast/expression.rs | 8 +- compiler/noirc_frontend/src/ast/visitor.rs | 4 +- .../src/elaborator/expressions.rs | 76 +++++++++---------- .../src/hir/comptime/display.rs | 2 +- .../src/hir/comptime/hir_to_display_ast.rs | 4 +- .../src/hir/comptime/interpreter.rs | 2 +- compiler/noirc_frontend/src/hir_def/expr.rs | 2 +- compiler/noirc_frontend/src/lexer/lexer.rs | 45 ++++++++--- compiler/noirc_frontend/src/lexer/token.rs | 21 +++-- .../src/monomorphization/mod.rs | 2 +- compiler/noirc_frontend/src/parser/parser.rs | 4 +- .../src/parser/parser/expression.rs | 7 +- compiler/noirc_printable_type/src/lib.rs | 20 ++++- tooling/nargo_fmt/src/formatter/expression.rs | 7 +- 15 files changed, 124 insertions(+), 81 deletions(-) diff --git a/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs b/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs index d66c47ac725..9e94f5b6ec8 100644 --- a/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs @@ -238,6 +238,7 @@ impl<'a> FunctionContext<'a> { for fragment in fragments { match fragment { FmtStringFragment::String(value) => { + // Escape curly braces in non-interpolations let value = value.replace('{', "{{").replace('}', "}}"); string.push_str(&value); } diff --git a/compiler/noirc_frontend/src/ast/expression.rs b/compiler/noirc_frontend/src/ast/expression.rs index 2e4dae777e8..aca52c8e6b9 100644 --- a/compiler/noirc_frontend/src/ast/expression.rs +++ b/compiler/noirc_frontend/src/ast/expression.rs @@ -210,8 +210,8 @@ impl ExpressionKind { ExpressionKind::Literal(Literal::RawStr(contents, hashes)) } - pub fn format_string(fragments: Vec) -> ExpressionKind { - ExpressionKind::Literal(Literal::FmtStr(fragments)) + pub fn format_string(fragments: Vec, length: u32) -> ExpressionKind { + ExpressionKind::Literal(Literal::FmtStr(fragments, length)) } pub fn constructor( @@ -434,7 +434,7 @@ pub enum Literal { Integer(FieldElement, /*sign*/ bool), // false for positive integer and true for negative Str(String), RawStr(String, u8), - FmtStr(Vec), + FmtStr(Vec, u32 /* length */), Unit, } @@ -669,7 +669,7 @@ impl Display for Literal { std::iter::once('#').cycle().take(*num_hashes as usize).collect(); write!(f, "r{hashes}\"{string}\"{hashes}") } - Literal::FmtStr(fragments) => { + Literal::FmtStr(fragments, _length) => { write!(f, "f\"")?; for fragment in fragments { fragment.fmt(f)?; diff --git a/compiler/noirc_frontend/src/ast/visitor.rs b/compiler/noirc_frontend/src/ast/visitor.rs index 74c455e3135..276c193e39a 100644 --- a/compiler/noirc_frontend/src/ast/visitor.rs +++ b/compiler/noirc_frontend/src/ast/visitor.rs @@ -172,7 +172,7 @@ pub trait Visitor { fn visit_literal_raw_str(&mut self, _: &str, _: u8) {} - fn visit_literal_fmt_str(&mut self, _: &[FmtStringFragment]) {} + fn visit_literal_fmt_str(&mut self, _: &[FmtStringFragment], _length: u32) {} fn visit_literal_unit(&mut self) {} @@ -900,7 +900,7 @@ impl Literal { Literal::Integer(value, negative) => visitor.visit_literal_integer(*value, *negative), Literal::Str(str) => visitor.visit_literal_str(str), Literal::RawStr(str, length) => visitor.visit_literal_raw_str(str, *length), - Literal::FmtStr(fragments) => visitor.visit_literal_fmt_str(fragments), + Literal::FmtStr(fragments, length) => visitor.visit_literal_fmt_str(fragments, *length), Literal::Unit => visitor.visit_literal_unit(), } } diff --git a/compiler/noirc_frontend/src/elaborator/expressions.rs b/compiler/noirc_frontend/src/elaborator/expressions.rs index f65dfe9a539..db56b70803b 100644 --- a/compiler/noirc_frontend/src/elaborator/expressions.rs +++ b/compiler/noirc_frontend/src/elaborator/expressions.rs @@ -166,7 +166,7 @@ impl<'context> Elaborator<'context> { let len = Type::Constant(str.len().into(), Kind::u32()); (Lit(HirLiteral::Str(str)), Type::String(Box::new(len))) } - Literal::FmtStr(fragments) => self.elaborate_fmt_string(fragments), + Literal::FmtStr(fragments, length) => self.elaborate_fmt_string(fragments, length), Literal::Array(array_literal) => { self.elaborate_array_literal(array_literal, span, true) } @@ -233,54 +233,50 @@ impl<'context> Elaborator<'context> { (HirExpression::Literal(constructor(expr)), typ) } - fn elaborate_fmt_string(&mut self, fragments: Vec) -> (HirExpression, Type) { + fn elaborate_fmt_string( + &mut self, + fragments: Vec, + length: u32, + ) -> (HirExpression, Type) { let mut fmt_str_idents = Vec::new(); let mut capture_types = Vec::new(); - let mut len = 0; for fragment in &fragments { - match fragment { - FmtStringFragment::String(string) => { - len += string.len(); - } - FmtStringFragment::Interpolation(ident_name, string_span) => { - len += ident_name.len() + 2; // + 2 for '{' + '}' - - let scope_tree = self.scopes.current_scope_tree(); - let variable = scope_tree.find(ident_name); - - let hir_ident = if let Some((old_value, _)) = variable { - old_value.num_times_used += 1; - old_value.ident.clone() - } else if let Ok((definition_id, _)) = - self.lookup_global(Path::from_single(ident_name.to_string(), *string_span)) - { - HirIdent::non_trait_method( - definition_id, - Location::new(*string_span, self.file), - ) - } else { - self.push_err(ResolverError::VariableNotDeclared { - name: ident_name.to_owned(), - span: *string_span, - }); - continue; - }; + if let FmtStringFragment::Interpolation(ident_name, string_span) = fragment { + let scope_tree = self.scopes.current_scope_tree(); + let variable = scope_tree.find(ident_name); + + let hir_ident = if let Some((old_value, _)) = variable { + old_value.num_times_used += 1; + old_value.ident.clone() + } else if let Ok((definition_id, _)) = + self.lookup_global(Path::from_single(ident_name.to_string(), *string_span)) + { + HirIdent::non_trait_method( + definition_id, + Location::new(*string_span, self.file), + ) + } else { + self.push_err(ResolverError::VariableNotDeclared { + name: ident_name.to_owned(), + span: *string_span, + }); + continue; + }; - let hir_expr = HirExpression::Ident(hir_ident.clone(), None); - let expr_id = self.interner.push_expr(hir_expr); - self.interner.push_expr_location(expr_id, *string_span, self.file); - let typ = self.type_check_variable(hir_ident, expr_id, None); - self.interner.push_expr_type(expr_id, typ.clone()); - capture_types.push(typ); - fmt_str_idents.push(expr_id); - } + let hir_expr = HirExpression::Ident(hir_ident.clone(), None); + let expr_id = self.interner.push_expr(hir_expr); + self.interner.push_expr_location(expr_id, *string_span, self.file); + let typ = self.type_check_variable(hir_ident, expr_id, None); + self.interner.push_expr_type(expr_id, typ.clone()); + capture_types.push(typ); + fmt_str_idents.push(expr_id); } } - let len = Type::Constant(len.into(), Kind::u32()); + let len = Type::Constant(length.into(), Kind::u32()); let typ = Type::FmtString(Box::new(len), Box::new(Type::Tuple(capture_types))); - (HirExpression::Literal(HirLiteral::FmtStr(fragments, fmt_str_idents)), typ) + (HirExpression::Literal(HirLiteral::FmtStr(fragments, fmt_str_idents, length)), typ) } fn elaborate_prefix(&mut self, prefix: PrefixExpression, span: Span) -> (ExprId, Type) { diff --git a/compiler/noirc_frontend/src/hir/comptime/display.rs b/compiler/noirc_frontend/src/hir/comptime/display.rs index 560d11cfa2e..29d1448f07e 100644 --- a/compiler/noirc_frontend/src/hir/comptime/display.rs +++ b/compiler/noirc_frontend/src/hir/comptime/display.rs @@ -661,7 +661,7 @@ fn remove_interned_in_literal(interner: &NodeInterner, literal: Literal) -> Lite | Literal::Integer(_, _) | Literal::Str(_) | Literal::RawStr(_, _) - | Literal::FmtStr(_) + | Literal::FmtStr(_, _) | Literal::Unit => literal, } } diff --git a/compiler/noirc_frontend/src/hir/comptime/hir_to_display_ast.rs b/compiler/noirc_frontend/src/hir/comptime/hir_to_display_ast.rs index e2ef7266498..9338c0fc37f 100644 --- a/compiler/noirc_frontend/src/hir/comptime/hir_to_display_ast.rs +++ b/compiler/noirc_frontend/src/hir/comptime/hir_to_display_ast.rs @@ -121,9 +121,9 @@ impl HirExpression { HirExpression::Literal(HirLiteral::Str(string)) => { ExpressionKind::Literal(Literal::Str(string.clone())) } - HirExpression::Literal(HirLiteral::FmtStr(fragments, _exprs)) => { + HirExpression::Literal(HirLiteral::FmtStr(fragments, _exprs, length)) => { // TODO: Is throwing away the exprs here valid? - ExpressionKind::Literal(Literal::FmtStr(fragments.clone())) + ExpressionKind::Literal(Literal::FmtStr(fragments.clone(), *length)) } HirExpression::Literal(HirLiteral::Unit) => ExpressionKind::Literal(Literal::Unit), HirExpression::Block(expr) => ExpressionKind::Block(expr.to_display_ast(interner)), diff --git a/compiler/noirc_frontend/src/hir/comptime/interpreter.rs b/compiler/noirc_frontend/src/hir/comptime/interpreter.rs index 40fdc5c3d7c..d1f9db8c503 100644 --- a/compiler/noirc_frontend/src/hir/comptime/interpreter.rs +++ b/compiler/noirc_frontend/src/hir/comptime/interpreter.rs @@ -623,7 +623,7 @@ impl<'local, 'interner> Interpreter<'local, 'interner> { self.evaluate_integer(value, is_negative, id) } HirLiteral::Str(string) => Ok(Value::String(Rc::new(string))), - HirLiteral::FmtStr(fragments, captures) => { + HirLiteral::FmtStr(fragments, captures, _length) => { self.evaluate_format_string(fragments, captures, id) } HirLiteral::Array(array) => self.evaluate_array(array, id), diff --git a/compiler/noirc_frontend/src/hir_def/expr.rs b/compiler/noirc_frontend/src/hir_def/expr.rs index 7d90b40b918..a32897ce32e 100644 --- a/compiler/noirc_frontend/src/hir_def/expr.rs +++ b/compiler/noirc_frontend/src/hir_def/expr.rs @@ -114,7 +114,7 @@ pub enum HirLiteral { Bool(bool), Integer(FieldElement, bool), //true for negative integer and false for positive Str(String), - FmtStr(Vec, Vec), + FmtStr(Vec, Vec, u32 /* length */), Unit, } diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index 0acf3ad27bf..b065d7e6c8d 100644 --- a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -451,6 +451,7 @@ impl<'a> Lexer<'a> { self.next_char(); let mut fragments = Vec::new(); + let mut length = 0; loop { // String fragment until '{' or '"' @@ -509,6 +510,18 @@ impl<'a> Lexer<'a> { }; string.push(char); + length += 1; + + if char == '{' || char == '}' { + // This might look a bit strange, but if there's `{{` or `}}` in the format string + // then it will be `{` and `}` in the string fragment respectively, but on the codegen + // phase it will be translated back to `{{` and `}}` to avoid executing an interpolation, + // thus the actual length of the codegen'd string will be one more than what we get here. + // + // We could just make the fragment include the double curly braces, but then the interpreter + // would need to undo the curly braces, so it's simpler to add them during codegen. + length += 1; + } } else { let span = Span::inclusive(start, self.position); return Err(LexerErrorKind::UnterminatedStringLiteral { span }); @@ -523,6 +536,8 @@ impl<'a> Lexer<'a> { break; } + length += 1; // for the curly brace + // Interpolation fragment until '}' or '"' let mut string = String::new(); let interpolation_start = self.position; @@ -560,14 +575,17 @@ impl<'a> Lexer<'a> { other } }; + length += 1; string.push(char); } + length += 1; // for the closing curly brace + let interpolation_span = Span::from(interpolation_start..self.position); fragments.push(FmtStringFragment::Interpolation(string, interpolation_span)); } - let token = Token::FmtStr(fragments); + let token = Token::FmtStr(fragments, length); let end = self.position; Ok(token.into_span(start, end)) } @@ -1112,7 +1130,7 @@ mod tests { Token::Keyword(Keyword::Let), Token::Ident("_word".to_string()), Token::Assign, - Token::FmtStr(vec![FmtStringFragment::String("hello".to_string())]), + Token::FmtStr(vec![FmtStringFragment::String("hello".to_string())], 5), ]; let mut lexer = Lexer::new(input); @@ -1130,7 +1148,7 @@ mod tests { Token::Keyword(Keyword::Let), Token::Ident("_word".to_string()), Token::Assign, - Token::FmtStr(vec![FmtStringFragment::String("hello\n\t{x}".to_string())]), + Token::FmtStr(vec![FmtStringFragment::String("hello\n\t{x}".to_string())], 12), ]; let mut lexer = Lexer::new(input); @@ -1148,14 +1166,17 @@ mod tests { Token::Keyword(Keyword::Let), Token::Ident("_word".to_string()), Token::Assign, - Token::FmtStr(vec![ - FmtStringFragment::String("hello ".to_string()), - FmtStringFragment::Interpolation("world".to_string(), Span::from(20..26)), - FmtStringFragment::String(" and ".to_string()), - FmtStringFragment::Interpolation("_another".to_string(), Span::from(32..41)), - FmtStringFragment::String(" ".to_string()), - FmtStringFragment::Interpolation("vAr_123".to_string(), Span::from(43..51)), - ]), + Token::FmtStr( + vec![ + FmtStringFragment::String("hello ".to_string()), + FmtStringFragment::Interpolation("world".to_string(), Span::from(20..26)), + FmtStringFragment::String(" and ".to_string()), + FmtStringFragment::Interpolation("_another".to_string(), Span::from(32..41)), + FmtStringFragment::String(" ".to_string()), + FmtStringFragment::Interpolation("vAr_123".to_string(), Span::from(43..51)), + ], + 38, + ), ]; let mut lexer = Lexer::new(input); @@ -1397,7 +1418,7 @@ mod tests { format!("let s = r#####\"{s}\"#####;"), ], ), - (Some(Token::FmtStr(vec![])), vec![format!("assert(x == y, f\"{s}\");")]), + (Some(Token::FmtStr(vec![], 0)), vec![format!("assert(x == y, f\"{s}\");")]), // expected token not found // (Some(Token::LineComment("".to_string(), None)), vec![ (None, vec![format!("//{s}"), format!("// {s}")]), diff --git a/compiler/noirc_frontend/src/lexer/token.rs b/compiler/noirc_frontend/src/lexer/token.rs index 07d36b4d140..01c641b3f0e 100644 --- a/compiler/noirc_frontend/src/lexer/token.rs +++ b/compiler/noirc_frontend/src/lexer/token.rs @@ -25,7 +25,7 @@ pub enum BorrowedToken<'input> { Str(&'input str), /// the u8 is the number of hashes, i.e. r###.. RawStr(&'input str, u8), - FmtStr(&'input [FmtStringFragment]), + FmtStr(&'input [FmtStringFragment], u32 /* length */), Keyword(Keyword), IntType(IntType), AttributeStart { @@ -136,7 +136,7 @@ pub enum Token { Str(String), /// the u8 is the number of hashes, i.e. r###.. RawStr(String, u8), - FmtStr(Vec), + FmtStr(Vec, u32 /* length */), Keyword(Keyword), IntType(IntType), AttributeStart { @@ -255,7 +255,7 @@ pub fn token_to_borrowed_token(token: &Token) -> BorrowedToken<'_> { Token::Int(n) => BorrowedToken::Int(*n), Token::Bool(b) => BorrowedToken::Bool(*b), Token::Str(ref b) => BorrowedToken::Str(b), - Token::FmtStr(ref b) => BorrowedToken::FmtStr(b), + Token::FmtStr(ref b, length) => BorrowedToken::FmtStr(b, *length), Token::RawStr(ref b, hashes) => BorrowedToken::RawStr(b, *hashes), Token::Keyword(k) => BorrowedToken::Keyword(*k), Token::AttributeStart { is_inner, is_tag } => { @@ -322,7 +322,16 @@ impl Display for FmtStringFragment { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { FmtStringFragment::String(string) => { - let string = string.replace('{', "{{").replace('}', "}}"); + // Undo the escapes when displaying the fmt string + let string = string + .replace('{', "{{") + .replace('}', "}}") + .replace('\r', "\\r") + .replace('\n', "\\n") + .replace('\t', "\\t") + .replace('\0', "\\0") + .replace('\'', "\\'") + .replace('\"', "\\\""); write!(f, "{}", string) } FmtStringFragment::Interpolation(string, _span) => { @@ -395,7 +404,7 @@ impl fmt::Display for Token { Token::Int(n) => write!(f, "{}", n), Token::Bool(b) => write!(f, "{b}"), Token::Str(ref b) => write!(f, "{b:?}"), - Token::FmtStr(ref b) => write!(f, "f{b:?}"), + Token::FmtStr(ref b, _length) => write!(f, "f{b:?}"), Token::RawStr(ref b, hashes) => { let h: String = std::iter::once('#').cycle().take(hashes as usize).collect(); write!(f, "r{h}{b:?}{h}") @@ -535,7 +544,7 @@ impl Token { | Token::Bool(_) | Token::Str(_) | Token::RawStr(..) - | Token::FmtStr(_) => TokenKind::Literal, + | Token::FmtStr(_, _) => TokenKind::Literal, Token::Keyword(_) => TokenKind::Keyword, Token::UnquoteMarker(_) => TokenKind::UnquoteMarker, Token::Quote(_) => TokenKind::Quote, diff --git a/compiler/noirc_frontend/src/monomorphization/mod.rs b/compiler/noirc_frontend/src/monomorphization/mod.rs index 84acbafdc27..6bed0f4ccc5 100644 --- a/compiler/noirc_frontend/src/monomorphization/mod.rs +++ b/compiler/noirc_frontend/src/monomorphization/mod.rs @@ -418,7 +418,7 @@ impl<'interner> Monomorphizer<'interner> { let expr = match self.interner.expression(&expr) { HirExpression::Ident(ident, generics) => self.ident(ident, expr, generics)?, HirExpression::Literal(HirLiteral::Str(contents)) => Literal(Str(contents)), - HirExpression::Literal(HirLiteral::FmtStr(fragments, idents)) => { + HirExpression::Literal(HirLiteral::FmtStr(fragments, idents, _length)) => { let fields = try_vecmap(idents, |ident| self.expr(ident))?; Literal(FmtStr( fragments, diff --git a/compiler/noirc_frontend/src/parser/parser.rs b/compiler/noirc_frontend/src/parser/parser.rs index aba1d345a39..2f62141234f 100644 --- a/compiler/noirc_frontend/src/parser/parser.rs +++ b/compiler/noirc_frontend/src/parser/parser.rs @@ -294,11 +294,11 @@ impl<'a> Parser<'a> { } } - fn eat_fmt_str(&mut self) -> Option> { + fn eat_fmt_str(&mut self) -> Option<(Vec, u32)> { if matches!(self.token.token(), Token::FmtStr(..)) { let token = self.bump(); match token.into_token() { - Token::FmtStr(fragments) => Some(fragments), + Token::FmtStr(fragments, length) => Some((fragments, length)), _ => unreachable!(), } } else { diff --git a/compiler/noirc_frontend/src/parser/parser/expression.rs b/compiler/noirc_frontend/src/parser/parser/expression.rs index fdd21627a0e..526a0c3dd6e 100644 --- a/compiler/noirc_frontend/src/parser/parser/expression.rs +++ b/compiler/noirc_frontend/src/parser/parser/expression.rs @@ -592,8 +592,8 @@ impl<'a> Parser<'a> { return Some(ExpressionKind::raw_string(string, n)); } - if let Some(fragments) = self.eat_fmt_str() { - return Some(ExpressionKind::format_string(fragments)); + if let Some((fragments, length)) = self.eat_fmt_str() { + return Some(ExpressionKind::format_string(fragments, length)); } if let Some(tokens) = self.eat_quote() { @@ -865,10 +865,11 @@ mod tests { fn parses_fmt_str() { let src = "f\"hello\""; let expr = parse_expression_no_errors(src); - let ExpressionKind::Literal(Literal::FmtStr(fragments)) = expr.kind else { + let ExpressionKind::Literal(Literal::FmtStr(fragments, length)) = expr.kind else { panic!("Expected format string literal"); }; assert_eq!(fragments[0].to_string(), "hello"); + assert_eq!(length, 5); } #[test] diff --git a/compiler/noirc_printable_type/src/lib.rs b/compiler/noirc_printable_type/src/lib.rs index 838a2472125..10f27fc5f87 100644 --- a/compiler/noirc_printable_type/src/lib.rs +++ b/compiler/noirc_printable_type/src/lib.rs @@ -253,8 +253,9 @@ fn to_string(value: &PrintableValue, typ: &PrintableType) -> Op Some(output) } -// Taken from Regex docs directly -fn replace_all( +/// Assumes `re` is a regex that matches interpolation sequences like `{...}`. +/// Replaces all matches by invoking `replacement`, but only if a match isn't preceded by a '{'. +fn replace_all_interpolations( re: &Regex, haystack: &str, mut replacement: impl FnMut(&Captures) -> Result, @@ -263,6 +264,13 @@ fn replace_all( let mut last_match = 0; for caps in re.captures_iter(haystack) { let m = caps.get(0).unwrap(); + + // If we have '{' before the match, it's an escape sequence. + let start = m.start(); + if start > 0 && haystack.as_bytes().get(start - 1).copied() == Some(b'{') { + continue; + } + new.push_str(&haystack[last_match..m.start()]); new.push_str(&replacement(&caps)?); last_match = m.end(); @@ -282,11 +290,17 @@ impl std::fmt::Display for PrintableValueDisplay { let mut display_iter = values.iter(); let re = Regex::new(r"\{([a-zA-Z0-9_]+)\}").map_err(|_| std::fmt::Error)?; - let formatted_str = replace_all(&re, template, |_: &Captures| { + // First replace all `{...}` interpolations with the corresponding values, making sure + // to skip ones that are also surrounded by curly braces (like `{{...}}`) + let formatted_str = replace_all_interpolations(&re, template, |_: &Captures| { let (value, typ) = display_iter.next().ok_or(std::fmt::Error)?; to_string(value, typ).ok_or(std::fmt::Error) })?; + // Now that we replaced all interpolations, we need to unescape the double curly braces + // that were added to avoid interpolations. + let formatted_str = formatted_str.replace("{{", "{").replace("}}", "}"); + write!(fmt, "{formatted_str}") } } diff --git a/tooling/nargo_fmt/src/formatter/expression.rs b/tooling/nargo_fmt/src/formatter/expression.rs index 0730d06ad72..ecc9fab18ce 100644 --- a/tooling/nargo_fmt/src/formatter/expression.rs +++ b/tooling/nargo_fmt/src/formatter/expression.rs @@ -104,11 +104,12 @@ impl<'a, 'b> ChunkFormatter<'a, 'b> { formatter.write_left_paren(); formatter.write_right_paren(); })), - Literal::Bool(_) | Literal::Str(_) | Literal::FmtStr(_) | Literal::RawStr(..) => group - .text(self.chunk(|formatter| { + Literal::Bool(_) | Literal::Str(_) | Literal::FmtStr(_, _) | Literal::RawStr(..) => { + group.text(self.chunk(|formatter| { formatter.write_current_token_as_in_source(); formatter.bump(); - })), + })); + } Literal::Integer(..) => group.text(self.chunk(|formatter| { if formatter.is_at(Token::Minus) { formatter.write_token(Token::Minus); From 33c42a6c7ac4b7ddbe2a46fa173e6503509df6bc Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Thu, 5 Dec 2024 13:59:42 -0300 Subject: [PATCH 06/13] Fix span --- compiler/noirc_frontend/src/lexer/lexer.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index b065d7e6c8d..22f15469c3a 100644 --- a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -540,7 +540,7 @@ impl<'a> Lexer<'a> { // Interpolation fragment until '}' or '"' let mut string = String::new(); - let interpolation_start = self.position; + let interpolation_start = self.position + 1; // + 1 because we are at '{' let mut first_char = true; while let Some(next) = self.next_char() { let char = match next { @@ -1169,11 +1169,11 @@ mod tests { Token::FmtStr( vec![ FmtStringFragment::String("hello ".to_string()), - FmtStringFragment::Interpolation("world".to_string(), Span::from(20..26)), + FmtStringFragment::Interpolation("world".to_string(), Span::from(21..26)), FmtStringFragment::String(" and ".to_string()), - FmtStringFragment::Interpolation("_another".to_string(), Span::from(32..41)), + FmtStringFragment::Interpolation("_another".to_string(), Span::from(33..41)), FmtStringFragment::String(" ".to_string()), - FmtStringFragment::Interpolation("vAr_123".to_string(), Span::from(43..51)), + FmtStringFragment::Interpolation("vAr_123".to_string(), Span::from(44..51)), ], 38, ), From 5128bf4bbcf0027227eec1d701dde4ff410a08a0 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Thu, 5 Dec 2024 14:07:57 -0300 Subject: [PATCH 07/13] Error on empty interpolation --- compiler/noirc_frontend/src/lexer/errors.rs | 10 ++++ compiler/noirc_frontend/src/lexer/lexer.rs | 51 +++++++++++++++------ 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/compiler/noirc_frontend/src/lexer/errors.rs b/compiler/noirc_frontend/src/lexer/errors.rs index bc132420183..f95ccba061a 100644 --- a/compiler/noirc_frontend/src/lexer/errors.rs +++ b/compiler/noirc_frontend/src/lexer/errors.rs @@ -32,6 +32,8 @@ pub enum LexerErrorKind { UnterminatedStringLiteral { span: Span }, #[error("Invalid format string: expected '}}', found {found:?}")] InvalidFormatString { found: char, span: Span }, + #[error("Invalid format string: expected letter or underscore, found '}}'")] + EmptyFormatStringInterpolation { span: Span }, #[error( "'\\{escaped}' is not a valid escape sequence. Use '\\' for a literal backslash character." )] @@ -71,6 +73,7 @@ impl LexerErrorKind { LexerErrorKind::UnterminatedBlockComment { span } => *span, LexerErrorKind::UnterminatedStringLiteral { span } => *span, LexerErrorKind::InvalidFormatString { span, .. } => *span, + LexerErrorKind::EmptyFormatStringInterpolation { span, .. } => *span, LexerErrorKind::InvalidEscape { span, .. } => *span, LexerErrorKind::InvalidQuoteDelimiter { delimiter } => delimiter.to_span(), LexerErrorKind::NonAsciiComment { span, .. } => *span, @@ -152,6 +155,13 @@ impl LexerErrorKind { ) } } + LexerErrorKind::EmptyFormatStringInterpolation { span } => { + ( + "Invalid format string: expected letter or underscore, found '}}'".to_string(), + "If you intended to print '{' or '}', you can escape them using '{{' and '}}' respectively".to_string(), + *span, + ) + } LexerErrorKind::InvalidEscape { escaped, span } => (format!("'\\{escaped}' is not a valid escape sequence. Use '\\' for a literal backslash character."), "Invalid escape sequence".to_string(), *span), LexerErrorKind::InvalidQuoteDelimiter { delimiter } => { diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index 22f15469c3a..bf61364523f 100644 --- a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -490,14 +490,7 @@ impl<'a> Lexer<'a> { let error_position = self.position; // Keep consuming chars until we find the closing double quote - // (unless we bumped into a double quote now, in which case we are done) - while let Some(next) = self.next_char() { - if next == '\'' && self.peek_char_is('"') { - self.next_char(); - } else if next == '"' { - break; - } - } + self.skip_until_string_end(); let span = Span::inclusive(error_position, error_position); return Err(LexerErrorKind::InvalidFormatString { found: '}', span }); @@ -545,6 +538,16 @@ impl<'a> Lexer<'a> { while let Some(next) = self.next_char() { let char = match next { '}' => { + if string.is_empty() { + let error_position = self.position; + + // Keep consuming chars until we find the closing double quote + self.skip_until_string_end(); + + let span = Span::inclusive(error_position, error_position); + return Err(LexerErrorKind::EmptyFormatStringInterpolation { span }); + } + break; } other => { @@ -559,13 +562,7 @@ impl<'a> Lexer<'a> { // Keep consuming chars until we find the closing double quote // (unless we bumped into a double quote now, in which case we are done) if other != '"' { - while let Some(next) = self.next_char() { - if next == '\'' && self.peek_char_is('"') { - self.next_char(); - } else if next == '"' { - break; - } - } + self.skip_until_string_end(); } let span = Span::inclusive(error_position, error_position); @@ -590,6 +587,16 @@ impl<'a> Lexer<'a> { Ok(token.into_span(start, end)) } + fn skip_until_string_end(&mut self) { + while let Some(next) = self.next_char() { + if next == '\'' && self.peek_char_is('"') { + self.next_char(); + } else if next == '"' { + break; + } + } + } + fn eat_format_string_or_alpha_numeric(&mut self) -> SpannedTokenResult { if self.peek_char_is('"') { self.eat_fmt_string() @@ -1229,6 +1236,20 @@ mod tests { assert!(matches!(token, Token::Bool(true))); } + #[test] + fn test_eat_fmt_string_literal_empty_interpolation() { + let input = "f\"{}\" true"; + let mut lexer = Lexer::new(input); + assert!(matches!( + lexer.next_token(), + Err(LexerErrorKind::EmptyFormatStringInterpolation { .. }) + )); + + // Make sure the lexer went past the ending double quote for better recovery + let token = lexer.next_token().unwrap().into_token(); + assert!(matches!(token, Token::Bool(true))); + } + #[test] fn test_eat_integer_literals() { let test_cases: Vec<(&str, Token)> = vec![ From e69e7cba0c0896f0841ec7b5a4d7ddc487d99dca Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Thu, 5 Dec 2024 14:28:53 -0300 Subject: [PATCH 08/13] nargo fmt --- noir_stdlib/src/collections/map.nr | 3 ++- noir_stdlib/src/collections/umap.nr | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/noir_stdlib/src/collections/map.nr b/noir_stdlib/src/collections/map.nr index eae21b3b411..2b0da1b90ec 100644 --- a/noir_stdlib/src/collections/map.nr +++ b/noir_stdlib/src/collections/map.nr @@ -203,7 +203,8 @@ impl HashMap { let self_len = self._len; let entries_len = entries.len(); - let msg = f"Amount of valid elements should have been {self_len} times, but got {entries_len}."; + let msg = + f"Amount of valid elements should have been {self_len} times, but got {entries_len}."; assert(entries.len() == self._len, msg); entries diff --git a/noir_stdlib/src/collections/umap.nr b/noir_stdlib/src/collections/umap.nr index 00f3da7d2a0..7aebeb437cf 100644 --- a/noir_stdlib/src/collections/umap.nr +++ b/noir_stdlib/src/collections/umap.nr @@ -140,7 +140,8 @@ impl UHashMap { let self_len = self._len; let entries_len = entries.len(); - let msg = f"Amount of valid elements should have been {self_len} times, but got {entries_len}."; + let msg = + f"Amount of valid elements should have been {self_len} times, but got {entries_len}."; assert(entries.len() == self._len, msg); entries From b51e15e6cbdbc6ec3b5f2ec5d2cda17c54260288 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Thu, 5 Dec 2024 14:40:41 -0300 Subject: [PATCH 09/13] FmtStringFragment -> FmtStrFragment --- .../noirc_evaluator/src/ssa/ssa_gen/mod.rs | 6 ++--- compiler/noirc_frontend/src/ast/expression.rs | 6 ++--- compiler/noirc_frontend/src/ast/visitor.rs | 4 ++-- .../src/elaborator/expressions.rs | 6 ++--- .../src/hir/comptime/interpreter.rs | 8 +++---- compiler/noirc_frontend/src/hir_def/expr.rs | 4 ++-- compiler/noirc_frontend/src/lexer/lexer.rs | 22 +++++++++---------- compiler/noirc_frontend/src/lexer/token.rs | 12 +++++----- .../src/monomorphization/ast.rs | 4 ++-- .../src/monomorphization/mod.rs | 4 ++-- compiler/noirc_frontend/src/parser/parser.rs | 4 ++-- 11 files changed, 40 insertions(+), 40 deletions(-) diff --git a/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs b/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs index 9e94f5b6ec8..bd236793a2f 100644 --- a/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs @@ -2,7 +2,7 @@ pub(crate) mod context; mod program; mod value; -use noirc_frontend::token::FmtStringFragment; +use noirc_frontend::token::FmtStrFragment; pub(crate) use program::Ssa; use context::SharedContext; @@ -237,12 +237,12 @@ impl<'a> FunctionContext<'a> { let mut string = String::new(); for fragment in fragments { match fragment { - FmtStringFragment::String(value) => { + FmtStrFragment::String(value) => { // Escape curly braces in non-interpolations let value = value.replace('{', "{{").replace('}', "}}"); string.push_str(&value); } - FmtStringFragment::Interpolation(value, _span) => { + FmtStrFragment::Interpolation(value, _span) => { string.push('{'); string.push_str(value); string.push('}'); diff --git a/compiler/noirc_frontend/src/ast/expression.rs b/compiler/noirc_frontend/src/ast/expression.rs index aca52c8e6b9..ae622f46686 100644 --- a/compiler/noirc_frontend/src/ast/expression.rs +++ b/compiler/noirc_frontend/src/ast/expression.rs @@ -10,7 +10,7 @@ use crate::ast::{ use crate::node_interner::{ ExprId, InternedExpressionKind, InternedStatementKind, QuotedTypeId, StructId, }; -use crate::token::{Attributes, FmtStringFragment, FunctionAttribute, Token, Tokens}; +use crate::token::{Attributes, FmtStrFragment, FunctionAttribute, Token, Tokens}; use crate::{Kind, Type}; use acvm::{acir::AcirField, FieldElement}; use iter_extended::vecmap; @@ -210,7 +210,7 @@ impl ExpressionKind { ExpressionKind::Literal(Literal::RawStr(contents, hashes)) } - pub fn format_string(fragments: Vec, length: u32) -> ExpressionKind { + pub fn format_string(fragments: Vec, length: u32) -> ExpressionKind { ExpressionKind::Literal(Literal::FmtStr(fragments, length)) } @@ -434,7 +434,7 @@ pub enum Literal { Integer(FieldElement, /*sign*/ bool), // false for positive integer and true for negative Str(String), RawStr(String, u8), - FmtStr(Vec, u32 /* length */), + FmtStr(Vec, u32 /* length */), Unit, } diff --git a/compiler/noirc_frontend/src/ast/visitor.rs b/compiler/noirc_frontend/src/ast/visitor.rs index 276c193e39a..2f60532980a 100644 --- a/compiler/noirc_frontend/src/ast/visitor.rs +++ b/compiler/noirc_frontend/src/ast/visitor.rs @@ -16,7 +16,7 @@ use crate::{ InternedUnresolvedTypeData, QuotedTypeId, }, parser::{Item, ItemKind, ParsedSubModule}, - token::{FmtStringFragment, MetaAttribute, SecondaryAttribute, Tokens}, + token::{FmtStrFragment, MetaAttribute, SecondaryAttribute, Tokens}, ParsedModule, QuotedType, }; @@ -172,7 +172,7 @@ pub trait Visitor { fn visit_literal_raw_str(&mut self, _: &str, _: u8) {} - fn visit_literal_fmt_str(&mut self, _: &[FmtStringFragment], _length: u32) {} + fn visit_literal_fmt_str(&mut self, _: &[FmtStrFragment], _length: u32) {} fn visit_literal_unit(&mut self) {} diff --git a/compiler/noirc_frontend/src/elaborator/expressions.rs b/compiler/noirc_frontend/src/elaborator/expressions.rs index db56b70803b..b5fab6faf9b 100644 --- a/compiler/noirc_frontend/src/elaborator/expressions.rs +++ b/compiler/noirc_frontend/src/elaborator/expressions.rs @@ -28,7 +28,7 @@ use crate::{ traits::{ResolvedTraitBound, TraitConstraint}, }, node_interner::{DefinitionKind, ExprId, FuncId, InternedStatementKind, TraitMethodId}, - token::{FmtStringFragment, Tokens}, + token::{FmtStrFragment, Tokens}, Kind, QuotedType, Shared, StructType, Type, }; @@ -235,14 +235,14 @@ impl<'context> Elaborator<'context> { fn elaborate_fmt_string( &mut self, - fragments: Vec, + fragments: Vec, length: u32, ) -> (HirExpression, Type) { let mut fmt_str_idents = Vec::new(); let mut capture_types = Vec::new(); for fragment in &fragments { - if let FmtStringFragment::Interpolation(ident_name, string_span) = fragment { + if let FmtStrFragment::Interpolation(ident_name, string_span) = fragment { let scope_tree = self.scopes.current_scope_tree(); let variable = scope_tree.find(ident_name); diff --git a/compiler/noirc_frontend/src/hir/comptime/interpreter.rs b/compiler/noirc_frontend/src/hir/comptime/interpreter.rs index d1f9db8c503..ce1fbe4a524 100644 --- a/compiler/noirc_frontend/src/hir/comptime/interpreter.rs +++ b/compiler/noirc_frontend/src/hir/comptime/interpreter.rs @@ -20,7 +20,7 @@ use crate::monomorphization::{ perform_impl_bindings, perform_instantiation_bindings, resolve_trait_method, undo_instantiation_bindings, }; -use crate::token::{FmtStringFragment, Tokens}; +use crate::token::{FmtStrFragment, Tokens}; use crate::TypeVariable; use crate::{ hir_def::{ @@ -633,7 +633,7 @@ impl<'local, 'interner> Interpreter<'local, 'interner> { fn evaluate_format_string( &mut self, - fragments: Vec, + fragments: Vec, captures: Vec, id: ExprId, ) -> IResult { @@ -646,10 +646,10 @@ impl<'local, 'interner> Interpreter<'local, 'interner> { for fragment in fragments { match fragment { - FmtStringFragment::String(string) => { + FmtStrFragment::String(string) => { result.push_str(&string); } - FmtStringFragment::Interpolation(_, span) => { + FmtStrFragment::Interpolation(_, span) => { if let Some(value) = values.pop_front() { // When interpolating a quoted value inside a format string, we don't include the // surrounding `quote {` ... `}` as if we are unquoting the quoted value inside the string. diff --git a/compiler/noirc_frontend/src/hir_def/expr.rs b/compiler/noirc_frontend/src/hir_def/expr.rs index a32897ce32e..e243fc88cff 100644 --- a/compiler/noirc_frontend/src/hir_def/expr.rs +++ b/compiler/noirc_frontend/src/hir_def/expr.rs @@ -7,7 +7,7 @@ use crate::hir::type_check::generics::TraitGenerics; use crate::node_interner::{ DefinitionId, DefinitionKind, ExprId, FuncId, NodeInterner, StmtId, TraitMethodId, }; -use crate::token::{FmtStringFragment, Tokens}; +use crate::token::{FmtStrFragment, Tokens}; use crate::Shared; use super::stmt::HirPattern; @@ -114,7 +114,7 @@ pub enum HirLiteral { Bool(bool), Integer(FieldElement, bool), //true for negative integer and false for positive Str(String), - FmtStr(Vec, Vec, u32 /* length */), + FmtStr(Vec, Vec, u32 /* length */), Unit, } diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index bf61364523f..a5c4b2cd772 100644 --- a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -2,7 +2,7 @@ use crate::token::DocStyle; use super::{ errors::LexerErrorKind, - token::{FmtStringFragment, IntType, Keyword, SpannedToken, Token, Tokens}, + token::{FmtStrFragment, IntType, Keyword, SpannedToken, Token, Tokens}, }; use acvm::{AcirField, FieldElement}; use noirc_errors::{Position, Span}; @@ -522,7 +522,7 @@ impl<'a> Lexer<'a> { } if !string.is_empty() { - fragments.push(FmtStringFragment::String(string)); + fragments.push(FmtStrFragment::String(string)); } if !found_curly { @@ -579,7 +579,7 @@ impl<'a> Lexer<'a> { length += 1; // for the closing curly brace let interpolation_span = Span::from(interpolation_start..self.position); - fragments.push(FmtStringFragment::Interpolation(string, interpolation_span)); + fragments.push(FmtStrFragment::Interpolation(string, interpolation_span)); } let token = Token::FmtStr(fragments, length); @@ -1137,7 +1137,7 @@ mod tests { Token::Keyword(Keyword::Let), Token::Ident("_word".to_string()), Token::Assign, - Token::FmtStr(vec![FmtStringFragment::String("hello".to_string())], 5), + Token::FmtStr(vec![FmtStrFragment::String("hello".to_string())], 5), ]; let mut lexer = Lexer::new(input); @@ -1155,7 +1155,7 @@ mod tests { Token::Keyword(Keyword::Let), Token::Ident("_word".to_string()), Token::Assign, - Token::FmtStr(vec![FmtStringFragment::String("hello\n\t{x}".to_string())], 12), + Token::FmtStr(vec![FmtStrFragment::String("hello\n\t{x}".to_string())], 12), ]; let mut lexer = Lexer::new(input); @@ -1175,12 +1175,12 @@ mod tests { Token::Assign, Token::FmtStr( vec![ - FmtStringFragment::String("hello ".to_string()), - FmtStringFragment::Interpolation("world".to_string(), Span::from(21..26)), - FmtStringFragment::String(" and ".to_string()), - FmtStringFragment::Interpolation("_another".to_string(), Span::from(33..41)), - FmtStringFragment::String(" ".to_string()), - FmtStringFragment::Interpolation("vAr_123".to_string(), Span::from(44..51)), + FmtStrFragment::String("hello ".to_string()), + FmtStrFragment::Interpolation("world".to_string(), Span::from(21..26)), + FmtStrFragment::String(" and ".to_string()), + FmtStrFragment::Interpolation("_another".to_string(), Span::from(33..41)), + FmtStrFragment::String(" ".to_string()), + FmtStrFragment::Interpolation("vAr_123".to_string(), Span::from(44..51)), ], 38, ), diff --git a/compiler/noirc_frontend/src/lexer/token.rs b/compiler/noirc_frontend/src/lexer/token.rs index 01c641b3f0e..f35515045db 100644 --- a/compiler/noirc_frontend/src/lexer/token.rs +++ b/compiler/noirc_frontend/src/lexer/token.rs @@ -25,7 +25,7 @@ pub enum BorrowedToken<'input> { Str(&'input str), /// the u8 is the number of hashes, i.e. r###.. RawStr(&'input str, u8), - FmtStr(&'input [FmtStringFragment], u32 /* length */), + FmtStr(&'input [FmtStrFragment], u32 /* length */), Keyword(Keyword), IntType(IntType), AttributeStart { @@ -136,7 +136,7 @@ pub enum Token { Str(String), /// the u8 is the number of hashes, i.e. r###.. RawStr(String, u8), - FmtStr(Vec, u32 /* length */), + FmtStr(Vec, u32 /* length */), Keyword(Keyword), IntType(IntType), AttributeStart { @@ -313,15 +313,15 @@ pub fn token_to_borrowed_token(token: &Token) -> BorrowedToken<'_> { } #[derive(Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)] -pub enum FmtStringFragment { +pub enum FmtStrFragment { String(String), Interpolation(String, Span), } -impl Display for FmtStringFragment { +impl Display for FmtStrFragment { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - FmtStringFragment::String(string) => { + FmtStrFragment::String(string) => { // Undo the escapes when displaying the fmt string let string = string .replace('{', "{{") @@ -334,7 +334,7 @@ impl Display for FmtStringFragment { .replace('\"', "\\\""); write!(f, "{}", string) } - FmtStringFragment::Interpolation(string, _span) => { + FmtStrFragment::Interpolation(string, _span) => { write!(f, "{{{}}}", string) } } diff --git a/compiler/noirc_frontend/src/monomorphization/ast.rs b/compiler/noirc_frontend/src/monomorphization/ast.rs index fcd6c66a93e..a3941483d3d 100644 --- a/compiler/noirc_frontend/src/monomorphization/ast.rs +++ b/compiler/noirc_frontend/src/monomorphization/ast.rs @@ -11,7 +11,7 @@ use crate::{ ast::{BinaryOpKind, IntegerBitSize, Signedness, Visibility}, token::{Attributes, FunctionAttribute}, }; -use crate::{hir_def::function::FunctionSignature, token::FmtStringFragment}; +use crate::{hir_def::function::FunctionSignature, token::FmtStrFragment}; use serde::{Deserialize, Serialize}; use super::HirType; @@ -106,7 +106,7 @@ pub enum Literal { Bool(bool), Unit, Str(String), - FmtStr(Vec, u64, Box), + FmtStr(Vec, u64, Box), } #[derive(Debug, Clone, Hash)] diff --git a/compiler/noirc_frontend/src/monomorphization/mod.rs b/compiler/noirc_frontend/src/monomorphization/mod.rs index 6bed0f4ccc5..b31a5744d09 100644 --- a/compiler/noirc_frontend/src/monomorphization/mod.rs +++ b/compiler/noirc_frontend/src/monomorphization/mod.rs @@ -12,7 +12,7 @@ use crate::ast::{FunctionKind, IntegerBitSize, Signedness, UnaryOp, Visibility}; use crate::hir::comptime::InterpreterError; use crate::hir::type_check::{NoMatchingImplFoundError, TypeCheckError}; use crate::node_interner::{ExprId, ImplSearchErrorKind}; -use crate::token::FmtStringFragment; +use crate::token::FmtStrFragment; use crate::{ debug::DebugInstrumenter, hir_def::{ @@ -1847,7 +1847,7 @@ impl<'interner> Monomorphizer<'interner> { _ => unreachable!("ICE: format string fields should be structured in a tuple, but got a {zeroed_tuple}"), }; ast::Expression::Literal(ast::Literal::FmtStr( - vec![FmtStringFragment::String("\0".repeat(*length as usize))], + vec![FmtStrFragment::String("\0".repeat(*length as usize))], fields_len, Box::new(zeroed_tuple), )) diff --git a/compiler/noirc_frontend/src/parser/parser.rs b/compiler/noirc_frontend/src/parser/parser.rs index 2f62141234f..fcc58c5d833 100644 --- a/compiler/noirc_frontend/src/parser/parser.rs +++ b/compiler/noirc_frontend/src/parser/parser.rs @@ -5,7 +5,7 @@ use noirc_errors::Span; use crate::{ ast::{Ident, ItemVisibility}, lexer::{Lexer, SpannedTokenResult}, - token::{FmtStringFragment, IntType, Keyword, SpannedToken, Token, TokenKind, Tokens}, + token::{FmtStrFragment, IntType, Keyword, SpannedToken, Token, TokenKind, Tokens}, }; use super::{labels::ParsingRuleLabel, ParsedModule, ParserError, ParserErrorReason}; @@ -294,7 +294,7 @@ impl<'a> Parser<'a> { } } - fn eat_fmt_str(&mut self) -> Option<(Vec, u32)> { + fn eat_fmt_str(&mut self) -> Option<(Vec, u32)> { if matches!(self.token.token(), Token::FmtStr(..)) { let token = self.bump(); match token.into_token() { From 59cda07a02483f0b51add53314693af8e03abbb6 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Thu, 5 Dec 2024 15:04:31 -0300 Subject: [PATCH 10/13] Make it work for `{{{`, `{{{{`, etc. --- compiler/noirc_printable_type/src/lib.rs | 25 ++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/compiler/noirc_printable_type/src/lib.rs b/compiler/noirc_printable_type/src/lib.rs index 10f27fc5f87..0c6fbe3fbac 100644 --- a/compiler/noirc_printable_type/src/lib.rs +++ b/compiler/noirc_printable_type/src/lib.rs @@ -254,7 +254,13 @@ fn to_string(value: &PrintableValue, typ: &PrintableType) -> Op } /// Assumes `re` is a regex that matches interpolation sequences like `{...}`. -/// Replaces all matches by invoking `replacement`, but only if a match isn't preceded by a '{'. +/// Replaces all matches by invoking `replacement`, but only if a match isn't preceded by an odd +/// number of '{'. +/// For example, if we have `{{x}}` then there's one `{` before the interpolation, and we can +/// see it's actually not an interpolation. +/// However, if we have `{{{x}}}` there are two `{` before the interpolation, so the third +/// `{` actually starts an interpolation. +/// With four (`{{{{x}}}}`) it's again even and it's not an interpolation, etc. fn replace_all_interpolations( re: &Regex, haystack: &str, @@ -265,9 +271,20 @@ fn replace_all_interpolations( for caps in re.captures_iter(haystack) { let m = caps.get(0).unwrap(); - // If we have '{' before the match, it's an escape sequence. - let start = m.start(); - if start > 0 && haystack.as_bytes().get(start - 1).copied() == Some(b'{') { + // Count how many '{' we have right before this interpolation. If the number if + // even (or zero) it's an interpolation, otherwise it's not. + let mut curly_braces_count = 0; + let mut index = m.start(); + while index > 0 { + if haystack.as_bytes().get(index - 1).unwrap() == &b'{' { + curly_braces_count += 1; + } else { + break; + } + index -= 1; + } + + if curly_braces_count % 2 == 1 { continue; } From a78076116a973175e999cdcf27b11c4dbdd7d860 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Fri, 6 Dec 2024 08:24:35 -0300 Subject: [PATCH 11/13] Replace TODO with comment --- compiler/noirc_frontend/src/hir/comptime/interpreter.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler/noirc_frontend/src/hir/comptime/interpreter.rs b/compiler/noirc_frontend/src/hir/comptime/interpreter.rs index ce1fbe4a524..53b6015d2a5 100644 --- a/compiler/noirc_frontend/src/hir/comptime/interpreter.rs +++ b/compiler/noirc_frontend/src/hir/comptime/interpreter.rs @@ -665,7 +665,8 @@ impl<'local, 'interner> Interpreter<'local, 'interner> { result.push_str(&value.display(self.elaborator.interner).to_string()); } } else { - // TODO: should we panic here? + // If we can't find a value for this fragment it means it already errored + // when trying to type-check it, and we don't want to error again or panic here. } } } From df08cdd1c9f49499197bfa0a31efed30c1f40263 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Fri, 6 Dec 2024 09:57:29 -0300 Subject: [PATCH 12/13] Don't use regex in print oracle --- Cargo.lock | 1 - compiler/noirc_printable_type/Cargo.toml | 1 - compiler/noirc_printable_type/src/lib.rs | 138 ++++++++++++++--------- 3 files changed, 82 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 14066a0e37e..c43e17955d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3195,7 +3195,6 @@ dependencies = [ "acvm", "iter-extended", "jsonrpc", - "regex", "serde", "serde_json", "thiserror", diff --git a/compiler/noirc_printable_type/Cargo.toml b/compiler/noirc_printable_type/Cargo.toml index 8bb56703e8a..8d0574aad64 100644 --- a/compiler/noirc_printable_type/Cargo.toml +++ b/compiler/noirc_printable_type/Cargo.toml @@ -14,7 +14,6 @@ workspace = true [dependencies] acvm.workspace = true iter-extended.workspace = true -regex = "1.9.1" serde.workspace = true serde_json.workspace = true thiserror.workspace = true diff --git a/compiler/noirc_printable_type/src/lib.rs b/compiler/noirc_printable_type/src/lib.rs index 0c6fbe3fbac..d46b37c4ea2 100644 --- a/compiler/noirc_printable_type/src/lib.rs +++ b/compiler/noirc_printable_type/src/lib.rs @@ -2,7 +2,7 @@ use std::{collections::BTreeMap, str}; use acvm::{acir::AcirField, brillig_vm::brillig::ForeignCallParam}; use iter_extended::vecmap; -use regex::{Captures, Regex}; + use serde::{Deserialize, Serialize}; use thiserror::Error; @@ -253,49 +253,6 @@ fn to_string(value: &PrintableValue, typ: &PrintableType) -> Op Some(output) } -/// Assumes `re` is a regex that matches interpolation sequences like `{...}`. -/// Replaces all matches by invoking `replacement`, but only if a match isn't preceded by an odd -/// number of '{'. -/// For example, if we have `{{x}}` then there's one `{` before the interpolation, and we can -/// see it's actually not an interpolation. -/// However, if we have `{{{x}}}` there are two `{` before the interpolation, so the third -/// `{` actually starts an interpolation. -/// With four (`{{{{x}}}}`) it's again even and it's not an interpolation, etc. -fn replace_all_interpolations( - re: &Regex, - haystack: &str, - mut replacement: impl FnMut(&Captures) -> Result, -) -> Result { - let mut new = String::with_capacity(haystack.len()); - let mut last_match = 0; - for caps in re.captures_iter(haystack) { - let m = caps.get(0).unwrap(); - - // Count how many '{' we have right before this interpolation. If the number if - // even (or zero) it's an interpolation, otherwise it's not. - let mut curly_braces_count = 0; - let mut index = m.start(); - while index > 0 { - if haystack.as_bytes().get(index - 1).unwrap() == &b'{' { - curly_braces_count += 1; - } else { - break; - } - index -= 1; - } - - if curly_braces_count % 2 == 1 { - continue; - } - - new.push_str(&haystack[last_match..m.start()]); - new.push_str(&replacement(&caps)?); - last_match = m.end(); - } - new.push_str(&haystack[last_match..]); - Ok(new) -} - impl std::fmt::Display for PrintableValueDisplay { fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -304,24 +261,56 @@ impl std::fmt::Display for PrintableValueDisplay { write!(fmt, "{output_string}") } Self::FmtString(template, values) => { - let mut display_iter = values.iter(); - let re = Regex::new(r"\{([a-zA-Z0-9_]+)\}").map_err(|_| std::fmt::Error)?; + let mut values_iter = values.iter(); + write_template_replacing_interpolations(template, fmt, || { + values_iter.next().and_then(|(value, typ)| to_string(value, typ)) + }) + } + } + } +} - // First replace all `{...}` interpolations with the corresponding values, making sure - // to skip ones that are also surrounded by curly braces (like `{{...}}`) - let formatted_str = replace_all_interpolations(&re, template, |_: &Captures| { - let (value, typ) = display_iter.next().ok_or(std::fmt::Error)?; - to_string(value, typ).ok_or(std::fmt::Error) - })?; +fn write_template_replacing_interpolations( + template: &str, + fmt: &mut std::fmt::Formatter<'_>, + mut replacement: impl FnMut() -> Option, +) -> std::fmt::Result { + let mut last_index = 0; // How far we've written from the template + let mut char_indices = template.char_indices().peekable(); + while let Some((char_index, char)) = char_indices.next() { + // Keep going forward until we find a '{' + if char != '{' { + continue; + } - // Now that we replaced all interpolations, we need to unescape the double curly braces - // that were added to avoid interpolations. - let formatted_str = formatted_str.replace("{{", "{").replace("}}", "}"); + // We'll either have to write an interpolation or '{{' if it's an escape, + // so let's write what we've seen so far in the template. + write!(fmt, "{}", &template[last_index..char_index])?; - write!(fmt, "{formatted_str}") + // If it's '{{', write '{' and keep going + if char_indices.peek().map(|(_, char)| char) == Some(&'{') { + write!(fmt, "{{")?; + (last_index, _) = char_indices.next().unwrap(); + continue; + } + + // Write the interpolation + if let Some(string) = replacement() { + write!(fmt, "{}", string)?; + } else { + return Err(std::fmt::Error); + } + + // Whatever was inside '{...}' doesn't matter, so skip until we find '}' + while let Some((_, char)) = char_indices.next() { + if char == '}' { + last_index = char_indices.peek().map(|(index, _)| *index).unwrap_or(template.len()); + break; } } } + + write!(fmt, "{}", &template[last_index..]) } /// This trims any leading zeroes. @@ -421,3 +410,40 @@ pub fn decode_string_value(field_elements: &[F]) -> String { let final_string = str::from_utf8(&string_as_slice).unwrap(); final_string.to_owned() } + +#[cfg(test)] +mod tests { + use acvm::FieldElement; + + use crate::{PrintableType, PrintableValue, PrintableValueDisplay}; + + #[test] + fn printable_value_display_to_string_without_interpolations() { + let template = "hello"; + let display = + PrintableValueDisplay::::FmtString(template.to_string(), vec![]); + assert_eq!(display.to_string(), template); + } + + #[test] + fn printable_value_display_to_string_with_curly_escapes() { + let template = "hello {{world}} {{{{double_escape}}}}"; + let display = + PrintableValueDisplay::::FmtString(template.to_string(), vec![]); + assert_eq!(display.to_string(), template); + } + + #[test] + fn printable_value_display_to_string_with_interpolations() { + let template = "hello {one} {{no}} {two} {{not_again}} {three} world"; + let values = vec![ + (PrintableValue::String("ONE".to_string()), PrintableType::String { length: 3 }), + (PrintableValue::String("TWO".to_string()), PrintableType::String { length: 3 }), + (PrintableValue::String("THREE".to_string()), PrintableType::String { length: 5 }), + ]; + let expected = "hello ONE {{no}} TWO {{not_again}} THREE world"; + let display = + PrintableValueDisplay::::FmtString(template.to_string(), values); + assert_eq!(display.to_string(), expected); + } +} From 8a1521458f9218921951a530a6c2ff87bb0a37f9 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Fri, 6 Dec 2024 11:28:49 -0300 Subject: [PATCH 13/13] Don't print format string with errors at compile time --- compiler/noirc_frontend/src/hir/comptime/errors.rs | 12 +++++++++++- .../noirc_frontend/src/hir/comptime/interpreter.rs | 8 ++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/compiler/noirc_frontend/src/hir/comptime/errors.rs b/compiler/noirc_frontend/src/hir/comptime/errors.rs index 446c4dae2d3..3df20b39209 100644 --- a/compiler/noirc_frontend/src/hir/comptime/errors.rs +++ b/compiler/noirc_frontend/src/hir/comptime/errors.rs @@ -240,6 +240,9 @@ pub enum InterpreterError { err: Box, location: Location, }, + CannotInterpretFormatStringWithErrors { + location: Location, + }, // These cases are not errors, they are just used to prevent us from running more code // until the loop can be resumed properly. These cases will never be displayed to users. @@ -315,7 +318,8 @@ impl InterpreterError { | InterpreterError::TypeAnnotationsNeededForMethodCall { location } | InterpreterError::CannotResolveExpression { location, .. } | InterpreterError::CannotSetFunctionBody { location, .. } - | InterpreterError::UnknownArrayLength { location, .. } => *location, + | InterpreterError::UnknownArrayLength { location, .. } + | InterpreterError::CannotInterpretFormatStringWithErrors { location } => *location, InterpreterError::FailedToParseMacro { error, file, .. } => { Location::new(error.span(), *file) @@ -664,6 +668,12 @@ impl<'a> From<&'a InterpreterError> for CustomDiagnostic { let secondary = format!("Evaluating the length failed with: `{err}`"); CustomDiagnostic::simple_error(msg, secondary, location.span) } + InterpreterError::CannotInterpretFormatStringWithErrors { location } => { + let msg = "Cannot interpret format string with errors".to_string(); + let secondary = + "Some of the variables to interpolate could not be evaluated".to_string(); + CustomDiagnostic::simple_error(msg, secondary, location.span) + } } } } diff --git a/compiler/noirc_frontend/src/hir/comptime/interpreter.rs b/compiler/noirc_frontend/src/hir/comptime/interpreter.rs index 53b6015d2a5..dfa55a9d79b 100644 --- a/compiler/noirc_frontend/src/hir/comptime/interpreter.rs +++ b/compiler/noirc_frontend/src/hir/comptime/interpreter.rs @@ -665,8 +665,12 @@ impl<'local, 'interner> Interpreter<'local, 'interner> { result.push_str(&value.display(self.elaborator.interner).to_string()); } } else { - // If we can't find a value for this fragment it means it already errored - // when trying to type-check it, and we don't want to error again or panic here. + // If we can't find a value for this fragment it means the interpolated value was not + // found or it errored. In this case we error here as well. + let location = self.elaborator.interner.expr_location(&id); + return Err(InterpreterError::CannotInterpretFormatStringWithErrors { + location, + }); } } }