diff --git a/crates/oxc_codegen/src/str.rs b/crates/oxc_codegen/src/str.rs
index 04fb474a24585..4f3b23e5c0cb7 100644
--- a/crates/oxc_codegen/src/str.rs
+++ b/crates/oxc_codegen/src/str.rs
@@ -28,6 +28,112 @@ impl Codegen<'_> {
     pub(crate) fn print_string_literal(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
         self.add_source_mapping(s.span);
 
+        // When not minifying, print the string from its `raw` representation, if it has one.
+        // Otherwise (minifying, or no `raw`), re-encode the string.
+        if !self.options.minify {
+            if let Some(raw) = s.raw {
+                self.print_string_literal_raw(&raw);
+                return;
+            }
+        }
+
+        self.print_string_literal_encode(s, allow_backtick);
+    }
+
+    /// Print a [`StringLiteral`], from its `raw` representation.
+    ///
+    /// Only change made is to change quotes to what's specified in `options.single_quote`.
+    /// When quotes are changed, any unescaped occurrence of the new quote character in the string
+    /// is escaped with `\`. Everything else (including existing escapes) is printed unchanged.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `raw` is less than 2 bytes long. When quotes need changing, also panics if
+    /// first or last byte of `raw` is not ASCII, or if string content ends with a lone `\`.
+    fn print_string_literal_raw(&mut self, raw: &str) {
+        let quote = self.quote;
+        let raw_bytes = raw.as_bytes();
+
+        // Assertion here should remove the bounds checks from `raw_bytes[0]`,
+        // `raw_bytes[raw_bytes.len() - 1]` and `&raw_bytes[1..raw_bytes.len() - 1]` below
+        assert!(raw_bytes.len() >= 2);
+
+        // If `raw` already has desired quotes, print `raw` unchanged
+        if raw_bytes[0] == quote as u8 {
+            self.print_str(raw);
+            return;
+        }
+
+        // Escape quotes
+
+        // Cut off quotes from start and end of `raw`.
+        // Check the first and last bytes of `raw` are ASCII. This ensures that trimmed `raw_bytes`
+        // is a valid UTF-8 string, which doesn't start part-way through a unicode byte sequence,
+        // and doesn't end with an unfinished part of one.
+        // This ensures the safety of `print_bytes_unchecked` calls below.
+        // We have to do these checks because we don't have a static guarantee that `raw` starts
+        // and ends with an ASCII quote. Without them, a bug in parser could cause undefined
+        // behavior here.
+        assert!(raw_bytes[0].is_ascii());
+        assert!(raw_bytes[raw_bytes.len() - 1].is_ascii());
+        let raw_bytes = &raw_bytes[1..raw_bytes.len() - 1];
+        let mut bytes = raw_bytes.iter();
+        let mut chunk_start = bytes.as_slice().as_ptr();
+
+        quote.print(self);
+
+        while let Some(&byte) = bytes.clone().next() {
+            if byte == quote as u8 {
+                // Print up to before quote, print slash, and start next chunk on the quote,
+                // so quote gets pushed in next chunk.
+                // Note: `byte` was peeked, not consumed, so `quote_ptr` points to the quote, not after it.
+                let quote_ptr = bytes.as_slice().as_ptr();
+                // SAFETY: `chunk_start` points to either start of string content or an ASCII quote char.
+                // Either way, that's on a UTF-8 char boundary, and in bounds of `raw_bytes`
+                // (start of string content is a char boundary because first byte of `raw` is ASCII).
+                // `quote_ptr` points to an ASCII quote char, so also on a UTF-8 char boundary.
+                // `quote_ptr >= chunk_start` because `bytes` only gets advanced,
+                // and `chunk_start` is either start of string or a previous `quote_ptr`.
+                unsafe {
+                    let chunk_len = quote_ptr.offset_from(chunk_start);
+                    let chunk_len = usize::try_from(chunk_len).unwrap_unchecked();
+                    let chunk = slice::from_raw_parts(chunk_start, chunk_len);
+                    self.code.print_bytes_unchecked(chunk);
+                }
+                self.print_ascii_byte(b'\\');
+                chunk_start = quote_ptr;
+
+                // Consume the quote
+                bytes.next().unwrap();
+            } else if byte == b'\\' {
+                // Consume slash and next byte.
+                // Next byte might be an escaped quote which we don't want to escape again e.g. `\"`.
+                bytes.next().unwrap();
+                bytes.next().unwrap();
+            } else {
+                // Consume the peeked byte
+                bytes.next().unwrap();
+            }
+        }
+
+        // SAFETY: `chunk_start` points to either start of string content or an ASCII quote character.
+        // Either way, that's on a UTF-8 char boundary, and in bounds of `raw_bytes`
+        // (start of string content is a char boundary because first byte of `raw` is ASCII).
+        // `bytes` is exhausted, so `bytes.as_slice().as_ptr()` points to end of `raw_bytes`.
+        // `chunk_start` must be before it, or (if string is empty) equal to it.
+        unsafe {
+            let end_ptr = bytes.as_slice().as_ptr();
+            let chunk_len = end_ptr.offset_from(chunk_start);
+            let chunk_len = usize::try_from(chunk_len).unwrap_unchecked();
+            let chunk = slice::from_raw_parts(chunk_start, chunk_len);
+            self.code.print_bytes_unchecked(chunk);
+        }
+
+        quote.print(self);
+    }
+
+    /// Print a [`StringLiteral`], re-encoding from its `value`.
+    fn print_string_literal_encode(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
         // If `minify` option enabled, quote will be chosen depending on what produces shortest output.
         // What is the best quote to use will be determined when first character needing escape is found.
         // This avoids iterating through the string twice if it contains no quotes (common case).
diff --git a/crates/oxc_codegen/tests/integration/esbuild.rs b/crates/oxc_codegen/tests/integration/esbuild.rs
index f35605ad1b091..884251f2d2e71 100644
--- a/crates/oxc_codegen/tests/integration/esbuild.rs
+++ b/crates/oxc_codegen/tests/integration/esbuild.rs
@@ -333,52 +333,89 @@ fn test_nullish() {
 
 #[test]
 fn test_string() {
+    // No `minify` option
+
+    // Prints double-quoted strings as in original
+    test("let x = \"\"", "let x = \"\";\n");
+    test("let x = \"abc\"", "let x = \"abc\";\n");
+    test("let x = \"\t\"", "let x = \"\t\";\n");
+    test("let x = \"\\t\"", "let x = \"\\t\";\n");
+
+    // Converts single quote to double
     test("let x = ''", "let x = \"\";\n");
-    test("let x = '\\b'", "let x = \"\\b\";\n");
-    test("let x = '\\f'", "let x = \"\\f\";\n");
-    test("let x = '\t'", "let x = \"\t\";\n");
-    test("let x = '\\v'", "let x = \"\\v\";\n");
-    test("let x = '\\n'", "let x = \"\\n\";\n");
-    test("let x = '\\r'", "let x = \"\\r\";\n");
-    test("let x = '\\r\\n'", "let x = \"\\r\\n\";\n");
-    test("let x = '\\''", "let x = \"'\";\n");
+    test("let x = 'abc'", "let x = \"abc\";\n");
     test("let x = '\"'", "let x = \"\\\"\";\n");
-    test("let x = '`'", "let x = \"`\";\n");
-    test("let x = '\\'\"'", "let x = \"'\\\"\";\n");
-    test("let x = 
'\\'`'", "let x = \"'`\";\n"); - test("let x = '\"`'", "let x = \"\\\"`\";\n"); - test("let x = '\\\\'", "let x = \"\\\\\";\n"); - test("let x = '\x00'", "let x = \"\\0\";\n"); - test("let x = '\x00!'", "let x = \"\\0!\";\n"); - test("let x = '\x001'", "let x = \"\\x001\";\n"); - test("let x = '\\0'", "let x = \"\\0\";\n"); - test("let x = '\\0!'", "let x = \"\\0!\";\n"); - test("let x = '\x07'", "let x = \"\\x07\";\n"); - test("let x = '\x07!'", "let x = \"\\x07!\";\n"); - test("let x = '\x071'", "let x = \"\\x071\";\n"); - test("let x = '\\7'", "let x = \"\\x07\";\n"); - test("let x = '\\7!'", "let x = \"\\x07!\";\n"); - test("let x = '\\01'", "let x = \"\x01\";\n"); - test("let x = '\x10'", "let x = \"\x10\";\n"); - test("let x = '\\x10'", "let x = \"\x10\";\n"); - test("let x = '\x1B'", "let x = \"\\x1B\";\n"); - test("let x = '\\x1B'", "let x = \"\\x1B\";\n"); - test("let x = '\\x41'", "let x = \"A\";\n"); - test("let x = '\u{ABCD}'", "let x = \"\u{ABCD}\";\n"); - test("let x = '\\uABCD'", "let x = \"\u{ABCD}\";\n"); - test("let x = '\\U000123AB'", "let x = \"U000123AB\";\n"); - test("let x = '\\u{123AB}'", "let x = \"\u{123ab}\";\n"); - test("let x = '\\u{41}'", "let x = \"A\";\n"); - test("let x = '\\uD808\\uDFAB'", "let x = \"\u{123ab}\";\n"); - test("let x = '\\uD808'", "let x = \"\\ud808\";\n"); // lone surrogate - test("let x = '\\uD808X'", "let x = \"\\ud808X\";\n"); - test("let x = '\\uDFAB'", "let x = \"\\udfab\";\n"); - test("let x = '\\uDFABX'", "let x = \"\\udfabX\";\n"); - - test("let x = '\\x80'", "let x = \"\u{80}\";\n"); - test("let x = '\\xFF'", "let x = \"ÿ\";\n"); - test("let x = '\\xF0\\x9F\\x8D\\x95'", "let x = \"ð\u{9f}\u{8d}\u{95}\";\n"); - test("let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x = \"𐐂\\udc03\\ud804\";\n"); // surrogates + test("let x = 'abc\"'", "let x = \"abc\\\"\";\n"); + test("let x = 'abc\"\"\"'", "let x = \"abc\\\"\\\"\\\"\";\n"); + test("let x = '\"def'", "let x = \"\\\"def\";\n"); + test("let x = '\"\"\"def'", "let 
x = \"\\\"\\\"\\\"def\";\n"); + test("let x = 'abc\"def'", "let x = \"abc\\\"def\";\n"); + test("let x = 'abc\"\"\"def\"\"\"ghi'", "let x = \"abc\\\"\\\"\\\"def\\\"\\\"\\\"ghi\";\n"); + // Does not double-escape already-escaped quotes + test("let x = '\\\"'", "let x = \"\\\"\";\n"); + test("let x = 'abc\\\"\\\"'", "let x = \"abc\\\"\\\"\";\n"); + test("let x = '\\\"\\\"def'", "let x = \"\\\"\\\"def\";\n"); + test("let x = 'abc\\\"\\\"def'", "let x = \"abc\\\"\\\"def\";\n"); + test("let x = '\\r\\n\"'", "let x = \"\\r\\n\\\"\";\n"); + test("let x = '\\\\\"'", "let x = \"\\\\\\\"\";\n"); + test("let x = '\\\\\\\"'", "let x = \"\\\\\\\"\";\n"); + // Does not escape other characters + test("let x = '\t'", "let x = \"\t\";\n"); + // Prints other escapes as in original + test("let x = '\\t'", "let x = \"\\t\";\n"); + test("let x = '\\x41'", "let x = \"\\x41\";\n"); + test("let x = '\\u{41}'", "let x = \"\\u{41}\";\n"); + test("let x = '\\uD800'", "let x = \"\\uD800\";\n"); + test("let x = '\\uD801\\uDC02'", "let x = \"\\uD801\\uDC02\";\n"); + + // `minify` option + + // Escapes characters and chooses best quote character + test_minify("let x = ''", "let x=``;"); + test_minify("let x = '\\b'", "let x=`\\b`;"); + test_minify("let x = '\\f'", "let x=`\\f`;"); + test_minify("let x = '\t'", "let x=`\t`;"); + test_minify("let x = '\\v'", "let x=`\\v`;"); + test_minify("let x = '\\n'", "let x=`\n`;"); + test_minify("let x = '\\r'", "let x=`\\r`;"); + test_minify("let x = '\\r\\n'", "let x=`\\r\n`;"); + test_minify("let x = '\\''", "let x=`'`;"); + test_minify("let x = '\"'", "let x=`\"`;"); + test_minify("let x = '`'", "let x=\"`\";"); + test_minify("let x = '\\'\"'", "let x=`'\"`;"); + test_minify("let x = '\\'`'", "let x=\"'`\";"); + test_minify("let x = '\"`'", "let x='\"`';"); + test_minify("let x = '\\\\'", "let x=`\\\\`;"); + test_minify("let x = '\x00'", "let x=`\\0`;"); + test_minify("let x = '\x00!'", "let x=`\\0!`;"); + test_minify("let x = '\x001'", "let 
x=`\\x001`;"); + test_minify("let x = '\\0'", "let x=`\\0`;"); + test_minify("let x = '\\0!'", "let x=`\\0!`;"); + test_minify("let x = '\x07'", "let x=`\\x07`;"); + test_minify("let x = '\x07!'", "let x=`\\x07!`;"); + test_minify("let x = '\x071'", "let x=`\\x071`;"); + test_minify("let x = '\\7'", "let x=`\\x07`;"); + test_minify("let x = '\\7!'", "let x=`\\x07!`;"); + test_minify("let x = '\\01'", "let x=`\x01`;"); + test_minify("let x = '\x10'", "let x=`\x10`;"); + test_minify("let x = '\\x10'", "let x=`\x10`;"); + test_minify("let x = '\x1B'", "let x=`\\x1B`;"); + test_minify("let x = '\\x1B'", "let x=`\\x1B`;"); + test_minify("let x = '\\x41'", "let x=`A`;"); + test_minify("let x = '\u{ABCD}'", "let x=`\u{ABCD}`;"); + test_minify("let x = '\\uABCD'", "let x=`\u{ABCD}`;"); + test_minify("let x = '\\U000123AB'", "let x=`U000123AB`;"); + test_minify("let x = '\\u{123AB}'", "let x=`\u{123ab}`;"); + test_minify("let x = '\\u{41}'", "let x=`A`;"); + test_minify("let x = '\\uD808\\uDFAB'", "let x=`\u{123ab}`;"); + test_minify("let x = '\\uD808'", "let x=`\\ud808`;"); // lone surrogate + test_minify("let x = '\\uD808X'", "let x=`\\ud808X`;"); + test_minify("let x = '\\uDFAB'", "let x=`\\udfab`;"); + test_minify("let x = '\\uDFABX'", "let x=`\\udfabX`;"); + test_minify("let x = '\\x80'", "let x=`\u{80}`;"); + test_minify("let x = '\\xFF'", "let x=`ÿ`;"); + test_minify("let x = '\\xF0\\x9F\\x8D\\x95'", "let x=`ð\u{9f}\u{8d}\u{95}`;"); + test_minify("let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x=`𐐂\\udc03\\ud804`;"); // surrogates } #[test] diff --git a/crates/oxc_codegen/tests/integration/unit.rs b/crates/oxc_codegen/tests/integration/unit.rs index 3dc78c249b4ac..860feab548f2f 100644 --- a/crates/oxc_codegen/tests/integration/unit.rs +++ b/crates/oxc_codegen/tests/integration/unit.rs @@ -150,7 +150,13 @@ fn unicode_escape() { test("console.log('こんにちは');", "console.log(\"こんにちは\");\n"); test("console.log('안녕하세요');", "console.log(\"안녕하세요\");\n"); 
test("console.log('🧑‍🤝‍🧑');", "console.log(\"🧑‍🤝‍🧑\");\n"); - test("console.log(\"\\uD800\\uD801\")", "console.log(\"\\ud800\\ud801\");\n"); + test("console.log(\"\\uD800\\uD801\")", "console.log(\"\\uD800\\uD801\");\n"); + + test_minify("console.log('你好');", "console.log(`你好`);"); + test_minify("console.log('こんにちは');", "console.log(`こんにちは`);"); + test_minify("console.log('안녕하세요');", "console.log(`안녕하세요`);"); + test_minify("console.log('🧑‍🤝‍🧑');", "console.log(`🧑‍🤝‍🧑`);"); + test_minify("console.log(\"\\uD800\\uD801\")", "console.log(`\\ud800\\ud801`);"); } #[test] @@ -511,53 +517,59 @@ fn getter_setter() { #[test] fn string() { + // Uses quotes as requested in options + let single_quote = CodegenOptions { single_quote: true, ..CodegenOptions::default() }; + test_options("let x = \"'\";", "let x = '\\'';\n", single_quote); + let double_quote = CodegenOptions { single_quote: false, ..CodegenOptions::default() }; + test_options("let x = '\\\"';", "let x = \"\\\"\";\n", double_quote); + // `${` only escaped when quote is backtick - test("let x = \"${}\";", "let x = \"${}\";\n"); - test_minify("let x = \"${}\";", "let x=\"${}\";"); + test("let x = '${}';", "let x = \"${}\";\n"); + test_minify("let x = '${}';", "let x=\"${}\";"); test("let x = '\"\"${}';", "let x = \"\\\"\\\"${}\";\n"); test_minify("let x = '\"\"${}';", "let x='\"\"${}';"); - test("let x = '\"\"\\'\\'${}';", "let x = \"\\\"\\\"''${}\";\n"); + test("let x = '\"\"\\'\\'${}';", "let x = \"\\\"\\\"\\'\\'${}\";\n"); test_minify("let x = '\"\"\\'\\'${}';", "let x=`\"\"''\\${}`;"); test_minify("let x = '\\'\\'\\'\"\"\"${}';", "let x=`'''\"\"\"\\${}`;"); // Lossy replacement character - test("let x = \"�\\u{FFFD}\";", "let x = \"��\";\n"); + test("let x = '�\\u{FFFD}';", "let x = \"�\\u{FFFD}\";\n"); test_minify("let x = \"�\\u{FFFD}\";", "let x=`��`;"); test( - "let x = \"� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �\";", - "let x = \"� ��� � ��� �\";\n", + "let x = '� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} 
�';", + "let x = \"� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �\";\n", ); test_minify( - "let x = \"� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �\";", + "let x = '� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �';", "let x=`� ��� � ��� �`;", ); // Lone surrogates test( - "let x = \"\\uD800 \\uDBFF \\uDC00 \\uDFFF\";", - "let x = \"\\ud800 \\udbff \\udc00 \\udfff\";\n", + "let x = '\\uD800 \\uDBFF \\uDC00 \\uDFFF';", + "let x = \"\\uD800 \\uDBFF \\uDC00 \\uDFFF\";\n", ); test_minify( - "let x = \"\\uD800 \\uDBFF \\uDC00 \\uDFFF\";", + "let x = '\\uD800 \\uDBFF \\uDC00 \\uDFFF';", "let x=`\\ud800 \\udbff \\udc00 \\udfff`;", ); - test("let x = \"\\uD800\u{41}\";", "let x = \"\\ud800A\";\n"); - test_minify("let x = \"\\uD800\u{41}\";", "let x=`\\ud800A`;"); + test("let x = '\\uD800\\u{41}';", "let x = \"\\uD800\\u{41}\";\n"); + test_minify("let x = '\\uD800\\u{41}';", "let x=`\\ud800A`;"); // Invalid pairs test( - "let x = \"\\uD800\\uDBFF \\uDC00\\uDFFF\";", - "let x = \"\\ud800\\udbff \\udc00\\udfff\";\n", + "let x = '\\uD800\\uDBFF \\uDC00\\uDFFF';", + "let x = \"\\uD800\\uDBFF \\uDC00\\uDFFF\";\n", ); test_minify( - "let x = \"\\uD800\\uDBFF \\uDC00\\uDFFF\";", + "let x = '\\uD800\\uDBFF \\uDC00\\uDFFF';", "let x=`\\ud800\\udbff \\udc00\\udfff`;", ); // Lone surrogates and lossy replacement characters test( - "let x = \"��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}\";", - "let x = \"����\\ud800\\udbff����\\udc00\\udfff����\";\n", + "let x = '��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}';", + "let x = \"��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}\";\n", ); test_minify( - "let x = \"��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}\";", + "let x = '��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}';", "let x=`����\\ud800\\udbff����\\udc00\\udfff����`;", );