Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions crates/oxc_codegen/src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,98 @@ impl Codegen<'_> {
pub(crate) fn print_string_literal(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
    // Record the source mapping for the literal's span before emitting any of its text.
    self.add_source_mapping(s.span);

    match s.raw {
        // Not minifying and the raw source text is available: print from `raw`.
        Some(raw) if !self.options.minify => self.print_string_literal_raw(&raw),
        // Minifying, or no `raw` available: re-encode the literal.
        _ => self.print_string_literal_encode(s, allow_backtick),
    }
}

/// Print a [`StringLiteral`], from its `raw` representation.
///
/// Only change made is to change quotes to what's specified in `options.single_quote`
/// (read here from `self.quote`).
///
/// * If `raw` already starts with the desired quote, it is printed unchanged.
/// * Otherwise the first and last bytes of `raw` are replaced with the desired quote, and every
///   occurrence of the desired quote inside the string which is not already preceded by
///   an escaping backslash gets a backslash inserted before it. Everything else
///   (including existing escape sequences) is printed exactly as it appears in `raw`.
///
/// # Panics
///
/// Panics if `raw` is shorter than 2 bytes, if the first or last byte of `raw` is not ASCII,
/// or if the string content ends with a backslash that has no byte following it.
fn print_string_literal_raw(&mut self, raw: &str) {
    let quote = self.quote;
    let raw_bytes = raw.as_bytes();

    // Assertion here should remove the bounds checks from `raw_bytes[0]`,
    // `raw_bytes[raw_bytes.len() - 1]` and `&raw_bytes[1..raw_bytes.len() - 1]` below
    assert!(raw_bytes.len() >= 2);

    // If `raw` already has desired quotes, print `raw` unchanged
    if raw_bytes[0] == quote as u8 {
        self.print_str(raw);
        return;
    }

    // Escape quotes

    // Cut off quotes from start and end of `raw`.
    // Check that BOTH the first and the last byte of `raw` are ASCII. `raw` is a `&str`, so it is
    // valid UTF-8. An ASCII first byte means index 1 is on a UTF-8 char boundary, and an ASCII
    // last byte means index `len - 1` is on a UTF-8 char boundary. Together these guarantee that
    // the trimmed `raw_bytes` is a valid UTF-8 string which neither starts nor ends part-way
    // through a multi-byte sequence.
    // This ensures the safety of `print_bytes_unchecked` calls below.
    // We have to do these checks because we don't have a static guarantee that `raw` starts and
    // ends with an ASCII quote. Without them, a bug in parser could cause undefined behavior here.
    assert!(raw_bytes[0].is_ascii());
    assert!(raw_bytes[raw_bytes.len() - 1].is_ascii());
    let raw_bytes = &raw_bytes[1..raw_bytes.len() - 1];
    let mut bytes = raw_bytes.iter();
    let mut chunk_start = bytes.as_slice().as_ptr();

    quote.print(self);

    // Peek at the next byte without consuming it (`clone()` of a slice iterator is cheap)
    while let Some(&byte) = bytes.clone().next() {
        if byte == quote as u8 {
            // Print up to before quote, print slash, and start next chunk on the quote,
            // so quote gets pushed in next chunk.
            // Note: `byte` was peeked, not consumed, so `quote_ptr` points to the quote, not after it.
            let quote_ptr = bytes.as_slice().as_ptr();
            // SAFETY: `chunk_start` points to either start of string content or an ASCII quote char.
            // Start of string content is on a UTF-8 char boundary because first byte of `raw`
            // was asserted to be ASCII above. An ASCII quote char is always on a char boundary.
            // Either way, `chunk_start` is in bounds of `raw_bytes`.
            // `quote_ptr` points to an ASCII quote char, so also on a UTF-8 char boundary.
            // `quote_ptr >= chunk_start` because `bytes` only gets advanced,
            // and `chunk_start` is either start of string or a previous `quote_ptr`.
            unsafe {
                let chunk_len = quote_ptr.offset_from(chunk_start);
                let chunk_len = usize::try_from(chunk_len).unwrap_unchecked();
                let chunk = slice::from_raw_parts(chunk_start, chunk_len);
                self.code.print_bytes_unchecked(chunk);
            }
            self.print_ascii_byte(b'\\');
            chunk_start = quote_ptr;

            // Consume the quote
            bytes.next().unwrap();
        } else if byte == b'\\' {
            // Consume slash and next byte.
            // Next byte might be an escaped quote which don't want to escape again e.g. `\"`.
            // If the next byte is the start of a multi-byte char, its continuation bytes are
            // consumed by the final branch below (they are never a quote or a backslash).
            bytes.next().unwrap();
            bytes.next().unwrap();
        } else {
            // Consume the peeked byte
            bytes.next().unwrap();
        }
    }

    // SAFETY: `chunk_start` points to either start of string content or an ASCII quote character.
    // Either way, that's on a UTF-8 char boundary (see assertions above), and in bounds of `raw_bytes`.
    // `bytes` is exhausted, so `bytes.as_slice().as_ptr()` points to end of `raw_bytes`,
    // which is on a UTF-8 char boundary because last byte of `raw` was asserted to be ASCII.
    // `chunk_start` must be before it, or (if string is empty) equal to it.
    unsafe {
        let end_ptr = bytes.as_slice().as_ptr();
        let chunk_len = end_ptr.offset_from(chunk_start);
        let chunk_len = usize::try_from(chunk_len).unwrap_unchecked();
        let chunk = slice::from_raw_parts(chunk_start, chunk_len);
        self.code.print_bytes_unchecked(chunk);
    }

    quote.print(self);
}

/// Print a [`StringLiteral`], re-encoding from its `value`.
fn print_string_literal_encode(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
// If `minify` option enabled, quote will be chosen depending on what produces shortest output.
// What is the best quote to use will be determined when first character needing escape is found.
// This avoids iterating through the string twice if it contains no quotes (common case).
Expand Down
125 changes: 81 additions & 44 deletions crates/oxc_codegen/tests/integration/esbuild.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,52 +333,89 @@ fn test_nullish() {

// Checks the printed form of string literals: first through `test` (no `minify` option),
// then through `test_minify`.
#[test]
fn test_string() {
// No `minify` option

// Prints double-quoted strings as in original
test("let x = \"\"", "let x = \"\";\n");
test("let x = \"abc\"", "let x = \"abc\";\n");
test("let x = \"\t\"", "let x = \"\t\";\n");
test("let x = \"\\t\"", "let x = \"\\t\";\n");

// Converts single quote to double
test("let x = ''", "let x = \"\";\n");
test("let x = '\\b'", "let x = \"\\b\";\n");
test("let x = '\\f'", "let x = \"\\f\";\n");
test("let x = '\t'", "let x = \"\t\";\n");
test("let x = '\\v'", "let x = \"\\v\";\n");
test("let x = '\\n'", "let x = \"\\n\";\n");
test("let x = '\\r'", "let x = \"\\r\";\n");
test("let x = '\\r\\n'", "let x = \"\\r\\n\";\n");
test("let x = '\\''", "let x = \"'\";\n");
test("let x = 'abc'", "let x = \"abc\";\n");
test("let x = '\"'", "let x = \"\\\"\";\n");
test("let x = '`'", "let x = \"`\";\n");
test("let x = '\\'\"'", "let x = \"'\\\"\";\n");
test("let x = '\\'`'", "let x = \"'`\";\n");
test("let x = '\"`'", "let x = \"\\\"`\";\n");
test("let x = '\\\\'", "let x = \"\\\\\";\n");
test("let x = '\x00'", "let x = \"\\0\";\n");
test("let x = '\x00!'", "let x = \"\\0!\";\n");
test("let x = '\x001'", "let x = \"\\x001\";\n");
test("let x = '\\0'", "let x = \"\\0\";\n");
test("let x = '\\0!'", "let x = \"\\0!\";\n");
test("let x = '\x07'", "let x = \"\\x07\";\n");
test("let x = '\x07!'", "let x = \"\\x07!\";\n");
test("let x = '\x071'", "let x = \"\\x071\";\n");
test("let x = '\\7'", "let x = \"\\x07\";\n");
test("let x = '\\7!'", "let x = \"\\x07!\";\n");
test("let x = '\\01'", "let x = \"\x01\";\n");
test("let x = '\x10'", "let x = \"\x10\";\n");
test("let x = '\\x10'", "let x = \"\x10\";\n");
test("let x = '\x1B'", "let x = \"\\x1B\";\n");
test("let x = '\\x1B'", "let x = \"\\x1B\";\n");
test("let x = '\\x41'", "let x = \"A\";\n");
test("let x = '\u{ABCD}'", "let x = \"\u{ABCD}\";\n");
test("let x = '\\uABCD'", "let x = \"\u{ABCD}\";\n");
test("let x = '\\U000123AB'", "let x = \"U000123AB\";\n");
test("let x = '\\u{123AB}'", "let x = \"\u{123ab}\";\n");
test("let x = '\\u{41}'", "let x = \"A\";\n");
test("let x = '\\uD808\\uDFAB'", "let x = \"\u{123ab}\";\n");
test("let x = '\\uD808'", "let x = \"\\ud808\";\n"); // lone surrogate
test("let x = '\\uD808X'", "let x = \"\\ud808X\";\n");
test("let x = '\\uDFAB'", "let x = \"\\udfab\";\n");
test("let x = '\\uDFABX'", "let x = \"\\udfabX\";\n");

test("let x = '\\x80'", "let x = \"\u{80}\";\n");
test("let x = '\\xFF'", "let x = \"ÿ\";\n");
test("let x = '\\xF0\\x9F\\x8D\\x95'", "let x = \"ð\u{9f}\u{8d}\u{95}\";\n");
test("let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x = \"𐐂\\udc03\\ud804\";\n"); // surrogates
// Unescaped double quotes inside single-quoted input get a backslash added in the output
test("let x = 'abc\"'", "let x = \"abc\\\"\";\n");
test("let x = 'abc\"\"\"'", "let x = \"abc\\\"\\\"\\\"\";\n");
test("let x = '\"def'", "let x = \"\\\"def\";\n");
test("let x = '\"\"\"def'", "let x = \"\\\"\\\"\\\"def\";\n");
test("let x = 'abc\"def'", "let x = \"abc\\\"def\";\n");
test("let x = 'abc\"\"\"def\"\"\"ghi'", "let x = \"abc\\\"\\\"\\\"def\\\"\\\"\\\"ghi\";\n");
// Does not double-escape already-escaped quotes
test("let x = '\\\"'", "let x = \"\\\"\";\n");
test("let x = 'abc\\\"\\\"'", "let x = \"abc\\\"\\\"\";\n");
test("let x = '\\\"\\\"def'", "let x = \"\\\"\\\"def\";\n");
test("let x = 'abc\\\"\\\"def'", "let x = \"abc\\\"\\\"def\";\n");
// A quote gains a backslash only if it is not already escaped;
// an escape sequence or escaped backslash directly before it does not count as escaping it
test("let x = '\\r\\n\"'", "let x = \"\\r\\n\\\"\";\n");
test("let x = '\\\\\"'", "let x = \"\\\\\\\"\";\n");
test("let x = '\\\\\\\"'", "let x = \"\\\\\\\"\";\n");
// Does not escape other characters
test("let x = '\t'", "let x = \"\t\";\n");
// NOTE(review): `'\\x41'` and `'\\u{41}'` are also asserted earlier in this function with the
// decoded output `"A"`. The two expectations contradict each other, so presumably this listing
// interleaves two revisions of the test. Confirm which assertions are current.
// Prints other escapes as in original
test("let x = '\\t'", "let x = \"\\t\";\n");
test("let x = '\\x41'", "let x = \"\\x41\";\n");
test("let x = '\\u{41}'", "let x = \"\\u{41}\";\n");
test("let x = '\\uD800'", "let x = \"\\uD800\";\n");
test("let x = '\\uD801\\uDC02'", "let x = \"\\uD801\\uDC02\";\n");

// `minify` option

// Escapes characters and chooses best quote character
test_minify("let x = ''", "let x=``;");
test_minify("let x = '\\b'", "let x=`\\b`;");
test_minify("let x = '\\f'", "let x=`\\f`;");
test_minify("let x = '\t'", "let x=`\t`;");
test_minify("let x = '\\v'", "let x=`\\v`;");
test_minify("let x = '\\n'", "let x=`\n`;");
test_minify("let x = '\\r'", "let x=`\\r`;");
test_minify("let x = '\\r\\n'", "let x=`\\r\n`;");
test_minify("let x = '\\''", "let x=`'`;");
test_minify("let x = '\"'", "let x=`\"`;");
test_minify("let x = '`'", "let x=\"`\";");
test_minify("let x = '\\'\"'", "let x=`'\"`;");
test_minify("let x = '\\'`'", "let x=\"'`\";");
test_minify("let x = '\"`'", "let x='\"`';");
test_minify("let x = '\\\\'", "let x=`\\\\`;");
test_minify("let x = '\x00'", "let x=`\\0`;");
test_minify("let x = '\x00!'", "let x=`\\0!`;");
test_minify("let x = '\x001'", "let x=`\\x001`;");
test_minify("let x = '\\0'", "let x=`\\0`;");
test_minify("let x = '\\0!'", "let x=`\\0!`;");
test_minify("let x = '\x07'", "let x=`\\x07`;");
test_minify("let x = '\x07!'", "let x=`\\x07!`;");
test_minify("let x = '\x071'", "let x=`\\x071`;");
test_minify("let x = '\\7'", "let x=`\\x07`;");
test_minify("let x = '\\7!'", "let x=`\\x07!`;");
test_minify("let x = '\\01'", "let x=`\x01`;");
test_minify("let x = '\x10'", "let x=`\x10`;");
test_minify("let x = '\\x10'", "let x=`\x10`;");
test_minify("let x = '\x1B'", "let x=`\\x1B`;");
test_minify("let x = '\\x1B'", "let x=`\\x1B`;");
test_minify("let x = '\\x41'", "let x=`A`;");
test_minify("let x = '\u{ABCD}'", "let x=`\u{ABCD}`;");
test_minify("let x = '\\uABCD'", "let x=`\u{ABCD}`;");
test_minify("let x = '\\U000123AB'", "let x=`U000123AB`;");
test_minify("let x = '\\u{123AB}'", "let x=`\u{123ab}`;");
test_minify("let x = '\\u{41}'", "let x=`A`;");
test_minify("let x = '\\uD808\\uDFAB'", "let x=`\u{123ab}`;");
test_minify("let x = '\\uD808'", "let x=`\\ud808`;"); // lone surrogate
test_minify("let x = '\\uD808X'", "let x=`\\ud808X`;");
test_minify("let x = '\\uDFAB'", "let x=`\\udfab`;");
test_minify("let x = '\\uDFABX'", "let x=`\\udfabX`;");
test_minify("let x = '\\x80'", "let x=`\u{80}`;");
test_minify("let x = '\\xFF'", "let x=`ÿ`;");
test_minify("let x = '\\xF0\\x9F\\x8D\\x95'", "let x=`ð\u{9f}\u{8d}\u{95}`;");
test_minify("let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x=`𐐂\\udc03\\ud804`;"); // surrogates
}

#[test]
Expand Down
50 changes: 31 additions & 19 deletions crates/oxc_codegen/tests/integration/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,13 @@ fn unicode_escape() {
test("console.log('こんにちは');", "console.log(\"こんにちは\");\n");
test("console.log('안녕하세요');", "console.log(\"안녕하세요\");\n");
test("console.log('🧑‍🤝‍🧑');", "console.log(\"🧑‍🤝‍🧑\");\n");
test("console.log(\"\\uD800\\uD801\")", "console.log(\"\\ud800\\ud801\");\n");
test("console.log(\"\\uD800\\uD801\")", "console.log(\"\\uD800\\uD801\");\n");

test_minify("console.log('你好');", "console.log(`你好`);");
test_minify("console.log('こんにちは');", "console.log(`こんにちは`);");
test_minify("console.log('안녕하세요');", "console.log(`안녕하세요`);");
test_minify("console.log('🧑‍🤝‍🧑');", "console.log(`🧑‍🤝‍🧑`);");
test_minify("console.log(\"\\uD800\\uD801\")", "console.log(`\\ud800\\ud801`);");
}

#[test]
Expand Down Expand Up @@ -511,53 +517,59 @@ fn getter_setter() {

#[test]
fn string() {
// Uses quotes as requested in options
let single_quote = CodegenOptions { single_quote: true, ..CodegenOptions::default() };
test_options("let x = \"'\";", "let x = '\\'';\n", single_quote);
let double_quote = CodegenOptions { single_quote: false, ..CodegenOptions::default() };
test_options("let x = '\\\"';", "let x = \"\\\"\";\n", double_quote);

// `${` only escaped when quote is backtick
test("let x = \"${}\";", "let x = \"${}\";\n");
test_minify("let x = \"${}\";", "let x=\"${}\";");
test("let x = '${}';", "let x = \"${}\";\n");
test_minify("let x = '${}';", "let x=\"${}\";");
test("let x = '\"\"${}';", "let x = \"\\\"\\\"${}\";\n");
test_minify("let x = '\"\"${}';", "let x='\"\"${}';");
test("let x = '\"\"\\'\\'${}';", "let x = \"\\\"\\\"''${}\";\n");
test("let x = '\"\"\\'\\'${}';", "let x = \"\\\"\\\"\\'\\'${}\";\n");
test_minify("let x = '\"\"\\'\\'${}';", "let x=`\"\"''\\${}`;");
test_minify("let x = '\\'\\'\\'\"\"\"${}';", "let x=`'''\"\"\"\\${}`;");

// Lossy replacement character
test("let x = \"�\\u{FFFD}\";", "let x = \"�\";\n");
test("let x = '�\\u{FFFD}';", "let x = \"�\\u{FFFD}\";\n");
test_minify("let x = \"�\\u{FFFD}\";", "let x=`��`;");
test(
"let x = \"� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �\";",
"let x = \"� ��� � ��� �\";\n",
"let x = '� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �';",
"let x = \"� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �\";\n",
);
test_minify(
"let x = \"� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �\";",
"let x = '� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �';",
"let x=`� ��� � ��� �`;",
);
// Lone surrogates
test(
"let x = \"\\uD800 \\uDBFF \\uDC00 \\uDFFF\";",
"let x = \"\\ud800 \\udbff \\udc00 \\udfff\";\n",
"let x = '\\uD800 \\uDBFF \\uDC00 \\uDFFF';",
"let x = \"\\uD800 \\uDBFF \\uDC00 \\uDFFF\";\n",
);
test_minify(
"let x = \"\\uD800 \\uDBFF \\uDC00 \\uDFFF\";",
"let x = '\\uD800 \\uDBFF \\uDC00 \\uDFFF';",
"let x=`\\ud800 \\udbff \\udc00 \\udfff`;",
);
test("let x = \"\\uD800\u{41}\";", "let x = \"\\ud800A\";\n");
test_minify("let x = \"\\uD800\u{41}\";", "let x=`\\ud800A`;");
test("let x = '\\uD800\\u{41}';", "let x = \"\\uD800\\u{41}\";\n");
test_minify("let x = '\\uD800\\u{41}';", "let x=`\\ud800A`;");
// Invalid pairs
test(
"let x = \"\\uD800\\uDBFF \\uDC00\\uDFFF\";",
"let x = \"\\ud800\\udbff \\udc00\\udfff\";\n",
"let x = '\\uD800\\uDBFF \\uDC00\\uDFFF';",
"let x = \"\\uD800\\uDBFF \\uDC00\\uDFFF\";\n",
);
test_minify(
"let x = \"\\uD800\\uDBFF \\uDC00\\uDFFF\";",
"let x = '\\uD800\\uDBFF \\uDC00\\uDFFF';",
"let x=`\\ud800\\udbff \\udc00\\udfff`;",
);
// Lone surrogates and lossy replacement characters
test(
"let x = \"��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}\";",
"let x = \"����\\ud800\\udbff����\\udc00\\udfff����\";\n",
"let x = '��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}';",
"let x = \"��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}\";\n",
);
test_minify(
"let x = \"��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}\";",
"let x = '��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}';",
"let x=`����\\ud800\\udbff����\\udc00\\udfff����`;",
);

Expand Down
Loading