Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 59 additions & 35 deletions crates/oxide/src/extractor/pre_processors/ruby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,44 +34,46 @@ impl PreProcessor for Ruby {

// Extract embedded template languages
// https://viewcomponent.org/guide/templates.html#interpolations
let content_as_str = std::str::from_utf8(content).unwrap();

let starts = TEMPLATE_START_REGEX
.captures_iter(content_as_str)
.collect::<Vec<_>>();
let ends = TEMPLATE_END_REGEX
.captures_iter(content_as_str)
.collect::<Vec<_>>();

for start in starts.iter() {
// The language for this block
let lang = start.get(1).unwrap().as_str();

// The HEREDOC delimiter
let delimiter_start = start.get(2).unwrap().as_str();

// Where the "body" starts for the HEREDOC block
let body_start = start.get(0).unwrap().end();

// Look through all of the ends to find a matching language
for end in ends.iter() {
// 1. This must appear after the start
let body_end = end.get(0).unwrap().start();
if body_end < body_start {
continue;
}
// Only process if content is valid UTF-8, otherwise skip HEREDOC extraction
// but still perform the byte-level Ruby processing below
if let Ok(content_as_str) = std::str::from_utf8(content) {
let starts = TEMPLATE_START_REGEX
.captures_iter(content_as_str)
.collect::<Vec<_>>();
let ends = TEMPLATE_END_REGEX
.captures_iter(content_as_str)
.collect::<Vec<_>>();

for start in starts.iter() {
// The language for this block
let lang = start.get(1).unwrap().as_str();

// The HEREDOC delimiter
let delimiter_start = start.get(2).unwrap().as_str();

// Where the "body" starts for the HEREDOC block
let body_start = start.get(0).unwrap().end();

// Look through all of the ends to find a matching language
for end in ends.iter() {
// 1. This must appear after the start
let body_end = end.get(0).unwrap().start();
if body_end < body_start {
continue;
}

// The languages must match otherwise we haven't found the end
let delimiter_end = end.get(1).unwrap().as_str();
if delimiter_end != delimiter_start {
continue;
}
// The languages must match otherwise we haven't found the end
let delimiter_end = end.get(1).unwrap().as_str();
if delimiter_end != delimiter_start {
continue;
}

let body = &content_as_str[body_start..body_end];
let replaced = pre_process_input(body.as_bytes(), &lang.to_ascii_lowercase());
let body = &content_as_str[body_start..body_end];
let replaced = pre_process_input(body.as_bytes(), &lang.to_ascii_lowercase());

result.replace_range(body_start..body_end, replaced);
break;
result.replace_range(body_start..body_end, replaced);
break;
}
}
}

Expand Down Expand Up @@ -427,4 +429,26 @@ mod tests {
vec!["text-amber-600", "text-sky-500", "text-green-500"],
);
}

#[test]
fn test_invalid_utf8_does_not_panic() {
    use crate::extractor::pre_processors::pre_processor::PreProcessor;

    // Three bare continuation bytes (0x80, 0x81, 0x82) with no leading byte:
    // this can never decode as UTF-8.
    let malformed: &[u8] = &[0x80, 0x81, 0x82];

    // The call has to return normally instead of panicking, and for this
    // particular input the output is expected to equal the input bytes.
    let output = Ruby::default().process(malformed);
    assert_eq!(output, malformed);
}

#[test]
fn test_valid_utf8_with_multibyte_chars() {
    // Well-formed UTF-8 that contains a multi-byte character (the em-dash in
    // the comment line) must still produce the candidates from the `%w[...]`
    // literal on the following line.
    let source = "# Comment with em—dash\n%w[flex px-2.5]";
    let expected = vec!["flex", "px-2.5"];

    Ruby::test_extract_contains(source, expected);
}
}
28 changes: 21 additions & 7 deletions crates/oxide/src/extractor/pre_processors/vue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@ impl PreProcessor for Vue {
fn process(&self, content: &[u8]) -> Vec<u8> {
let mut result = content.to_vec();

let content_as_str = std::str::from_utf8(content).unwrap();
for (_, [lang, body]) in TEMPLATE_REGEX
.captures_iter(content_as_str)
.map(|c| c.extract())
{
let replaced = pre_process_input(body.as_bytes(), lang);
result = result.replace(body, replaced);
// Only process template tags if content is valid UTF-8
if let Ok(content_as_str) = std::str::from_utf8(content) {
for (_, [lang, body]) in TEMPLATE_REGEX
.captures_iter(content_as_str)
.map(|c| c.extract())
{
let replaced = pre_process_input(body.as_bytes(), lang);
result = result.replace(body, replaced);
}
}

result
Expand All @@ -43,4 +45,16 @@ mod tests {

Vue::test_extract_contains(input, vec!["bg-neutral-900", "text-red-500"]);
}

#[test]
fn test_invalid_utf8_does_not_panic() {
    // Three bare continuation bytes (0x80, 0x81, 0x82) with no leading byte:
    // this can never decode as UTF-8.
    let malformed: &[u8] = &[0x80, 0x81, 0x82];

    // The call has to return normally instead of panicking, and the output
    // is expected to equal the input bytes.
    let output = Vue::default().process(malformed);
    assert_eq!(output, malformed);
}
}