From 7dc7878c037fc3909feb5666638c816606745263 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Fri, 28 Mar 2025 16:15:49 +0100 Subject: [PATCH 1/6] passthrough the extension to the Extractor itself --- crates/oxide/src/extractor/mod.rs | 12 +++++++----- .../extractor/pre_processors/pre_processor.rs | 2 +- crates/oxide/src/main.rs | 2 +- crates/oxide/src/scanner/mod.rs | 18 +++++++++--------- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/crates/oxide/src/extractor/mod.rs b/crates/oxide/src/extractor/mod.rs index fc9a870869a1..0c628196bbd8 100644 --- a/crates/oxide/src/extractor/mod.rs +++ b/crates/oxide/src/extractor/mod.rs @@ -56,15 +56,17 @@ impl fmt::Display for Extracted<'_> { #[derive(Debug)] pub struct Extractor<'a> { cursor: cursor::Cursor<'a>, + extension: Option<&'a str>, css_variable_machine: CssVariableMachine, candidate_machine: CandidateMachine, } impl<'a> Extractor<'a> { - pub fn new(input: &'a [u8]) -> Self { + pub fn new(input: &'a [u8], extension: Option<&'a str>) -> Self { Self { cursor: cursor::Cursor::new(input), + extension, css_variable_machine: Default::default(), candidate_machine: Default::default(), @@ -208,7 +210,7 @@ mod tests { } fn extract_sorted_candidates(input: &str) -> Vec<&str> { - let mut machine = Extractor::new(input.as_bytes()); + let mut machine = Extractor::new(input.as_bytes(), None); let mut actual = machine .extract() .iter() @@ -222,7 +224,7 @@ mod tests { } fn extract_sorted_css_variables(input: &str) -> Vec<&str> { - let mut machine = Extractor::new(input.as_bytes()); + let mut machine = Extractor::new(input.as_bytes(), None); let mut actual = machine .extract() .iter() @@ -287,12 +289,12 @@ mod tests { let input = include_bytes!("../fixtures/example.html"); let throughput = Throughput::compute(iterations, input.len(), || { - let mut extractor = Extractor::new(input); + let mut extractor = Extractor::new(input, None); _ = black_box(extractor.extract()); }); eprintln!("Extractor throughput: {:}", 
throughput); - let mut extractor = Extractor::new(input); + let mut extractor = Extractor::new(input, None); let start = std::time::Instant::now(); _ = black_box(extractor.extract().len()); let end = start.elapsed(); diff --git a/crates/oxide/src/extractor/pre_processors/pre_processor.rs b/crates/oxide/src/extractor/pre_processors/pre_processor.rs index 7b094129c736..2e8f5a7be2ea 100644 --- a/crates/oxide/src/extractor/pre_processors/pre_processor.rs +++ b/crates/oxide/src/extractor/pre_processors/pre_processor.rs @@ -33,7 +33,7 @@ pub trait PreProcessor: Sized + Default { let processor = Self::default(); let transformed = processor.process(input); - let extracted = Extractor::new(&transformed).extract(); + let extracted = Extractor::new(&transformed, None).extract(); // Extract all candidates and css variables. let candidates = extracted diff --git a/crates/oxide/src/main.rs b/crates/oxide/src/main.rs index 1fa2918afb72..520702df6be5 100644 --- a/crates/oxide/src/main.rs +++ b/crates/oxide/src/main.rs @@ -5,7 +5,7 @@ use tailwindcss_oxide::extractor::{Extracted, Extractor}; use tailwindcss_oxide::throughput::Throughput; fn run_full_extractor(input: &[u8]) -> Vec<&[u8]> { - Extractor::new(input) + Extractor::new(input, None) .extract() .into_iter() .map(|x| match x { diff --git a/crates/oxide/src/scanner/mod.rs b/crates/oxide/src/scanner/mod.rs index 57624910f51e..d902a6f69910 100644 --- a/crates/oxide/src/scanner/mod.rs +++ b/crates/oxide/src/scanner/mod.rs @@ -318,7 +318,7 @@ impl Scanner { &mut self, changed_content: ChangedContent, ) -> Vec<(String, usize)> { - let content = read_changed_content(changed_content).unwrap_or_default(); + let (content, extension) = read_changed_content(changed_content).unwrap_or_default(); let original_content = &content; // Workaround for legacy upgrades: @@ -328,7 +328,7 @@ impl Scanner { let content = content.replace("-[]", "XYZ"); let offset = content.as_ptr() as usize; - let mut extractor = Extractor::new(&content[..]); + let 
mut extractor = Extractor::new(&content[..], Some(&extension)); extractor .extract() @@ -355,7 +355,7 @@ impl Scanner { } } -fn read_changed_content(c: ChangedContent) -> Option> { +fn read_changed_content(c: ChangedContent) -> Option<(Vec, String)> { let (content, extension) = match c { ChangedContent::File(file, extension) => match std::fs::read(&file) { Ok(content) => (content, extension), @@ -368,7 +368,7 @@ fn read_changed_content(c: ChangedContent) -> Option> { ChangedContent::Content(contents, extension) => (contents.into_bytes(), extension), }; - Some(pre_process_input(&content, &extension)) + Some((pre_process_input(&content, &extension), extension)) } pub fn pre_process_input(content: &[u8], extension: &str) -> Vec { @@ -389,7 +389,7 @@ pub fn pre_process_input(content: &[u8], extension: &str) -> Vec { } #[tracing::instrument(skip_all)] -fn read_all_files(changed_content: Vec) -> Vec> { +fn read_all_files(changed_content: Vec) -> Vec<(Vec, String)> { event!( tracing::Level::INFO, "Reading {:?} file(s)", @@ -403,16 +403,16 @@ fn read_all_files(changed_content: Vec) -> Vec> { } #[tracing::instrument(skip_all)] -fn parse_all_blobs(blobs: Vec>) -> Vec { +fn parse_all_blobs(blobs: Vec<(Vec, String)>) -> Vec { let mut result: Vec<_> = blobs .par_iter() - .flat_map(|blob| blob.par_split(|x| *x == b'\n')) - .filter_map(|blob| { + .flat_map(|(blob, extension)| blob.par_split(|x| *x == b'\n').map(move |x| (x, extension))) + .filter_map(|(blob, extension)| { if blob.is_empty() { return None; } - let extracted = crate::extractor::Extractor::new(blob).extract(); + let extracted = crate::extractor::Extractor::new(blob, Some(extension)).extract(); if extracted.is_empty() { return None; } From 7b2a5c141993adc5ee759b773b8e031e7a374cbc Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Fri, 28 Mar 2025 16:21:44 +0100 Subject: [PATCH 2/6] _do_ scan `.css` files but only for CSS variable usage --- crates/oxide/src/scanner/fixtures/ignored-extensions.txt | 1 - 
crates/oxide/src/scanner/mod.rs | 9 +++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/oxide/src/scanner/fixtures/ignored-extensions.txt b/crates/oxide/src/scanner/fixtures/ignored-extensions.txt index f147c24fe110..2b19a87c02a7 100644 --- a/crates/oxide/src/scanner/fixtures/ignored-extensions.txt +++ b/crates/oxide/src/scanner/fixtures/ignored-extensions.txt @@ -1,4 +1,3 @@ -css less lock sass diff --git a/crates/oxide/src/scanner/mod.rs b/crates/oxide/src/scanner/mod.rs index d902a6f69910..801c114cf9fe 100644 --- a/crates/oxide/src/scanner/mod.rs +++ b/crates/oxide/src/scanner/mod.rs @@ -84,6 +84,9 @@ pub struct Scanner { /// All found extensions extensions: FxHashSet, + /// All CSS files we want to scan for CSS variable usage + css_files: Vec, + /// All files that we have to scan files: Vec, @@ -248,6 +251,12 @@ impl Scanner { .and_then(|x| x.to_str()) .unwrap_or_default(); // In case the file has no extension + // Special handling for CSS files to extract CSS variables + if extension == "css" { + self.css_files.push(path); + continue; + } + self.extensions.insert(extension.to_owned()); self.changed_content.push(ChangedContent::File( path.to_path_buf(), From 7eb49088a319325378bc3d42a76369ee3b30cc20 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Fri, 28 Mar 2025 16:22:16 +0100 Subject: [PATCH 3/6] Revert "passthrough the extension to the Extractor itself" This reverts commit 4ff21a330404981e9bd395dafc82c7ea86104742. 
--- crates/oxide/src/extractor/mod.rs | 12 +++++------- .../extractor/pre_processors/pre_processor.rs | 2 +- crates/oxide/src/main.rs | 2 +- crates/oxide/src/scanner/mod.rs | 18 +++++++++--------- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/crates/oxide/src/extractor/mod.rs b/crates/oxide/src/extractor/mod.rs index 0c628196bbd8..fc9a870869a1 100644 --- a/crates/oxide/src/extractor/mod.rs +++ b/crates/oxide/src/extractor/mod.rs @@ -56,17 +56,15 @@ impl fmt::Display for Extracted<'_> { #[derive(Debug)] pub struct Extractor<'a> { cursor: cursor::Cursor<'a>, - extension: Option<&'a str>, css_variable_machine: CssVariableMachine, candidate_machine: CandidateMachine, } impl<'a> Extractor<'a> { - pub fn new(input: &'a [u8], extension: Option<&'a str>) -> Self { + pub fn new(input: &'a [u8]) -> Self { Self { cursor: cursor::Cursor::new(input), - extension, css_variable_machine: Default::default(), candidate_machine: Default::default(), @@ -210,7 +208,7 @@ mod tests { } fn extract_sorted_candidates(input: &str) -> Vec<&str> { - let mut machine = Extractor::new(input.as_bytes(), None); + let mut machine = Extractor::new(input.as_bytes()); let mut actual = machine .extract() .iter() @@ -224,7 +222,7 @@ mod tests { } fn extract_sorted_css_variables(input: &str) -> Vec<&str> { - let mut machine = Extractor::new(input.as_bytes(), None); + let mut machine = Extractor::new(input.as_bytes()); let mut actual = machine .extract() .iter() @@ -289,12 +287,12 @@ mod tests { let input = include_bytes!("../fixtures/example.html"); let throughput = Throughput::compute(iterations, input.len(), || { - let mut extractor = Extractor::new(input, None); + let mut extractor = Extractor::new(input); _ = black_box(extractor.extract()); }); eprintln!("Extractor throughput: {:}", throughput); - let mut extractor = Extractor::new(input, None); + let mut extractor = Extractor::new(input); let start = std::time::Instant::now(); _ = black_box(extractor.extract().len()); let end = 
start.elapsed(); diff --git a/crates/oxide/src/extractor/pre_processors/pre_processor.rs b/crates/oxide/src/extractor/pre_processors/pre_processor.rs index 2e8f5a7be2ea..7b094129c736 100644 --- a/crates/oxide/src/extractor/pre_processors/pre_processor.rs +++ b/crates/oxide/src/extractor/pre_processors/pre_processor.rs @@ -33,7 +33,7 @@ pub trait PreProcessor: Sized + Default { let processor = Self::default(); let transformed = processor.process(input); - let extracted = Extractor::new(&transformed, None).extract(); + let extracted = Extractor::new(&transformed).extract(); // Extract all candidates and css variables. let candidates = extracted diff --git a/crates/oxide/src/main.rs b/crates/oxide/src/main.rs index 520702df6be5..1fa2918afb72 100644 --- a/crates/oxide/src/main.rs +++ b/crates/oxide/src/main.rs @@ -5,7 +5,7 @@ use tailwindcss_oxide::extractor::{Extracted, Extractor}; use tailwindcss_oxide::throughput::Throughput; fn run_full_extractor(input: &[u8]) -> Vec<&[u8]> { - Extractor::new(input, None) + Extractor::new(input) .extract() .into_iter() .map(|x| match x { diff --git a/crates/oxide/src/scanner/mod.rs b/crates/oxide/src/scanner/mod.rs index 801c114cf9fe..fd281185edaf 100644 --- a/crates/oxide/src/scanner/mod.rs +++ b/crates/oxide/src/scanner/mod.rs @@ -327,7 +327,7 @@ impl Scanner { &mut self, changed_content: ChangedContent, ) -> Vec<(String, usize)> { - let (content, extension) = read_changed_content(changed_content).unwrap_or_default(); + let content = read_changed_content(changed_content).unwrap_or_default(); let original_content = &content; // Workaround for legacy upgrades: @@ -337,7 +337,7 @@ impl Scanner { let content = content.replace("-[]", "XYZ"); let offset = content.as_ptr() as usize; - let mut extractor = Extractor::new(&content[..], Some(&extension)); + let mut extractor = Extractor::new(&content[..]); extractor .extract() @@ -364,7 +364,7 @@ impl Scanner { } } -fn read_changed_content(c: ChangedContent) -> Option<(Vec, String)> { +fn 
read_changed_content(c: ChangedContent) -> Option> { let (content, extension) = match c { ChangedContent::File(file, extension) => match std::fs::read(&file) { Ok(content) => (content, extension), @@ -377,7 +377,7 @@ fn read_changed_content(c: ChangedContent) -> Option<(Vec, String)> { ChangedContent::Content(contents, extension) => (contents.into_bytes(), extension), }; - Some((pre_process_input(&content, &extension), extension)) + Some(pre_process_input(&content, &extension)) } pub fn pre_process_input(content: &[u8], extension: &str) -> Vec { @@ -398,7 +398,7 @@ pub fn pre_process_input(content: &[u8], extension: &str) -> Vec { } #[tracing::instrument(skip_all)] -fn read_all_files(changed_content: Vec) -> Vec<(Vec, String)> { +fn read_all_files(changed_content: Vec) -> Vec> { event!( tracing::Level::INFO, "Reading {:?} file(s)", @@ -412,16 +412,16 @@ fn read_all_files(changed_content: Vec) -> Vec<(Vec, String) } #[tracing::instrument(skip_all)] -fn parse_all_blobs(blobs: Vec<(Vec, String)>) -> Vec { +fn parse_all_blobs(blobs: Vec>) -> Vec { let mut result: Vec<_> = blobs .par_iter() - .flat_map(|(blob, extension)| blob.par_split(|x| *x == b'\n').map(move |x| (x, extension))) - .filter_map(|(blob, extension)| { + .flat_map(|blob| blob.par_split(|x| *x == b'\n')) + .filter_map(|blob| { if blob.is_empty() { return None; } - let extracted = crate::extractor::Extractor::new(blob, Some(extension)).extract(); + let extracted = crate::extractor::Extractor::new(blob).extract(); if extracted.is_empty() { return None; } From 9f7d0f96963d5d19d0971161f2c0bb8c768f4dd7 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Fri, 28 Mar 2025 16:45:41 +0100 Subject: [PATCH 4/6] extract used CSS variables from CSS files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will track CSS files while traversing the folder structure, but don't extract any normal candidates from these CSS files. 
We will also not include these files into any of the returned globs. We will just run the CSS extractor on these CSS files, and every time we find a CSS variable, we will verify whether it was used or not. For now, "using", just means if it is used inside of `var(…)`. --- crates/oxide/src/extractor/mod.rs | 36 ++++++++++++++++++++ crates/oxide/src/scanner/mod.rs | 56 +++++++++++++++++++++++++++++-- crates/oxide/tests/scanner.rs | 35 +++++++++++++++++++ 3 files changed, 125 insertions(+), 2 deletions(-) diff --git a/crates/oxide/src/extractor/mod.rs b/crates/oxide/src/extractor/mod.rs index fc9a870869a1..c0b95bedc697 100644 --- a/crates/oxide/src/extractor/mod.rs +++ b/crates/oxide/src/extractor/mod.rs @@ -1,5 +1,6 @@ use crate::cursor; use crate::extractor::machine::Span; +use bstr::ByteSlice; use candidate_machine::CandidateMachine; use css_variable_machine::CssVariableMachine; use machine::{Machine, MachineState}; @@ -139,6 +140,41 @@ impl<'a> Extractor<'a> { extracted } + + pub fn extract_css_variables_from_css_files(&mut self) -> Vec> { + let mut extracted = Vec::with_capacity(100); + + let len = self.cursor.input.len(); + + let cursor = &mut self.cursor.clone(); + while cursor.pos < len { + if cursor.curr.is_ascii_whitespace() { + cursor.advance(); + continue; + } + + if let MachineState::Done(span) = self.css_variable_machine.next(cursor) { + // We are only interested in variables that are used, not defined. Therefore we + // need to ensure that the variable is prefixed with `var(`. + if span.start < 4 { + cursor.advance(); + continue; + } + + let slice_before = Span::new(span.start - 4, span.start - 1); + if !slice_before.slice(self.cursor.input).starts_with(b"var(") { + cursor.advance(); + continue; + } + + extracted.push(Extracted::CssVariable(span.slice(self.cursor.input))); + } + + cursor.advance(); + } + + extracted + } } // Extract sub-candidates from a given range. 
diff --git a/crates/oxide/src/scanner/mod.rs b/crates/oxide/src/scanner/mod.rs index fd281185edaf..bd23a050105f 100644 --- a/crates/oxide/src/scanner/mod.rs +++ b/crates/oxide/src/scanner/mod.rs @@ -215,11 +215,25 @@ impl Scanner { fn extract_candidates(&mut self) -> Vec { let changed_content = self.changed_content.drain(..).collect::>(); - let candidates = parse_all_blobs(read_all_files(changed_content)); + // Extract all candidates from the changed content + let mut new_candidates = parse_all_blobs(read_all_files(changed_content)); + + // Extract all CSS variables from the CSS files + let css_files = self.css_files.drain(..).collect::>(); + if !css_files.is_empty() { + let css_variables = extract_css_variables(read_all_files( + css_files + .into_iter() + .map(|file| ChangedContent::File(file, "css".into())) + .collect(), + )); + + new_candidates.extend(css_variables); + } // Only compute the new candidates and ignore the ones we already have. This is for // subsequent calls to prevent serializing the entire set of candidates every time. 
- let mut new_candidates = candidates + let mut new_candidates = new_candidates .into_par_iter() .filter(|candidate| !self.candidates.contains(candidate)) .collect::>(); @@ -411,6 +425,44 @@ fn read_all_files(changed_content: Vec) -> Vec> { .collect() } +#[tracing::instrument(skip_all)] +fn extract_css_variables(blobs: Vec>) -> Vec { + let mut result: Vec<_> = blobs + .par_iter() + .flat_map(|blob| blob.par_split(|x| *x == b'\n')) + .filter_map(|blob| { + if blob.is_empty() { + return None; + } + + let extracted = + crate::extractor::Extractor::new(blob).extract_css_variables_from_css_files(); + if extracted.is_empty() { + return None; + } + + Some(FxHashSet::from_iter(extracted.into_iter().map( + |x| match x { + Extracted::CssVariable(bytes) => bytes, + _ => &[], + }, + ))) + }) + .reduce(Default::default, |mut a, b| { + a.extend(b); + a + }) + .into_iter() + .map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) }) + .collect(); + + // SAFETY: Unstable sort is faster and in this scenario it's also safe because we are + // guaranteed to have unique candidates. 
+ result.par_sort_unstable(); + + result +} + #[tracing::instrument(skip_all)] fn parse_all_blobs(blobs: Vec>) -> Vec { let mut result: Vec<_> = blobs diff --git a/crates/oxide/tests/scanner.rs b/crates/oxide/tests/scanner.rs index ab91a8c7267f..70b9a86275d1 100644 --- a/crates/oxide/tests/scanner.rs +++ b/crates/oxide/tests/scanner.rs @@ -1735,4 +1735,39 @@ mod scanner { assert_eq!(candidates, vec!["content-['abcd/xyz.html']"]); } + + #[test] + fn test_extract_used_css_variables_from_css() { + let dir = tempdir().unwrap().into_path(); + create_files_in( + &dir, + &[ + ( + "src/index.css", + r#" + @theme { + --color-red: #ff0000; /* Not used, so don't extract */ + --color-green: #00ff00; /* Not used, so don't extract */ + } + + .button { + color: var(--color-red); /* Used, so extract */ + } + "#, + ), + ("src/used-at-start.css", "var(--color-used-at-start)"), + // Here to verify that we don't crash when trying to find `var(` in front of the + // variable. + ("src/defined-at-start.css", "--color-defined-at-start: red;"), + ], + ); + + let mut scanner = Scanner::new(vec![public_source_entry_from_pattern( + dir.clone(), + "@source './'", + )]); + let candidates = scanner.scan(); + + assert_eq!(candidates, vec!["--color-red", "--color-used-at-start"]); + } } From 481793b1b2967532300e94a2edac9e9fec4ca1a7 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Fri, 28 Mar 2025 17:40:44 +0100 Subject: [PATCH 5/6] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 434ac59e9b55..0efc1de3af53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix negated `content` rules in legacy JavaScript configuration ([#17255](https://github.com/tailwindlabs/tailwindcss/pull/17255)) - Extract special `@("@")md:…` syntax in Razor files ([#17427](https://github.com/tailwindlabs/tailwindcss/pull/17427)) - Disallow arbitrary values with 
top-level braces and semicolons as well as unbalanced parentheses and brackets ([#17361](https://github.com/tailwindlabs/tailwindcss/pull/17361)) +- Extract used CSS variables from `.css` files ([#17433](https://github.com/tailwindlabs/tailwindcss/pull/17433)) ### Changed From 5272925870747383886b341d61fa8d9914e8fcce Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Fri, 28 Mar 2025 17:45:10 +0100 Subject: [PATCH 6/6] rename function It's shorter, and fits on a single line now. --- crates/oxide/src/extractor/mod.rs | 2 +- crates/oxide/src/scanner/mod.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/oxide/src/extractor/mod.rs b/crates/oxide/src/extractor/mod.rs index c0b95bedc697..bcbe3e1ae157 100644 --- a/crates/oxide/src/extractor/mod.rs +++ b/crates/oxide/src/extractor/mod.rs @@ -141,7 +141,7 @@ impl<'a> Extractor<'a> { extracted } - pub fn extract_css_variables_from_css_files(&mut self) -> Vec> { + pub fn extract_variables_from_css(&mut self) -> Vec> { let mut extracted = Vec::with_capacity(100); let len = self.cursor.input.len(); diff --git a/crates/oxide/src/scanner/mod.rs b/crates/oxide/src/scanner/mod.rs index bd23a050105f..e678f7b7f215 100644 --- a/crates/oxide/src/scanner/mod.rs +++ b/crates/oxide/src/scanner/mod.rs @@ -435,8 +435,7 @@ fn extract_css_variables(blobs: Vec>) -> Vec { return None; } - let extracted = - crate::extractor::Extractor::new(blob).extract_css_variables_from_css_files(); + let extracted = crate::extractor::Extractor::new(blob).extract_variables_from_css(); if extracted.is_empty() { return None; }