fohte · fohte · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026
diff --git a/src/rules/pattern_lexer.rs b/src/rules/pattern_lexer.rs
@@ -21,6 +21,9 @@ pub enum LexToken {
     CloseBracket,
     /// Angle-bracket placeholder (e.g. "<cmd>", "<path:name>")
     Placeholder(String),
+    /// A multi-word alternation where at least one alternative contains spaces.
+    /// e.g., `"npx prettier"|prettier` -> [["npx", "prettier"], ["prettier"]]
+    MultiWordAlternation(Vec<Vec<String>>),
 }
 
 /// Tokenize a pattern string into a sequence of `LexToken`s.
@@ -37,7 +40,8 @@ pub fn tokenize(pattern: &str) -> Result<Vec<LexToken>, PatternParseError> {
                 chars.next();
             }
 
-            // Quoted string -> Literal (content without quotes)
+            // Quoted string -> Literal (content without quotes), or
+            // multi-word alternation if followed by `|`
             '"' | '\'' => {
                 let quote = ch;
                 chars.next(); // consume opening quote
@@ -46,7 +50,15 @@ pub fn tokenize(pattern: &str) -> Result<Vec<LexToken>, PatternParseError> {
                         "unclosed quote starting at position {pos}"
                     ))
                 })?;
-                tokens.push(LexToken::Literal(value));
+                // Check if this starts a multi-word alternation: "quoted"|...
+                if let Some(&(_, '|')) = chars.peek() {
+                    let first_words: Vec<String> =
+                        value.split_whitespace().map(|s| s.to_string()).collect();
+                    let token = consume_alternation_continuation(&mut chars, first_words)?;
+                    tokens.push(token);
+                } else {
+                    tokens.push(LexToken::Literal(value));
+                }
             }
 
             // Angle bracket placeholder: <cmd>, <path:name>
@@ -114,10 +126,60 @@ pub fn tokenize(pattern: &str) -> Result<Vec<LexToken>, PatternParseError> {
                 tokens.push(classify_negation(&word)?);
             }
 
-            // Any other character: consume a word (until whitespace, bracket, or angle bracket)
+            // Any other character: consume a word (until whitespace, bracket, angle bracket, or quote)
             _ => {
                 let word = consume_word(&mut chars, Some(ch));
-                tokens.push(classify_word(&word)?);
+                // Check if the word ends with `|` and next char is a quote,
+                // indicating a multi-word alternation like `prettier|"npx prettier"`
+                if word.ends_with('|') {
+                    if let Some(&(_, q @ ('"' | '\''))) = chars.peek() {
+                        let prefix = &word[..word.len() - 1];
+                        // Split existing pipe-separated parts into individual alternatives
+                        let mut alternatives: Vec<Vec<String>> = Vec::new();
+                        for part in prefix.split('|') {
+                            if part.is_empty() {
+                                return Err(PatternParseError::EmptyAlternation);
+                            }
+                            alternatives.push(vec![part.to_string()]);
+                        }
+                        // Now consume the quoted part and any further alternatives
+                        chars.next(); // consume opening quote
+                        let quoted = consume_until(&mut chars, q).ok_or_else(|| {
+                            PatternParseError::InvalidSyntax(format!(
+                                "unclosed quote starting at position {pos}"
+                            ))
+                        })?;
+                        let quoted_words: Vec<String> =
+                            quoted.split_whitespace().map(|s| s.to_string()).collect();
+                        if quoted_words.is_empty() {
+                            return Err(PatternParseError::EmptyAlternation);
+                        }
+                        // Continue consuming further alternatives if more `|` follow
+                        let token = consume_alternation_continuation(&mut chars, quoted_words)?;
+                        // Merge: prepend the bare-word alternatives to the continuation result
+                        match token {
+                            LexToken::MultiWordAlternation(mut rest) => {
+                                alternatives.append(&mut rest);
+                                tokens.push(classify_multi_word_alternation(alternatives)?);
+                            }
+                            LexToken::Alternation(rest_alts) => {
+                                for alt in rest_alts {
+                                    alternatives.push(vec![alt]);
+                                }
+                                tokens.push(classify_multi_word_alternation(alternatives)?);
+                            }
+                            _ => unreachable!(
+                                "consume_alternation_continuation returned unexpected: {token:?}"
+                            ),
+                        }
+                    } else {
+                        // Trailing `|` without a following quote: delegate to classify_word
+                        // which will report EmptyAlternation
+                        tokens.push(classify_word(&word)?);
+                    }
+                } else {
+                    tokens.push(classify_word(&word)?);
+                }
             }
         }
     }
@@ -170,7 +232,7 @@ fn consume_until(
 }
 
 fn is_word_boundary(c: char) -> bool {
-    matches!(c, ' ' | '\t' | '[' | ']' | '<')
+    matches!(c, ' ' | '\t' | '[' | ']' | '<' | '"' | '\'') // quotes are boundaries too
 }
 
 /// Classify a raw word into the appropriate LexToken.
@@ -195,6 +257,85 @@ fn classify_negation(word: &str) -> Result<LexToken, PatternParseError> {
     }
 }
 
+/// Consume the alternatives that follow an already-parsed first one (`first_words`).
+///
+/// While the next character is `|`, reads one more alternative: a quoted string (split on
+/// whitespace) or a bare word, which may itself contain `|` and is split into one part each.
+/// Fails with `EmptyAlternation` for an alternative without words, `InvalidSyntax` for an
+/// unclosed quote; the token kind is chosen by `classify_multi_word_alternation`.
+fn consume_alternation_continuation(
+    chars: &mut std::iter::Peekable<std::str::CharIndices<'_>>,
+    first_words: Vec<String>,
+) -> Result<LexToken, PatternParseError> {
+    let mut alternatives = vec![first_words];
+    let mut pipe_taken = false; // a bare word already swallowed the `|` before a quote
+    while pipe_taken || chars.next_if(|&(_, c)| c == '|').is_some() {
+        pipe_taken = false;
+        match chars.peek() {
+            Some(&(pos, q @ ('"' | '\''))) => {
+                chars.next(); // consume opening quote
+                let quoted = consume_until(chars, q).ok_or_else(|| {
+                    PatternParseError::InvalidSyntax(format!(
+                        "unclosed quote starting at position {pos}"
+                    ))
+                })?;
+                let words: Vec<String> = quoted.split_whitespace().map(|s| s.to_string()).collect();
+                if words.is_empty() {
+                    return Err(PatternParseError::EmptyAlternation);
+                }
+                alternatives.push(words);
+            }
+            Some(&(_, c)) if !is_word_boundary(c) => {
+                let word = consume_word(chars, Some(c));
+                // A trailing `|` directly before a quote introduces the next quoted alternative.
+                pipe_taken = word.ends_with('|') && matches!(chars.peek(), Some(&(_, '"' | '\'')));
+                let bare = word.strip_suffix('|').filter(|_| pipe_taken).unwrap_or(word.as_str());
+                for part in bare.split('|') {
+                    if part.is_empty() {
+                        return Err(PatternParseError::EmptyAlternation);
+                    }
+                    alternatives.push(vec![part.to_string()]);
+                }
+            }
+            _ => return Err(PatternParseError::EmptyAlternation),
+        }
+    }
+    classify_multi_word_alternation(alternatives)
+}
+
+/// Pick the token variant for a list of alternatives, each given as its list of words.
+///
+/// When every alternative consists of exactly one word the result is a plain `Alternation`
+/// of those words; otherwise it is a `MultiWordAlternation` carrying the word lists as-is.
+/// An alternative with no words at all is rejected with `EmptyAlternation`.
+fn classify_multi_word_alternation(
+    alternatives: Vec<Vec<String>>,
+) -> Result<LexToken, PatternParseError> {
+    let single_word_only = alternatives.iter().all(|words| words.len() == 1);
+    if !single_word_only {
+        // Some alternative is multi-word or empty; only the empty ones are an error.
+        if alternatives.iter().any(|words| words.is_empty()) {
+            return Err(PatternParseError::EmptyAlternation);
+        }
+        return Ok(LexToken::MultiWordAlternation(alternatives));
+    }
+
+    // Each list holds exactly one word here, so flattening yields one entry per alternative.
+    let parts: Vec<String> = alternatives.into_iter().flatten().collect();
+    validate_alternation_parts_vec(&parts)?;
+    Ok(LexToken::Alternation(parts))
+}
+
+/// Check an alternation that has already been split into its parts.
+///
+/// Returns `EmptyAlternation` as soon as any part is the empty string, `Ok(())` otherwise.
+fn validate_alternation_parts_vec(parts: &[String]) -> Result<(), PatternParseError> {
+    if parts.iter().any(|part| part.is_empty()) {
+        return Err(PatternParseError::EmptyAlternation);
+    }
+    Ok(())
+}
+
 /// Split on '|' and validate that no part is empty.
 fn validate_alternation_parts(word: &str) -> Result<Vec<String>, PatternParseError> {
     word.split('|')
@@ -556,6 +697,97 @@ mod tests {
         );
     }
 
+    // === Multi-word alternation (quoted alternatives are split into word lists) ===
+    #[rstest]
+    #[case::quoted_then_bare(
+        r#""npx prettier"|prettier"#,
+        vec![LexToken::MultiWordAlternation(vec![
+            vec!["npx".into(), "prettier".into()],
+            vec!["prettier".into()],
+        ])]
+    )]
+    #[case::bare_then_quoted(
+        r#"prettier|"npx prettier""#,
+        vec![LexToken::MultiWordAlternation(vec![
+            vec!["prettier".into()],
+            vec!["npx".into(), "prettier".into()],
+        ])]
+    )]
+    #[case::three_alternatives(
+        r#""npx prettier"|"bunx prettier"|prettier"#,
+        vec![LexToken::MultiWordAlternation(vec![
+            vec!["npx".into(), "prettier".into()],
+            vec!["bunx".into(), "prettier".into()],
+            vec!["prettier".into()],
+        ])]
+    )]
+    #[case::multi_word_with_trailing_tokens(
+        r#""npx prettier"|prettier *"#,
+        vec![
+            LexToken::MultiWordAlternation(vec![
+                vec!["npx".into(), "prettier".into()],
+                vec!["prettier".into()],
+            ]),
+            LexToken::Wildcard,
+        ]
+    )]
+    #[case::all_single_word_quoted_becomes_alternation(
+        r#""ast-grep"|sg"#,
+        vec![LexToken::Alternation(vec!["ast-grep".into(), "sg".into()])]
+    )]
+    #[case::mixed_single_and_multi_word(
+        r#"prettier|"npx prettier"|"bunx prettier""#,
+        vec![LexToken::MultiWordAlternation(vec![
+            vec!["prettier".into()],
+            vec!["npx".into(), "prettier".into()],
+            vec!["bunx".into(), "prettier".into()],
+        ])]
+    )]
+    #[case::single_quoted_multi_word(
+        "prettier|'npx prettier'",
+        vec![LexToken::MultiWordAlternation(vec![
+            vec!["prettier".into()],
+            vec!["npx".into(), "prettier".into()],
+        ])]
+    )]
+    #[case::all_single_word_via_bare_and_quoted(
+        r#"foo|"bar""#,
+        vec![LexToken::Alternation(vec!["foo".into(), "bar".into()])]
+    )]
+    #[case::three_bare_and_quoted_single_word(
+        r#"foo|"bar"|baz"#,
+        vec![LexToken::Alternation(vec!["foo".into(), "bar".into(), "baz".into()])]
+    )]
+    fn tokenize_multi_word_alternation(#[case] input: &str, #[case] expected: Vec<LexToken>) {
+        let tokens = tokenize(input).unwrap();
+        assert_eq!(tokens, expected);
+    }
+
+    // === Multi-word alternation error cases ===
+
+    #[rstest]
+    #[case::empty_quoted_alternative(r#"""|prettier"#)]
+    #[case::trailing_pipe_after_quoted(r#""npx prettier"|"#)]
+    fn tokenize_multi_word_alternation_errors(#[case] input: &str) {
+        // Anything other than the EmptyAlternation error fails the case.
+        match tokenize(input) {
+            Err(PatternParseError::EmptyAlternation) => {}
+            result => panic!("expected EmptyAlternation for {input:?}, got {result:?}"),
+        }
+    }
+
+    // === Backward compatibility: single-word alternation unchanged ===
+
+    #[test]
+    fn tokenize_single_word_alternation_unchanged() {
+        // A pattern without any quoted part must still lex to Alternation,
+        // not to MultiWordAlternation.
+        let pattern = "ast-grep|sg";
+        let expected = vec![LexToken::Alternation(vec!["ast-grep".into(), "sg".into()])];
+        let tokens = tokenize(pattern).unwrap();
+        assert_eq!(tokens, expected);
+    }
+
     #[test]
     fn tokenize_single_literal() {
         let result = tokenize("ls").unwrap();

diff --git a/src/rules/pattern_matcher.rs b/src/rules/pattern_matcher.rs
@@ -970,4 +970,77 @@ mod tests {
             expected
         );
     }
+
+    // ========================================
+    // Multi-word alternation matching
+    // ========================================
+
+    /// Helper: expand `pattern_str` with `parse_multi` and report whether any of the
+    /// resulting patterns matches `command_str`.
+    /// The command is re-parsed for every pattern with a schema built from that pattern.
+    /// Panics (via `unwrap`) if the pattern or the command fails to parse.
+    fn check_multi_match(pattern_str: &str, command_str: &str, definitions: &Definitions) -> bool {
+        use crate::rules::pattern_parser::parse_multi;
+
+        let patterns = parse_multi(pattern_str).unwrap();
+        patterns.iter().any(|pattern| {
+            let schema = build_schema_from_pattern(pattern);
+            let command = parse_command(command_str, &schema).unwrap();
+            matches(pattern, &command, definitions)
+        })
+    }
+
+    /// Multi-word alternation patterns checked against concrete command lines.
+    #[rstest]
+    #[case::npx_variant(r#""npx prettier"|prettier *"#, "npx prettier --write .", true)]
+    #[case::bare_variant(r#""npx prettier"|prettier *"#, "prettier --write .", true)]
+    #[case::no_match_different_runner(
+        r#""npx prettier"|prettier *"#,
+        "yarn prettier --write .",
+        false
+    )]
+    #[case::no_match_different_tool(r#""npx prettier"|prettier *"#, "npx eslint --fix .", false)]
+    #[case::three_alternatives_first(
+        r#""npx prettier"|"bunx prettier"|prettier *"#,
+        "npx prettier --write .",
+        true
+    )]
+    #[case::three_alternatives_second(
+        r#""npx prettier"|"bunx prettier"|prettier *"#,
+        "bunx prettier --write .",
+        true
+    )]
+    #[case::three_alternatives_third(
+        r#""npx prettier"|"bunx prettier"|prettier *"#,
+        "prettier --write .",
+        true
+    )]
+    #[case::python_pytest_module(r#""python -m pytest"|pytest *"#, "python -m pytest tests/", true)]
+    #[case::python_pytest_bare(r#""python -m pytest"|pytest *"#, "pytest tests/", true)]
+    #[case::python_pytest_no_match(r#""python -m pytest"|pytest *"#, "python -m mypy", false)]
+    fn multi_word_alternation_matching(
+        #[case] pattern_str: &str,
+        #[case] command_str: &str,
+        #[case] expected: bool,
+    ) {
+        // Matching uses `empty_defs()` as the definitions for every case.
+        let matched = check_multi_match(pattern_str, command_str, &empty_defs());
+        assert_eq!(matched, expected, "pattern {pattern_str:?} vs command {command_str:?}");
+    }
+
+    #[rstest]
+    #[case::backward_compat_first("ast-grep|sg scan *", "ast-grep scan foo", true)]
+    #[case::backward_compat_second("ast-grep|sg scan *", "sg scan foo", true)]
+    #[case::backward_compat_no_match("ast-grep|sg scan *", "rg scan foo", false)]
+    fn multi_word_alternation_backward_compat(
+        #[case] pattern_str: &str,
+        #[case] command_str: &str,
+        #[case] expected: bool,
+    ) {
+        // Patterns without any quoted alternative go through the same helper,
+        // again with `empty_defs()` as the definitions.
+        let matched = check_multi_match(pattern_str, command_str, &empty_defs());
+        assert_eq!(matched, expected);
+    }
 }