diff --git a/docs/src/content/docs/pattern-syntax/matching-behavior.md b/docs/src/content/docs/pattern-syntax/matching-behavior.md index 81d414d..5d71f5b 100644 --- a/docs/src/content/docs/pattern-syntax/matching-behavior.md +++ b/docs/src/content/docs/pattern-syntax/matching-behavior.md @@ -81,6 +81,17 @@ Tokens that do not start with `-` are matched **in order**: | `git push origin main` | Matches | | `git push main origin` | Does not match | +## Backslash Escapes + +A backslash (`\`) in a pattern escapes the following character. During matching, the backslash is stripped and the remaining character is compared literally. This is useful for characters that have special meaning in shells, such as `;`: + +```yaml +# \; in the pattern matches ; in the command +- "find * -exec \\;|+" +``` + +When runok tokenizes the command it resolves `\;` to `;`, just as a shell would, so the pattern's `\;` (after unescaping) matches the command's `;`. + ## Combined Short Flags Combined short flags like `-am` are **not** split into individual flags — they are matched as a single token, exactly as written: diff --git a/docs/src/content/docs/pattern-syntax/overview.md b/docs/src/content/docs/pattern-syntax/overview.md index 729f4fd..18caa0b 100644 --- a/docs/src/content/docs/pattern-syntax/overview.md +++ b/docs/src/content/docs/pattern-syntax/overview.md @@ -21,6 +21,7 @@ Patterns are parsed exactly as written, with no hidden rewriting or implicit tra | [Optional group](/pattern-syntax/optional-groups/) | `[-f]`, `[-X POST]` | Matches with or without the group | | [Flag with value](/pattern-syntax/matching-behavior/#flag-schema-inference) | `-X\|--request POST` | A flag-value pair matched in any order | | [Placeholder](/pattern-syntax/placeholders/) | ``, ``, `` | Special tokens in `<...>` with various behaviors (see below) | +| Backslash escape | `\;` | Literal match after removing the backslash | | Quoted literal | `"WIP*"`, `'hello'` | Exact match without glob expansion | | [Multi-word 
alternation](/pattern-syntax/alternation/#multi-word-alternation) | `"npx prettier"\|prettier` | Alternatives that include multi-word commands | diff --git a/docs/src/content/docs/pattern-syntax/placeholders.md b/docs/src/content/docs/pattern-syntax/placeholders.md index 50b2577..fa9f2a4 100644 --- a/docs/src/content/docs/pattern-syntax/placeholders.md +++ b/docs/src/content/docs/pattern-syntax/placeholders.md @@ -161,8 +161,15 @@ definitions: # Handles: xargs [flags...] command [args...] - 'xargs ' + + # Handles: find [args...] -exec|-execdir|-ok|-okdir command [args...] \;|+ + - "find * -exec|-execdir|-ok|-okdir <cmd> \\;|+" ``` +:::note +In the `find` wrapper example, the backslash is doubled (`\\;`) because `\` is the escape character inside a YAML double-quoted string; the pattern runok receives is `\;`. The pattern parser preserves the backslash (`\;`), and the matcher strips it during comparison so that it matches the shell-unescaped `;` in the actual command. +::: + ## Restrictions - `<cmd>` captures one or more tokens; it tries all possible split points to find a valid wrapped command diff --git a/src/rules/pattern_matcher.rs b/src/rules/pattern_matcher.rs index d7906f5..19081e0 100644 --- a/src/rules/pattern_matcher.rs +++ b/src/rules/pattern_matcher.rs @@ -620,17 +620,81 @@ fn optional_flags_absent(optional_tokens: &[PatternToken], cmd_tokens: &[&str]) /// where `*` matches zero or more arbitrary characters. Otherwise, an /// exact string comparison is performed. fn literal_matches(pattern: &str, token: &str) -> bool { - if pattern.contains('*') { + if pattern.contains('\\') { + // Strip backslash escapes so that pattern `\;` matches command token `;`. + // The pattern lexer preserves backslash-escaped characters as-is (e.g. `\;`), + // while the command tokenizer resolves them (e.g. `\;` -> `;`). + // Uses sentinel-based matching so that `\*` is treated as a literal `*`, + // not a glob, even when the same token also contains a bare `*`. 
+ unescape_and_match(pattern, token) + } else if pattern.contains('*') { glob_match(pattern, token) } else { pattern == token } } +/// Remove backslash escapes and perform matching that correctly distinguishes +/// escaped characters from glob wildcards. +/// +/// `\;` → matches `;`, `\*` → matches literal `*` (not a glob), `\\` → matches `\`. +/// +/// When the pattern contains both `\*` (literal) and bare `*` (glob), the +/// escaped `*` characters are temporarily replaced with a sentinel (`\x00`) +/// during glob expansion so they are not treated as wildcards. +fn unescape_and_match(pattern: &str, token: &str) -> bool { + let mut unescaped = String::with_capacity(pattern.len()); + let mut has_unescaped_glob = false; + let mut has_escaped_star = false; + let mut chars = pattern.chars(); + while let Some(ch) = chars.next() { + if ch == '\\' { + if let Some(next) = chars.next() { + if next == '*' { + // Use sentinel for escaped `*` so glob_match won't treat it + // as a wildcard. We restore it after matching. + unescaped.push('\x00'); + has_escaped_star = true; + } else { + unescaped.push(next); + } + } + } else { + if ch == '*' { + has_unescaped_glob = true; + } + unescaped.push(ch); + } + } + if has_unescaped_glob { + // Perform glob matching. Escaped `*` characters are sentinels (`\x00`) + // and won't be split by glob_match. We need to also place the sentinel + // in the token for comparison purposes. + // Actually, the token is a real command string and won't contain `\x00`, + // but sentinels in the pattern's literal segments need to match `*` in + // the token. Replace sentinel back to `*` in the pattern parts that + // glob_match compares literally. + glob_match(&unescaped, token) + } else { + // No glob — restore sentinels to `*` and do exact comparison. 
+ if has_escaped_star { + let plain = unescaped.replace('\x00', "*"); + plain == token + } else { + unescaped == token + } + } +} + /// Simple glob matching where `*` matches zero or more arbitrary characters. /// /// Only supports `*` as a wildcard; no other glob syntax (e.g. `?`, `[...]`) /// is supported. +/// +/// When the pattern contains the sentinel character `\x00` (used by +/// [`unescape_and_match`] for escaped `\*`), sentinels are restored to `*` +/// in each literal segment before comparison so they match a literal `*` in +/// the text rather than acting as a wildcard. fn glob_match(pattern: &str, text: &str) -> bool { let parts: Vec<&str> = pattern.split('*').collect(); @@ -639,28 +703,34 @@ fn glob_match(pattern: &str, text: &str) -> bool { return true; } + let has_sentinel = pattern.contains('\x00'); let mut pos = 0; for (i, part) in parts.iter().enumerate() { if part.is_empty() { continue; } + // Restore sentinel `\x00` back to `*` for literal comparison when needed. + let owned; + let segment: &str = if has_sentinel && part.contains('\x00') { + owned = part.replace('\x00', "*"); + &owned + } else { + part + }; if i == 0 { - // First part must match the beginning of the text - if !text.starts_with(part) { + if !text.starts_with(segment) { return false; } - pos = part.len(); + pos = segment.len(); } else if i == parts.len() - 1 { - // Last part must match the end of the text - if !text[pos..].ends_with(part) { + if !text[pos..].ends_with(segment) { return false; } pos = text.len(); } else { - // Middle parts: find the next occurrence - match text[pos..].find(part) { - Some(offset) => pos += offset + part.len(), + match text[pos..].find(segment) { + Some(offset) => pos += offset + segment.len(), None => return false, } } @@ -1657,4 +1727,23 @@ mod tests { "pattern {pattern_str:?} vs command {command_str:?}", ); } + + // === literal_matches: backslash escape === + + #[rstest] + #[case::backslash_semicolon(r"\;", ";", true)] + 
#[case::backslash_semicolon_no_match(r"\;", "x", false)] + #[case::backslash_star_literal(r"\*", "*", true)] + #[case::backslash_star_not_glob(r"\*", "foo", false)] + #[case::escaped_and_bare_glob(r"\*.*", "*.foo", true)] + #[case::escaped_and_bare_glob_no_match(r"\*.*", "foo.bar", false)] + #[case::no_backslash("foo", "foo", true)] + #[case::plain_glob("fo*", "foobar", true)] + fn literal_matches_cases(#[case] pattern: &str, #[case] token: &str, #[case] expected: bool) { + assert_eq!( + literal_matches(pattern, token), + expected, + "literal_matches({pattern:?}, {token:?})", + ); + } } diff --git a/src/rules/pattern_parser.rs b/src/rules/pattern_parser.rs index 230cb62..3e78a61 100644 --- a/src/rules/pattern_parser.rs +++ b/src/rules/pattern_parser.rs @@ -187,8 +187,7 @@ fn build_pattern_tokens( // alternation so that flag-with-value and order-independent // matching work the same as for `-X|--request` style patterns. if let Some(&(j, next)) = iter.peek() { - if should_consume_as_value_strict(next, j + 1 < lex_tokens.len(), inside_group) - { + if should_consume_as_value(next, j + 1 < lex_tokens.len(), inside_group) { let (_, next_token) = iter.next().ok_or( PatternParseError::InvalidSyntax("unexpected end of tokens".into()), )?; @@ -358,26 +357,12 @@ fn should_consume_as_value(next: &LexToken, has_more_after: bool, inside_group: // Flags and the bare `--` separator must not be consumed as values. LexToken::Literal(s) if is_flag(s) || s == "--" => false, LexToken::Alternation(alts) if alts.iter().any(|a| is_flag(a)) => false, + LexToken::Placeholder(_) => false, LexToken::Wildcard => inside_group || has_more_after, _ => true, } } -/// Like [`should_consume_as_value`], but stricter: also refuses to consume -/// placeholder tokens as flag values. Used for bare flags (e.g. `-c`) where -/// the flag is written without alternation syntax and the next token may be a -/// wrapper placeholder (e.g. ``) rather than a flag value. 
-fn should_consume_as_value_strict( - next: &LexToken, - has_more_after: bool, - inside_group: bool, -) -> bool { - match next { - LexToken::Placeholder(_) => false, - _ => should_consume_as_value(next, has_more_after, inside_group), - } -} - /// Check if a string looks like a flag (starts with `-`). /// /// The bare double-dash `--` is excluded because it is a positional @@ -501,17 +486,13 @@ mod tests { PatternToken::Alternation(vec!["-f".into(), "--force".into()]), PatternToken::Wildcard, ])] - #[case::placeholder_value("cmd -o|--option ", "cmd", vec![ - PatternToken::FlagWithValue { - aliases: vec!["-o".into(), "--option".into()], - value: Box::new(PatternToken::Placeholder("cmd".into())), - }, + #[case::placeholder_not_consumed_as_flag_value("cmd -o|--option ", "cmd", vec![ + PatternToken::Alternation(vec!["-o".into(), "--option".into()]), + PatternToken::Placeholder("cmd".into()), ])] #[case::path_ref_value("cmd -c|--config ", "cmd", vec![ - PatternToken::FlagWithValue { - aliases: vec!["-c".into(), "--config".into()], - value: Box::new(PatternToken::PathRef("config".into())), - }, + PatternToken::Alternation(vec!["-c".into(), "--config".into()]), + PatternToken::PathRef("config".into()), ])] fn parse_flag_with_value( #[case] input: &str, @@ -646,6 +627,18 @@ mod tests { #[case::path_ref("cat ", "cat", vec![ PatternToken::PathRef("sensitive".into()), ])] + #[case::flag_alternation_then_placeholder( + r"find * -exec|-execdir|-ok|-okdir \;|+", + "find", + vec![ + PatternToken::Wildcard, + PatternToken::Alternation(vec![ + "-exec".into(), "-execdir".into(), "-ok".into(), "-okdir".into(), + ]), + PatternToken::Placeholder("cmd".into()), + PatternToken::Alternation(vec![r"\;".into(), "+".into()]), + ], + )] fn parse_placeholder( #[case] input: &str, #[case] expected_command: &str, diff --git a/tests/e2e/check_generic.rs b/tests/e2e/check_generic.rs index 3eaac2a..7a74ed2 100644 --- a/tests/e2e/check_generic.rs +++ b/tests/e2e/check_generic.rs @@ -1,5 +1,6 @@ use 
indoc::indoc; use rstest::{fixture, rstest}; +use serde_json::Value; use super::helpers::TestEnv; @@ -186,6 +187,38 @@ fn check_allow_with_sandbox_info() { assert_eq!(json["sandbox"]["preset"], "restricted"); } +// --- Wrapper with find -exec and <cmd> placeholder: the command captured by <cmd> is checked against the rules --- + +#[rstest] +#[case::find_exec_deny(r#"{"command":"find . -exec rm -rf / \\;"}"#, "deny")] +#[case::find_exec_plus_deny(r#"{"command":"find . -exec rm -rf / +"}"#, "deny")] +#[case::find_execdir_allow(r#"{"command":"find . -execdir echo hello +"}"#, "allow")] +#[case::find_ok_deny(r#"{"command":"find /tmp -ok rm -rf / \\;"}"#, "deny")] +#[case::find_okdir_allow(r#"{"command":"find . -okdir ls -la +"}"#, "allow")] +fn check_find_exec_wrapper(#[case] stdin_json: &str, #[case] expected_decision: &str) { + let env = TestEnv::new(indoc! {r#" + rules: + - deny: 'rm *' + - allow: 'echo *' + - allow: 'ls *' + definitions: + wrappers: + - "find * -exec|-execdir|-ok|-okdir <cmd> \\;|+" + "#}); + let assert = env + .command() + .args(["check", "--output-format", "json"]) + .write_stdin(stdin_json) + .assert(); + let output = assert.code(0).get_output().stdout.clone(); + let json: Value = + serde_json::from_slice(&output).unwrap_or_else(|e| panic!("invalid JSON: {e}")); + assert_eq!( + json["decision"], expected_decision, + "stdin: {stdin_json}, full output: {json}" + ); +} + // --- Command substitution in quoted strings --- #[fixture] diff --git a/tests/integration/wrapper_recursive_evaluation.rs b/tests/integration/wrapper_recursive_evaluation.rs index 5b2d9f9..6fda564 100644 --- a/tests/integration/wrapper_recursive_evaluation.rs +++ b/tests/integration/wrapper_recursive_evaluation.rs @@ -757,3 +757,35 @@ fn wrapper_compound_with_sandbox(empty_context: EvalContext) { assert_eq!(result.action, Action::Allow); assert_eq!(result.sandbox_preset.as_deref(), Some("py_sandbox")); } + +// ======================================== +// find -exec/-execdir wrapper: flag alternation followed by +// placeholder is parsed correctly, 
enabling recursive evaluation +// ======================================== + +#[rstest] +#[case::find_exec_rm_denied_semicolon("find . -exec rm -rf / \\;", assert_deny as ActionAssertion)] +#[case::find_exec_rm_denied_plus("find . -exec rm -rf / +", assert_deny as ActionAssertion)] +#[case::find_execdir_echo_allowed("find . -execdir echo hello +", assert_allow as ActionAssertion)] +#[case::find_ok_rm_denied("find /tmp -ok rm -rf / \\;", assert_deny as ActionAssertion)] +#[case::find_okdir_ls_allowed("find . -okdir ls -la +", assert_allow as ActionAssertion)] +#[case::find_exec_unmatched_default("find . -exec hg status +", assert_default as ActionAssertion)] +fn find_exec_wrapper_evaluates_inner( + #[case] command: &str, + #[case] expected: ActionAssertion, + empty_context: EvalContext, +) { + let config = parse_config(indoc! {" + rules: + - deny: 'rm -rf *' + - allow: 'echo *' + - allow: 'ls *' + definitions: + wrappers: + - 'find * -exec|-execdir|-ok|-okdir <cmd> \\;|+' + "}) + .unwrap(); + + let result = evaluate_command(&config, command, &empty_context).unwrap(); + expected(&result.action); +}