From e4d29cb9a86ab27c0123fdfa8ac022159e24026e Mon Sep 17 00:00:00 2001 From: Hayato Kawai Date: Sat, 14 Mar 2026 00:40:42 +0900 Subject: [PATCH 1/4] feat(rules): add typed variable definitions (`definitions.vars`) with `` placeholder Pattern matching lacked a way to match command arguments against a typed list of allowed values. Long alternations (e.g., `a|b|c|d`) had to be inlined in patterns, and path arguments couldn't be canonicalized for robust matching across different path forms. Add `definitions.vars` with typed variable definitions: - `type: literal` (default): exact string match - `type: path`: canonicalize both sides before comparison, with normalize_path fallback when the file doesn't exist - `` placeholder in rule patterns - Config validation, merge strategy, and JSON schema - `vars` context variable in CEL expressions Co-Authored-By: Claude Opus 4.6 --- docs/src/content/docs/configuration/schema.md | 53 +++- .../content/docs/pattern-syntax/overview.md | 39 +-- .../docs/pattern-syntax/placeholders.md | 85 +++++- schema/runok.schema.json | 45 +++ src/config/model.rs | 281 ++++++++++++++++++ src/rules/expr_evaluator.rs | 5 + src/rules/pattern_matcher/mod.rs | 24 ++ src/rules/pattern_matcher/token_matching.rs | 138 +++++++++ src/rules/pattern_parser.rs | 17 ++ src/rules/rule_engine.rs | 1 + tests/integration/main.rs | 1 + tests/integration/var_ref_evaluation.rs | 231 ++++++++++++++ 12 files changed, 893 insertions(+), 27 deletions(-) create mode 100644 tests/integration/var_ref_evaluation.rs diff --git a/docs/src/content/docs/configuration/schema.md b/docs/src/content/docs/configuration/schema.md index ffc1609..82ffed2 100644 --- a/docs/src/content/docs/configuration/schema.md +++ b/docs/src/content/docs/configuration/schema.md @@ -185,7 +185,7 @@ Name of a sandbox preset (defined in `definitions.sandbox`) to apply when this r ### `definitions` -Reusable definitions for paths, sandbox presets, wrappers, and commands. +Reusable definitions for paths, variables, sandbox presets, wrappers, and commands. **Type:** `object`\ **Default:** `{}`\ @@ -311,6 +311,51 @@ definitions: - mycustomtool ``` +#### `definitions.vars` + +Typed variable definitions referenced by `` in rule patterns. Each variable has a `type` (controlling how values are matched) and a list of `values`. + +**Type:** `map[str, VarDefinition]`\ +**Default:** `{}` + +```yaml title="runok.yml" +definitions: + vars: + instance-ids: + values: + - i-abc123 + - i-def456 + test-script: + type: path + values: + - ./tests/run +``` + +##### Variable Definition Fields + +###### `type` + +Controls how the variable's values are matched against command arguments. + +**Type:** `"literal" | "path"`\ +**Default:** `"literal"` + +| Type | Matching behavior | +| --------- | ------------------------------------------------------------------------- | +| `literal` | Exact string match | +| `path` | Canonicalize both sides before comparison, fallback to path normalization | + +###### `values` + +List of allowed values for this variable. + +**Type:** `list[str]`\ +**Required:** Yes + +:::note +Variable definitions must contain concrete values. `` or `` references inside `definitions.vars` values are not allowed. +::: + ### `audit` Audit log settings. Controls whether command evaluations are recorded and where log files are stored. Audit settings can only be configured in the **global** `runok.yml` — audit sections in project or local override configs are silently ignored. @@ -370,6 +415,12 @@ definitions: secrets: - ~/.ssh - ~/.gnupg + vars: + safe-scripts: + type: path + values: + - ./tests/run + - ./scripts/lint.sh sandbox: standard: fs: diff --git a/docs/src/content/docs/pattern-syntax/overview.md b/docs/src/content/docs/pattern-syntax/overview.md index 18caa0b..1841a21 100644 --- a/docs/src/content/docs/pattern-syntax/overview.md +++ b/docs/src/content/docs/pattern-syntax/overview.md @@ -11,30 +11,31 @@ Patterns are parsed exactly as written, with no hidden rewriting or implicit tra ## Syntax Elements -| Syntax | Example | Description | -| ----------------------------------------------------------------------------- | ------------------------------- | ------------------------------------------------------------ | -| Literal | `git status` | Exact token match | -| [Wildcard](/pattern-syntax/wildcards/) | `git *` | Zero or more tokens | -| [Glob](/pattern-syntax/wildcards/#glob-patterns) | `list-*`, `*.txt` | `*` inside a literal matches zero or more characters | -| [Alternation](/pattern-syntax/alternation/) | `-X\|--request`, `main\|master` | Pipe-separated alternatives | -| [Negation](/pattern-syntax/alternation/#negation) | `!GET`, `!describe\|get` | Matches anything except the specified value(s) | -| [Optional group](/pattern-syntax/optional-groups/) | `[-f]`, `[-X POST]` | Matches with or without the group | -| [Flag with value](/pattern-syntax/matching-behavior/#flag-schema-inference) | `-X\|--request POST` | A flag-value pair matched in any order | -| [Placeholder](/pattern-syntax/placeholders/) | ``, ``, `` | Special tokens in `<...>` with various behaviors (see below) | -| Backslash escape | `\;` | Literal match after removing the backslash | -| Quoted literal | `"WIP*"`, `'hello'` | Exact match without glob expansion | -| [Multi-word alternation](/pattern-syntax/alternation/#multi-word-alternation) | `"npx prettier"\|prettier` | Alternatives that include multi-word commands | +| Syntax | Example | Description | +| ----------------------------------------------------------------------------- | -------------------------------------------- | ------------------------------------------------------------ | +| Literal | `git status` | Exact token match | +| [Wildcard](/pattern-syntax/wildcards/) | `git *` | Zero or more tokens | +| [Glob](/pattern-syntax/wildcards/#glob-patterns) | `list-*`, `*.txt` | `*` inside a literal matches zero or more characters | +| [Alternation](/pattern-syntax/alternation/) | `-X\|--request`, `main\|master` | Pipe-separated alternatives | +| [Negation](/pattern-syntax/alternation/#negation) | `!GET`, `!describe\|get` | Matches anything except the specified value(s) | +| [Optional group](/pattern-syntax/optional-groups/) | `[-f]`, `[-X POST]` | Matches with or without the group | +| [Flag with value](/pattern-syntax/matching-behavior/#flag-schema-inference) | `-X\|--request POST` | A flag-value pair matched in any order | +| [Placeholder](/pattern-syntax/placeholders/) | ``, ``, ``, `` | Special tokens in `<...>` with various behaviors (see below) | +| Backslash escape | `\;` | Literal match after removing the backslash | +| Quoted literal | `"WIP*"`, `'hello'` | Exact match without glob expansion | +| [Multi-word alternation](/pattern-syntax/alternation/#multi-word-alternation) | `"npx prettier"\|prettier` | Alternatives that include multi-word commands | ### Placeholders Tokens wrapped in `<...>` are **placeholders** — special tokens that match dynamically rather than by exact string comparison. Each placeholder type has different matching behavior: -| Placeholder | Example | Description | Details | -| ------------- | ---------------------- | --------------------------------------------------------- | ------------------------------------------------------------------------- | -| `` | `sudo ` | Captures the wrapped command for further rule evaluation | [Command](/pattern-syntax/placeholders/#command-cmd) | -| `` | `env ` | Absorbs zero or more flag-like tokens (starting with `-`) | [Options](/pattern-syntax/placeholders/#options-opts) | -| `` | `env ` | Absorbs zero or more `KEY=VALUE` tokens | [Variables](/pattern-syntax/placeholders/#variables-vars) | -| `` | `cat ` | Matches against a named list of paths from `definitions` | [Path References](/pattern-syntax/placeholders/#path-references-pathname) | +| Placeholder | Example | Description | Details | +| ------------- | ---------------------- | --------------------------------------------------------- | -------------------------------------------------------------------------------- | +| `` | `sudo ` | Captures the wrapped command for further rule evaluation | [Command](/pattern-syntax/placeholders/#command-cmd) | +| `` | `env ` | Absorbs zero or more flag-like tokens (starting with `-`) | [Options](/pattern-syntax/placeholders/#options-opts) | +| `` | `env ` | Absorbs zero or more `KEY=VALUE` tokens | [Variables](/pattern-syntax/placeholders/#variables-vars) | +| `` | `cat ` | Matches against a named list of paths from `definitions` | [Path References](/pattern-syntax/placeholders/#path-references-pathname) | +| `` | `cmd ` | Matches against a typed variable definition | [Variable References](/pattern-syntax/placeholders/#variable-references-varname) | ## Pattern Structure diff --git a/docs/src/content/docs/pattern-syntax/placeholders.md b/docs/src/content/docs/pattern-syntax/placeholders.md index fa9f2a4..18d46df 100644 --- a/docs/src/content/docs/pattern-syntax/placeholders.md +++ b/docs/src/content/docs/pattern-syntax/placeholders.md @@ -7,12 +7,13 @@ sidebar: Tokens wrapped in `<...>` are **placeholders** — special tokens that match dynamically rather than by exact string comparison. -| Placeholder | Description | -| ------------------------------------------ | -------------------------------------------------------- | -| [``](#command-cmd) | Captures the wrapped command for further rule evaluation | -| [``](#options-opts) | Absorbs zero or more flag-like tokens | -| [``](#variables-vars) | Absorbs zero or more `KEY=VALUE` tokens | -| [``](#path-references-pathname) | Matches against a named list of paths | +| Placeholder | Description | +| -------------------------------------------- | -------------------------------------------------------- | +| [``](#command-cmd) | Captures the wrapped command for further rule evaluation | +| [``](#options-opts) | Absorbs zero or more flag-like tokens | +| [``](#variables-vars) | Absorbs zero or more `KEY=VALUE` tokens | +| [``](#path-references-pathname) | Matches against a named list of paths | +| [``](#variable-references-varname) | Matches against a typed variable definition | ## Command (``) @@ -146,6 +147,76 @@ If a pattern references a path name that is not defined in `definitions.paths`, - deny: 'cat ' ``` +## Variable References (``) + +The `` placeholder matches a command argument against a **typed variable definition** in the [`definitions.vars`](/configuration/schema/#definitionsvars) block. + +### Defining Variables + +Each variable has an optional `type` (default: `literal`) and a list of `values`: + +```yaml +definitions: + vars: + instance-ids: + values: + - i-abc123 + - i-def456 + - i-ghi789 + test-script: + type: path + values: + - ./tests/run +``` + +### Variable Types + +| Type | Matching behavior | +| --------- | ------------------------------------------------------------------------- | +| `literal` | Exact string match (default) | +| `path` | Canonicalize both sides before comparison, fallback to path normalization | + +### Using Variable References + +```yaml +rules: + - allow: aws ec2 terminate-instances --instance-ids + - allow: bash +``` + +| Command | Rule | Result | +| ------------------------------------------------------ | ------------------------------------------------ | ---------------------------- | +| `aws ec2 terminate-instances --instance-ids i-abc123` | `allow: "... --instance-ids "` | Allowed | +| `aws ec2 terminate-instances --instance-ids i-UNKNOWN` | `allow: "... --instance-ids "` | No match | +| `bash ./tests/run` | `allow: "bash "` | Allowed | +| `bash tests/run` | `allow: "bash "` | Allowed (path normalization) | + +### Path Type Normalization + +When `type: path` is set, both the command argument and the defined values are **canonicalized** (resolved to absolute paths via the filesystem). If the path does not exist on disk, logical normalization is used as a fallback (`.` removal and `..` resolution). + +This handles cases where the same file is referenced with different path forms: + +```yaml +definitions: + vars: + test-script: + type: path + values: + - ./tests/run +``` + +| Command | Matches `` | +| --------------------------- | --------------------------- | +| `bash tests/run` | Yes | +| `bash ./tests/run` | Yes | +| `bash ./tests/../tests/run` | Yes | +| `bash ./scripts/deploy` | No | + +### Undefined Variable Names + +If a pattern references a variable name that is not defined in `definitions.vars`, the pattern **never matches**. + ## Combining Placeholders Placeholders can be combined to handle complex wrapper patterns: @@ -173,7 +244,7 @@ In the `find` wrapper example, `\\;` is a backslash-escaped semicolon in YAML. T ## Restrictions - `` captures one or more tokens; it tries all possible split points to find a valid wrapped command -- Optional groups and path references are not supported inside wrapper patterns +- Optional groups, path references, and variable references are not supported inside wrapper patterns ## Related diff --git a/schema/runok.schema.json b/schema/runok.schema.json index c8d3ee4..d01746c 100644 --- a/schema/runok.schema.json +++ b/schema/runok.schema.json @@ -160,6 +160,16 @@ "$ref": "#/$defs/SandboxPreset" } }, + "vars": { + "description": "Typed variable definitions referenced by `` in rule patterns.", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "$ref": "#/$defs/VarDefinition" + } + }, "wrappers": { "description": "Wrapper command patterns for recursive evaluation (e.g., `sudo `).", "type": [ @@ -374,6 +384,41 @@ ] } } + }, + "VarDefinition": { + "description": "A typed variable definition with a list of allowed values.", + "type": "object", + "properties": { + "type": { + "description": "The type of this variable (default: `literal`).", + "$ref": "#/$defs/VarType" + }, + "values": { + "description": "Allowed values for this variable.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "values" + ] + }, + "VarType": { + "description": "Type of a variable definition, controlling how values are matched.", + "oneOf": [ + { + "description": "Exact string match (default).", + "type": "string", + "const": "literal" + }, + { + "description": "Path match: canonicalize both sides before comparison,\nfalling back to `normalize_path` when the file does not exist.", + "type": "string", + "const": "path" + } + ] } } } diff --git a/src/config/model.rs b/src/config/model.rs index ce7f972..f64c2c6 100644 --- a/src/config/model.rs +++ b/src/config/model.rs @@ -88,6 +88,32 @@ pub struct Definitions { pub wrappers: Option>, /// Additional command patterns to recognize. pub commands: Option>, + /// Typed variable definitions referenced by `` in rule patterns. + pub vars: Option>, +} + +/// Type of a variable definition, controlling how values are matched. +#[derive(Debug, Deserialize, Default, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(any(feature = "config-schema", test), derive(JsonSchema))] +#[serde(rename_all = "lowercase")] +pub enum VarType { + /// Exact string match (default). + #[default] + Literal, + /// Path match: canonicalize both sides before comparison, + /// falling back to `normalize_path` when the file does not exist. + Path, +} + +/// A typed variable definition with a list of allowed values. +#[derive(Debug, Deserialize, Clone, PartialEq)] +#[cfg_attr(any(feature = "config-schema", test), derive(JsonSchema))] +pub struct VarDefinition { + /// The type of this variable (default: `literal`). + #[serde(default, rename = "type")] + pub var_type: VarType, + /// Allowed values for this variable. + pub values: Vec, } /// Sandbox preset defining filesystem and network restrictions. @@ -270,6 +296,24 @@ impl Config { } } + // Reject and references inside definitions.vars values. + if let Some(defs) = &self.definitions + && let Some(vars) = &defs.vars + { + for (key, var_def) in vars { + for value in &var_def.values { + if (value.starts_with("') + { + errors.push(format!( + "definitions.vars.{key}: value '{value}' contains a placeholder \ + reference. Variable definitions must contain concrete values, not references" + )); + } + } + } + } + let rules = match &self.rules { Some(rules) => rules, None => { @@ -367,6 +411,7 @@ impl Config { sandbox: Self::merge_hashmaps(b.sandbox, o.sandbox), wrappers: Self::merge_vecs(b.wrappers, o.wrappers), commands: Self::merge_vecs(b.commands, o.commands), + vars: Self::merge_vars(b.vars, o.vars), }), } } @@ -389,6 +434,20 @@ impl Config { } } + /// Merge vars with per-key override strategy: the override wins for each key. + fn merge_vars( + base: Option>, + over: Option>, + ) -> Option> { + match (base, over) { + (Some(mut b), Some(o)) => { + b.extend(o); + Some(b) + } + (b, o) => b.or(o), + } + } + fn merge_hashmaps( base: Option>, over: Option>, @@ -2066,6 +2125,228 @@ mod tests { assert!(result.network_allowed); } + // === definitions.vars === + + #[test] + fn parse_definitions_vars_literal() { + let config = parse_config(indoc! {" + definitions: + vars: + instance-ids: + type: literal + values: + - i-abc123 + - i-def456 + "}) + .unwrap(); + let vars = config.definitions.unwrap().vars.unwrap(); + let var_def = &vars["instance-ids"]; + assert_eq!(var_def.var_type, VarType::Literal); + assert_eq!(var_def.values, vec!["i-abc123", "i-def456"]); + } + + #[test] + fn parse_definitions_vars_path() { + let config = parse_config(indoc! {" + definitions: + vars: + test-scripts: + type: path + values: + - ./tests/run + - ./scripts/test.sh + "}) + .unwrap(); + let vars = config.definitions.unwrap().vars.unwrap(); + let var_def = &vars["test-scripts"]; + assert_eq!(var_def.var_type, VarType::Path); + assert_eq!(var_def.values, vec!["./tests/run", "./scripts/test.sh"]); + } + + #[test] + fn parse_definitions_vars_default_type_is_literal() { + let config = parse_config(indoc! {" + definitions: + vars: + regions: + values: + - us-east-1 + - eu-west-1 + "}) + .unwrap(); + let vars = config.definitions.unwrap().vars.unwrap(); + let var_def = &vars["regions"]; + assert_eq!(var_def.var_type, VarType::Literal); + assert_eq!(var_def.values, vec!["us-east-1", "eu-west-1"]); + } + + #[test] + fn parse_definitions_vars_multiple_vars() { + let config = parse_config(indoc! {" + definitions: + vars: + regions: + values: [us-east-1] + instance-ids: + type: literal + values: [i-abc123] + "}) + .unwrap(); + let vars = config.definitions.unwrap().vars.unwrap(); + assert!(vars.contains_key("regions")); + assert!(vars.contains_key("instance-ids")); + } + + #[test] + fn validate_rejects_var_ref_in_definitions_vars() { + let mut config = parse_config(indoc! {" + definitions: + vars: + ids: + values: + - i-abc123 + - '' + other-ids: + values: + - i-xyz999 + "}) + .unwrap(); + let err = config.validate().unwrap_err(); + assert!( + err.to_string().contains("definitions.vars.ids"), + "error should mention the var key: {}", + err + ); + assert!( + err.to_string().contains("concrete values, not references"), + "error should explain the constraint: {}", + err + ); + } + + #[test] + fn validate_rejects_path_ref_in_definitions_vars() { + let mut config = parse_config(indoc! {" + definitions: + vars: + scripts: + type: path + values: + - ./run.sh + - '' + paths: + sensitive: + - /etc/passwd + "}) + .unwrap(); + let err = config.validate().unwrap_err(); + assert!( + err.to_string().contains("definitions.vars.scripts"), + "error should mention the var key: {}", + err + ); + } + + #[test] + fn validate_accepts_valid_vars() { + let mut config = parse_config(indoc! {" + definitions: + vars: + ids: + values: + - i-abc123 + - i-def456 + "}) + .unwrap(); + assert!(config.validate().is_ok()); + } + + #[test] + fn merge_definitions_vars_override_per_key() { + let base = Config { + definitions: Some(Definitions { + vars: Some(HashMap::from([ + ( + "ids".to_string(), + VarDefinition { + var_type: VarType::Literal, + values: vec!["i-abc123".to_string()], + }, + ), + ( + "regions".to_string(), + VarDefinition { + var_type: VarType::Literal, + values: vec!["us-east-1".to_string()], + }, + ), + ])), + ..Definitions::default() + }), + ..Config::default() + }; + let over = Config { + definitions: Some(Definitions { + vars: Some(HashMap::from([( + "ids".to_string(), + VarDefinition { + var_type: VarType::Literal, + values: vec!["i-xyz999".to_string()], + }, + )])), + ..Definitions::default() + }), + ..Config::default() + }; + let result = base.merge(over); + let vars = result.definitions.unwrap().vars.unwrap(); + // "ids" is overridden by the override config + assert_eq!(vars["ids"].values, vec!["i-xyz999"]); + // "regions" is preserved from base + assert_eq!(vars["regions"].values, vec!["us-east-1"]); + } + + #[test] + fn merge_definitions_vars_base_only() { + let base = Config { + definitions: Some(Definitions { + vars: Some(HashMap::from([( + "ids".to_string(), + VarDefinition { + var_type: VarType::Literal, + values: vec!["i-abc123".to_string()], + }, + )])), + ..Definitions::default() + }), + ..Config::default() + }; + let result = base.merge(Config::default()); + let vars = result.definitions.unwrap().vars.unwrap(); + assert_eq!(vars["ids"].values, vec!["i-abc123"]); + } + + #[test] + fn merge_definitions_vars_override_only() { + let over = Config { + definitions: Some(Definitions { + vars: Some(HashMap::from([( + "ids".to_string(), + VarDefinition { + var_type: VarType::Path, + values: vec!["./run.sh".to_string()], + }, + )])), + ..Definitions::default() + }), + ..Config::default() + }; + let result = Config::default().merge(over); + let vars = result.definitions.unwrap().vars.unwrap(); + assert_eq!(vars["ids"].var_type, VarType::Path); + assert_eq!(vars["ids"].values, vec!["./run.sh"]); + } + // === JSON Schema === const SCHEMA_PATH: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/schema/runok.schema.json"); diff --git a/src/rules/expr_evaluator.rs b/src/rules/expr_evaluator.rs index 216822b..f17ffed 100644 --- a/src/rules/expr_evaluator.rs +++ b/src/rules/expr_evaluator.rs @@ -9,6 +9,7 @@ pub struct ExprContext { pub flags: HashMap>, pub args: Vec, pub paths: HashMap>, + pub vars: HashMap, } /// Evaluates a CEL expression against a given context, returning a boolean result. @@ -41,6 +42,8 @@ pub fn evaluate(expr: &str, context: &ExprContext) -> Result { .add_variable("paths", &context.paths) .map_err(|e| ExprError::Eval(e.to_string()))?; + cel_context.add_variable_from_value("vars", context.vars.clone()); + let result = program .execute(&cel_context) .map_err(|e| ExprError::Eval(e.to_string()))?; @@ -62,6 +65,7 @@ mod tests { flags: HashMap::new(), args: Vec::new(), paths: HashMap::new(), + vars: HashMap::new(), } } @@ -204,6 +208,7 @@ mod tests { flags: HashMap::from([("method".to_string(), Some("POST".to_string()))]), args: vec!["https://prod.example.com/api".to_string()], paths: HashMap::new(), + vars: HashMap::new(), }; assert!( evaluate( diff --git a/src/rules/pattern_matcher/mod.rs b/src/rules/pattern_matcher/mod.rs index 9957b1d..1f5c9e2 100644 --- a/src/rules/pattern_matcher/mod.rs +++ b/src/rules/pattern_matcher/mod.rs @@ -701,6 +701,30 @@ fn match_engine<'a>( } } + PatternToken::VarRef(name) => { + if is_extract { + return Err(RuleError::UnsupportedWrapperToken(format!( + "VarRef ()" + ))); + } + if cmd_tokens.is_empty() { + return Ok(false); + } + if token_matching::match_var_ref(name, cmd_tokens[0], definitions) { + match_engine( + rest, + &cmd_tokens[1..], + definitions, + steps, + captures, + None, + after_double_dash, + ) + } else { + Ok(false) + } + } + PatternToken::Placeholder(name) => { if let Some((captured, all_candidates)) = &mut extract { // Wrapper placeholder extraction mode diff --git a/src/rules/pattern_matcher/token_matching.rs b/src/rules/pattern_matcher/token_matching.rs index c34db92..4463f3c 100644 --- a/src/rules/pattern_matcher/token_matching.rs +++ b/src/rules/pattern_matcher/token_matching.rs @@ -148,6 +148,7 @@ pub(crate) fn match_single_token( let normalized_cmd = normalize_path(cmd_token); paths.iter().any(|p| normalize_path(p) == normalized_cmd) } + PatternToken::VarRef(name) => match_var_ref(name, cmd_token, definitions), PatternToken::Placeholder(_) => true, // FlagWithValue, Optional, Opts, and Vars don't make sense as single-token matches PatternToken::FlagWithValue { .. } @@ -219,6 +220,45 @@ pub(crate) fn resolve_paths<'a>(name: &str, definitions: &'a Definitions) -> &'a .unwrap_or(&[]) } +/// Resolve a variable definition from `definitions.vars`. +pub(crate) fn resolve_var<'a>( + name: &str, + definitions: &'a Definitions, +) -> Option<&'a crate::config::VarDefinition> { + definitions.vars.as_ref().and_then(|vars| vars.get(name)) +} + +/// Match a `` reference against a command token. +/// +/// Matching strategy depends on the variable's type: +/// - `literal`: exact string match against each value. +/// - `path`: canonicalize both sides (falling back to `normalize_path` when +/// the file does not exist) and compare. +pub(crate) fn match_var_ref(name: &str, cmd_token: &str, definitions: &Definitions) -> bool { + let Some(var_def) = resolve_var(name, definitions) else { + return false; + }; + match var_def.var_type { + crate::config::VarType::Literal => var_def.values.iter().any(|v| v == cmd_token), + crate::config::VarType::Path => { + let cmd_canonical = canonicalize_or_normalize(cmd_token); + var_def + .values + .iter() + .any(|v| canonicalize_or_normalize(v) == cmd_canonical) + } + } +} + +/// Try to canonicalize a path via the filesystem; fall back to logical +/// normalization when the path does not exist. +fn canonicalize_or_normalize(path: &str) -> String { + match std::fs::canonicalize(path) { + Ok(p) => p.to_string_lossy().into_owned(), + Err(_) => normalize_path(path), + } +} + #[cfg(test)] mod tests { use super::*; @@ -284,4 +324,102 @@ mod tests { fn normalize_path_cases(#[case] input: &str, #[case] expected: &str) { assert_eq!(normalize_path(input), expected); } + + // === match_var_ref / match_single_token with VarRef === + + use crate::config::{Definitions, VarDefinition, VarType}; + use std::collections::HashMap; + + #[rstest] + #[case::literal_match("i-abc123", true)] + #[case::literal_match_second_value("i-def456", true)] + #[case::literal_no_match("i-xyz999", false)] + #[case::literal_empty_string("", false)] + fn match_var_ref_literal(#[case] cmd_token: &str, #[case] expected: bool) { + let definitions = Definitions { + vars: Some(HashMap::from([( + "instance-ids".to_string(), + VarDefinition { + var_type: VarType::Literal, + values: vec!["i-abc123".into(), "i-def456".into()], + }, + )])), + ..Default::default() + }; + assert_eq!( + match_single_token( + &PatternToken::VarRef("instance-ids".into()), + cmd_token, + &definitions + ), + expected, + ); + } + + #[test] + fn match_var_ref_undefined() { + let definitions = Definitions::default(); + assert!(!match_single_token( + &PatternToken::VarRef("nonexistent".into()), + "anything", + &definitions, + )); + } + + #[rstest] + #[case::exact_match("tests/run", true)] + #[case::dot_prefix("./tests/run", true)] + #[case::no_match("other/file", false)] + fn match_var_ref_path(#[case] cmd_token: &str, #[case] expected: bool) { + let definitions = Definitions { + vars: Some(HashMap::from([( + "test-script".to_string(), + VarDefinition { + var_type: VarType::Path, + values: vec!["./tests/run".into()], + }, + )])), + ..Default::default() + }; + assert_eq!( + match_single_token( + &PatternToken::VarRef("test-script".into()), + cmd_token, + &definitions + ), + expected, + ); + } + + #[test] + fn match_var_ref_empty_values() { + let definitions = Definitions { + vars: Some(HashMap::from([( + "empty".to_string(), + VarDefinition { + var_type: VarType::Literal, + values: vec![], + }, + )])), + ..Default::default() + }; + assert!(!match_single_token( + &PatternToken::VarRef("empty".into()), + "anything", + &definitions, + )); + } + + #[test] + fn match_var_ref_no_vars_section() { + let definitions = Definitions { + vars: None, + ..Default::default() + }; + assert!(!match_single_token( + &PatternToken::VarRef("anything".into()), + "value", + &definitions, + )); + } } diff --git a/src/rules/pattern_parser.rs b/src/rules/pattern_parser.rs index 53cdd71..dd47506 100644 --- a/src/rules/pattern_parser.rs +++ b/src/rules/pattern_parser.rs @@ -56,6 +56,8 @@ pub enum PatternToken { Wildcard, /// Path variable reference (e.g., ) PathRef(String), + /// Typed variable reference (e.g., ) + VarRef(String), /// Wrapper placeholder (e.g., ) Placeholder(String), /// Options placeholder for wrapper patterns (e.g., ). @@ -73,6 +75,7 @@ pub enum PatternToken { /// Remaining tokens are converted to PatternToken variants based on syntax: /// - `*` -> Wildcard /// - `` -> PathRef +/// - `` -> VarRef /// - `` -> Placeholder (single word, no pipe, no colon) /// - `!value` -> Negation /// - `[...]` -> Optional group @@ -328,6 +331,10 @@ fn parse_placeholder(content: &str) -> Result", "cat", vec![ + PatternToken::VarRef("instance-ids".into()), + ])] + #[case::var_ref_after_flag("aws ec2 terminate-instances --instance-ids *", "aws", vec![ + PatternToken::Literal("ec2".into()), + PatternToken::Literal("terminate-instances".into()), + PatternToken::Alternation(vec!["--instance-ids".into()]), + PatternToken::VarRef("instance-ids".into()), + PatternToken::Wildcard, + ])] fn parse_placeholder( #[case] input: &str, #[case] expected_command: &str, diff --git a/src/rules/rule_engine.rs b/src/rules/rule_engine.rs index 3129607..28f7682 100644 --- a/src/rules/rule_engine.rs +++ b/src/rules/rule_engine.rs @@ -659,6 +659,7 @@ fn build_expr_context( flags, args: parsed_command.args.clone(), paths, + vars: HashMap::new(), } } diff --git a/tests/integration/main.rs b/tests/integration/main.rs index b4d45c9..fa89073 100644 --- a/tests/integration/main.rs +++ b/tests/integration/main.rs @@ -7,6 +7,7 @@ mod optional_notation_and_path_ref; mod path_resolution; mod property_based; mod remote_preset_with_path; +mod var_ref_evaluation; mod when_clause_rules; mod wrapper_recursive_evaluation; diff --git a/tests/integration/var_ref_evaluation.rs b/tests/integration/var_ref_evaluation.rs new file mode 100644 index 0000000..c2cd55b --- /dev/null +++ b/tests/integration/var_ref_evaluation.rs @@ -0,0 +1,231 @@ +use super::{ActionAssertion, assert_allow, assert_ask, assert_deny, empty_context}; + +use indoc::indoc; +use rstest::rstest; +use runok::config::parse_config; +use runok::rules::rule_engine::{EvalContext, evaluate_command}; + +// ======================================== +// with type: literal +// ======================================== + +#[rstest] +#[case::literal_match( + indoc! {" + definitions: + vars: + instance-ids: + values: + - i-abc123 + - i-def456 + - i-ghi789 + rules: + - allow: aws ec2 terminate-instances --instance-ids + "}, + "aws ec2 terminate-instances --instance-ids i-abc123", + assert_allow as ActionAssertion, +)] +#[case::literal_second_value( + indoc! {" + definitions: + vars: + instance-ids: + values: + - i-abc123 + - i-def456 + rules: + - allow: aws ec2 terminate-instances --instance-ids + "}, + "aws ec2 terminate-instances --instance-ids i-def456", + assert_allow as ActionAssertion, +)] +#[case::literal_no_match( + indoc! {" + definitions: + vars: + instance-ids: + values: + - i-abc123 + - i-def456 + rules: + - allow: aws ec2 terminate-instances --instance-ids + "}, + "aws ec2 terminate-instances --instance-ids i-UNKNOWN", + assert_ask as ActionAssertion, +)] +#[case::literal_explicit_type( + indoc! {" + definitions: + vars: + regions: + type: literal + values: + - us-east-1 + - eu-west-1 + rules: + - allow: aws --region * + "}, + "aws --region us-east-1 s3 ls", + assert_allow as ActionAssertion, +)] +fn var_ref_literal( + #[case] yaml: &str, + #[case] command: &str, + #[case] expected: ActionAssertion, + empty_context: EvalContext, +) { + let config = parse_config(yaml).unwrap(); + let result = evaluate_command(&config, command, &empty_context).unwrap(); + expected(&result.action); +} + +// ======================================== +// with type: path +// ======================================== + +#[rstest] +#[case::path_exact( + indoc! {" + definitions: + vars: + test-script: + type: path + values: + - ./tests/run + rules: + - allow: bash + "}, + "bash tests/run", + assert_allow as ActionAssertion, +)] +#[case::path_dot_prefix( + indoc! {" + definitions: + vars: + test-script: + type: path + values: + - ./tests/run + rules: + - allow: bash + "}, + "bash ./tests/run", + assert_allow as ActionAssertion, +)] +#[case::path_no_match( + indoc! {" + definitions: + vars: + test-script: + type: path + values: + - ./tests/run + rules: + - allow: bash + "}, + "bash ./scripts/deploy", + assert_ask as ActionAssertion, +)] +#[case::path_dotdot_resolution( + indoc! {" + definitions: + vars: + test-script: + type: path + values: + - ./tests/run + rules: + - allow: bash + "}, + "bash ./tests/../tests/run", + assert_allow as ActionAssertion, +)] +fn var_ref_path( + #[case] yaml: &str, + #[case] command: &str, + #[case] expected: ActionAssertion, + empty_context: EvalContext, +) { + let config = parse_config(yaml).unwrap(); + let result = evaluate_command(&config, command, &empty_context).unwrap(); + expected(&result.action); +} + +// ======================================== +// in deny rules +// ======================================== + +#[rstest] +#[case::deny_matching_var( + indoc! {" + definitions: + vars: + dangerous-instances: + values: + - i-prod-001 + - i-prod-002 + rules: + - deny: aws ec2 terminate-instances --instance-ids + "}, + "aws ec2 terminate-instances --instance-ids i-prod-001", + assert_deny as ActionAssertion, +)] +#[case::deny_non_matching_var_falls_through( + indoc! {" + definitions: + vars: + dangerous-instances: + values: + - i-prod-001 + rules: + - deny: aws ec2 terminate-instances --instance-ids + - allow: aws * + "}, + "aws ec2 terminate-instances --instance-ids i-dev-001", + assert_allow as ActionAssertion, +)] +fn var_ref_deny( + #[case] yaml: &str, + #[case] command: &str, + #[case] expected: ActionAssertion, + empty_context: EvalContext, +) { + let config = parse_config(yaml).unwrap(); + let result = evaluate_command(&config, command, &empty_context).unwrap(); + expected(&result.action); +} + +// ======================================== +// coexists with +// ======================================== + +#[rstest] +fn var_ref_and_path_ref_coexist(empty_context: EvalContext) { + let yaml = indoc! {" + definitions: + paths: + sensitive: + - /etc/passwd + vars: + safe-scripts: + type: path + values: + - ./tests/run + rules: + - deny: cat + - allow: bash + "}; + let config = parse_config(yaml).unwrap(); + + let deny_result = evaluate_command(&config, "cat /etc/passwd", &empty_context).unwrap(); + assert!(matches!( + deny_result.action, + runok::rules::rule_engine::Action::Deny(_) + )); + + let allow_result = evaluate_command(&config, "bash ./tests/run", &empty_context).unwrap(); + assert_eq!( + allow_result.action, + runok::rules::rule_engine::Action::Allow + ); +} From dfbbba80a14f25f86fcdb68ec8f18377eb51eb9f Mon Sep 17 00:00:00 2001 From: Hayato Kawai Date: Sat, 14 Mar 2026 01:34:09 +0900 Subject: [PATCH 2/4] feat(rules): capture `` matched values into CEL `vars` context Pattern matching with `` placeholders only validated whether a command token was in the allowed values list, but did not capture the actual matched value. This meant `when` clauses could not inspect which specific value was matched, limiting conditional deny rules. Add `MatchCaptures` struct to carry both wildcard and var captures from pattern matching. Thread a `RefCell` through `match_engine` to accumulate var captures during recursive matching, then populate `ExprContext.vars` so CEL expressions can reference captured values. --- .../docs/rule-evaluation/when-clause.md | 44 +++++- src/rules/expr_evaluator.rs | 39 +++++ src/rules/pattern_matcher/mod.rs | 136 +++++++++++++++++- src/rules/rule_engine.rs | 16 ++- tests/integration/var_ref_evaluation.rs | 84 +++++++++++ 5 files changed, 304 insertions(+), 15 deletions(-) diff --git a/docs/src/content/docs/rule-evaluation/when-clause.md b/docs/src/content/docs/rule-evaluation/when-clause.md index 1f546f1..ddaffe5 100644 --- a/docs/src/content/docs/rule-evaluation/when-clause.md +++ b/docs/src/content/docs/rule-evaluation/when-clause.md @@ -23,7 +23,7 @@ CEL expressions must evaluate to a **boolean** (`true` or `false`). If the expre ## Context variables -Four context variables are available inside `when` expressions: +Five context variables are available inside `when` expressions: ### `env` — Environment variables @@ -94,6 +94,48 @@ rules: The `paths` variable is most useful for checking properties of the defined path list itself (e.g., its size), since the `` pattern already handles matching individual files against the list. +### `vars` -- Captured variable values + +A map of values captured by `` placeholders in the matched pattern. When a pattern contains `` and matches a command token, the matched token value is stored in `vars` under the variable name. + +```yaml +definitions: + vars: + instance-ids: + values: + - i-abc123 + - i-prod-001 + +rules: + # Deny terminating production instances, allow others + - deny: 'aws ec2 terminate-instances --instance-ids ' + when: "vars['instance-ids'] == 'i-prod-001'" + - allow: 'aws ec2 terminate-instances --instance-ids ' +``` + +In this example, when the command matches ``, the actual token value (e.g., `i-prod-001`) is captured into `vars['instance-ids']`. The `when` clause can then inspect this value to make conditional decisions. + +```yaml +definitions: + vars: + regions: + type: literal + values: + - us-east-1 + - eu-west-1 + - ap-southeast-1 + +rules: + # Deny AWS operations in US regions, allow others + - deny: 'aws --region *' + when: "has(vars.regions) && vars.regions.startsWith('us-')" + - allow: 'aws --region *' +``` + +:::note +The `vars` map only contains entries for `` placeholders that were present in the matched pattern. If the pattern doesn't use ``, the `vars` map is empty. Use `has(vars.name)` to safely check for a variable before accessing it. +::: + ## Operators CEL supports standard operators for building conditions: diff --git a/src/rules/expr_evaluator.rs b/src/rules/expr_evaluator.rs index f17ffed..3140780 100644 --- a/src/rules/expr_evaluator.rs +++ b/src/rules/expr_evaluator.rs @@ -164,6 +164,45 @@ mod tests { assert!(!evaluate("'.bashrc' in paths.sensitive", &context).unwrap()); } + // === Variable reference access === + + #[rstest] + #[case::exact_match("vars['instance-ids'] == 'i-abc123'", "instance-ids", "i-abc123", true)] + #[case::no_match( + "vars['instance-ids'] == 'i-abc123'", + "instance-ids", + "i-xyz999", + false + )] + fn vars_access( + #[case] expr: &str, + #[case] key: &str, + #[case] value: &str, + #[case] expected: bool, + ) { + let context = ExprContext { + vars: HashMap::from([(key.to_string(), value.to_string())]), + ..empty_context() + }; + assert_eq!(evaluate(expr, &context).unwrap(), expected); + } + + #[test] + fn vars_has_check() { + let context = ExprContext { + vars: HashMap::from([("region".to_string(), "us-east-1".to_string())]), + ..empty_context() + }; + assert!(evaluate("has(vars.region)", &context).unwrap()); + assert!(evaluate("vars.region == 'us-east-1'", &context).unwrap()); + } + + #[test] + fn vars_empty_when_no_var_captured() { + let context = empty_context(); + assert!(evaluate("vars.size() == 0", &context).unwrap()); + } + // === Logical operators === #[test] diff --git a/src/rules/pattern_matcher/mod.rs b/src/rules/pattern_matcher/mod.rs index 1f5c9e2..afc2375 100644 --- a/src/rules/pattern_matcher/mod.rs +++ b/src/rules/pattern_matcher/mod.rs @@ -7,8 +7,8 @@ mod flag_utils; mod token_matching; -use std::cell::Cell; -use std::collections::HashSet; +use std::cell::{Cell, RefCell}; +use std::collections::{HashMap, HashSet}; use crate::config::Definitions; use crate::rules::RuleError; @@ -21,6 +21,16 @@ use token_matching::{ resolve_paths, }; +/// Result of a successful pattern match, containing both wildcard captures +/// and variable reference captures. +#[derive(Debug, Clone, PartialEq)] +pub struct MatchCaptures { + /// Tokens captured by wildcards (`*`) in the pattern. + pub wildcards: Vec, + /// Values captured by `` references, keyed by variable name. + pub vars: HashMap, +} + /// Maximum number of recursive steps allowed during pattern matching. /// Prevents exponential blowup from patterns with multiple consecutive wildcards. const MAX_MATCH_STEPS: usize = 10_000; @@ -108,6 +118,7 @@ pub fn matches(pattern: &Pattern, command: &ParsedCommand, definitions: &Definit for skip in skip_range { let steps = Cell::new(0usize); let after_dd = Cell::new(false); + let var_captures = RefCell::new(HashMap::new()); if match_engine( &pattern.tokens, &cmd_tokens[skip..], @@ -116,6 +127,7 @@ pub fn matches(pattern: &Pattern, command: &ParsedCommand, definitions: &Definit None, None, &after_dd, + &var_captures, ) .unwrap_or(false) { @@ -125,14 +137,15 @@ pub fn matches(pattern: &Pattern, command: &ParsedCommand, definitions: &Definit false } -/// Like `matches`, but also returns the tokens captured by wildcards (`*`). +/// Like `matches`, but also returns the tokens captured by wildcards (`*`) +/// and `` references. /// -/// Returns `Some(captured_tokens)` if the pattern matches, `None` otherwise. +/// Returns `Some(MatchCaptures)` if the pattern matches, `None` otherwise. pub fn matches_with_captures( pattern: &Pattern, command: &ParsedCommand, definitions: &Definitions, -) -> Option> { +) -> Option { if !pattern.command.matches(&command.command) { return None; } @@ -142,6 +155,7 @@ pub fn matches_with_captures( let steps = Cell::new(0usize); let after_dd = Cell::new(false); let mut captures = Vec::new(); + let var_captures = RefCell::new(HashMap::new()); if match_engine( &pattern.tokens, &cmd_tokens[skip..], @@ -150,10 +164,14 @@ pub fn matches_with_captures( Some(&mut captures), None, &after_dd, + &var_captures, ) .unwrap_or(false) { - return Some(captures.into_iter().map(|s| s.to_string()).collect()); + return Some(MatchCaptures { + wildcards: captures.into_iter().map(|s| s.to_string()).collect(), + vars: var_captures.into_inner(), + }); } } None @@ -182,6 +200,7 @@ pub fn extract_placeholder( let steps = Cell::new(0usize); let after_dd = Cell::new(false); let mut captured = Vec::new(); + let var_captures = RefCell::new(HashMap::new()); match_engine( &pattern.tokens, &cmd_tokens[skip..], @@ -190,6 +209,7 @@ pub fn extract_placeholder( None, Some((&mut captured, &mut all_candidates)), &after_dd, + &var_captures, )?; } Ok(all_candidates @@ -218,6 +238,10 @@ pub fn extract_placeholder( /// is ignored in this mode. /// /// Returns `Err` if the pattern contains unsupported tokens for the current mode. +#[expect( + clippy::too_many_arguments, + reason = "recursive engine needs all context threaded through" +)] fn match_engine<'a>( pattern_tokens: &[PatternToken], cmd_tokens: &[&'a str], @@ -226,6 +250,7 @@ fn match_engine<'a>( mut captures: Option<&mut Vec<&'a str>>, mut extract: Option<(&mut Vec<&'a str>, &mut Vec>)>, after_double_dash: &Cell, + var_captures: &RefCell>, ) -> Result { let count = steps.get() + 1; steps.set(count); @@ -259,6 +284,7 @@ fn match_engine<'a>( None, Some((captured, all_candidates)), after_double_dash, + var_captures, )?; } else if let Some(caps) = &mut captures { let saved_len = caps.len(); @@ -271,6 +297,7 @@ fn match_engine<'a>( Some(*caps), None, after_double_dash, + var_captures, )? { return Ok(true); } @@ -283,6 +310,7 @@ fn match_engine<'a>( None, None, after_double_dash, + var_captures, )? { return Ok(true); } @@ -309,6 +337,7 @@ fn match_engine<'a>( captures, extract, after_double_dash, + var_captures, ); } return Ok(false); @@ -326,6 +355,7 @@ fn match_engine<'a>( captures, extract, after_double_dash, + var_captures, ); } return Ok(false); @@ -350,6 +380,7 @@ fn match_engine<'a>( captures, extract, after_double_dash, + var_captures, ) } else { Ok(false) @@ -368,6 +399,7 @@ fn match_engine<'a>( captures, extract, after_double_dash, + var_captures, ) } @@ -400,6 +432,7 @@ fn match_engine<'a>( Some(*caps), None, after_double_dash, + var_captures, )? { return Ok(true); } @@ -412,6 +445,7 @@ fn match_engine<'a>( None, None, after_double_dash, + var_captures, )? { return Ok(true); } @@ -437,6 +471,7 @@ fn match_engine<'a>( Some(*caps), None, after_double_dash, + var_captures, )? { return Ok(true); } @@ -449,6 +484,7 @@ fn match_engine<'a>( None, None, after_double_dash, + var_captures, )? { return Ok(true); } @@ -470,6 +506,7 @@ fn match_engine<'a>( captures, extract, after_double_dash, + var_captures, ); } return Ok(false); @@ -493,6 +530,7 @@ fn match_engine<'a>( captures, extract, after_double_dash, + var_captures, ) } else { Ok(false) @@ -519,6 +557,7 @@ fn match_engine<'a>( &mut extract, capture_val, after_double_dash, + var_captures, )? { return Ok(true); } @@ -543,6 +582,7 @@ fn match_engine<'a>( &mut extract, capture_val, after_double_dash, + var_captures, )? { return Ok(true); } @@ -567,6 +607,7 @@ fn match_engine<'a>( captures, extract, after_double_dash, + var_captures, ) } else { Ok(false) @@ -586,6 +627,7 @@ fn match_engine<'a>( captures, extract, after_double_dash, + var_captures, ) } else { Ok(false) @@ -612,6 +654,7 @@ fn match_engine<'a>( captures, extract, after_double_dash, + var_captures, ) } else { Ok(false) @@ -643,6 +686,7 @@ fn match_engine<'a>( Some(*caps), None, after_double_dash, + var_captures, )? { return Ok(true); } @@ -655,6 +699,7 @@ fn match_engine<'a>( None, None, after_double_dash, + var_captures, )? { return Ok(true); } @@ -669,6 +714,7 @@ fn match_engine<'a>( captures, None, after_double_dash, + var_captures, ); } Ok(false) @@ -695,6 +741,7 @@ fn match_engine<'a>( captures, None, after_double_dash, + var_captures, ) } else { Ok(false) @@ -711,6 +758,11 @@ fn match_engine<'a>( return Ok(false); } if token_matching::match_var_ref(name, cmd_tokens[0], definitions) { + // Capture the matched command token for this var reference. + // For path-type vars, store the actual matched token (as-is). + var_captures + .borrow_mut() + .insert(name.clone(), cmd_tokens[0].to_string()); match_engine( rest, &cmd_tokens[1..], @@ -719,6 +771,7 @@ fn match_engine<'a>( captures, None, after_double_dash, + var_captures, ) } else { Ok(false) @@ -758,6 +811,7 @@ fn match_engine<'a>( None, Some((captured, all_candidates)), after_double_dash, + var_captures, )?; captured.truncate(saved_len); } @@ -776,6 +830,7 @@ fn match_engine<'a>( captures, None, after_double_dash, + var_captures, ) } } @@ -790,6 +845,7 @@ fn match_engine<'a>( captures, extract, after_double_dash, + var_captures, ) } @@ -803,6 +859,7 @@ fn match_engine<'a>( captures, extract, after_double_dash, + var_captures, ) } } @@ -823,6 +880,7 @@ fn try_recurse_flag_value<'a>( extract: &mut Option<(&mut Vec<&'a str>, &mut Vec>)>, capture_val: Option<&'a str>, after_double_dash: &Cell, + var_captures: &RefCell>, ) -> Result { if let Some((captured, all_candidates)) = extract { match_engine( @@ -833,6 +891,7 @@ fn try_recurse_flag_value<'a>( None, Some((captured, all_candidates)), after_double_dash, + var_captures, )?; // In extract mode, always continue scanning (don't return true) Ok(false) @@ -849,6 +908,7 @@ fn try_recurse_flag_value<'a>( Some(*caps), None, after_double_dash, + var_captures, )? { return Ok(true); } @@ -863,6 +923,7 @@ fn try_recurse_flag_value<'a>( None, None, after_double_dash, + var_captures, ) } } @@ -948,7 +1009,18 @@ mod tests { let pattern = parse_pattern(pattern_str).unwrap(); let schema = build_schema_from_pattern(&pattern); let command = parse_command(command_str, &schema).unwrap(); - matches_with_captures(&pattern, &command, definitions) + matches_with_captures(&pattern, &command, definitions).map(|c| c.wildcards) + } + + fn check_var_captures( + pattern_str: &str, + command_str: &str, + definitions: &Definitions, + ) -> Option> { + let pattern = parse_pattern(pattern_str).unwrap(); + let schema = build_schema_from_pattern(&pattern); + let command = parse_command(command_str, &schema).unwrap(); + matches_with_captures(&pattern, &command, definitions).map(|c| c.vars) } /// Build a FlagSchema from a pattern's FlagWithValue tokens. @@ -1557,6 +1629,56 @@ mod tests { ); } + // ======================================== + // var_captures from matches_with_captures + // ======================================== + + #[rstest] + #[case::literal_var_captured( + "aws ec2 terminate-instances --instance-ids ", + "aws ec2 terminate-instances --instance-ids i-abc123", + Some(HashMap::from([("instance-ids".to_string(), "i-abc123".to_string())])), + )] + #[case::no_match_returns_none( + "aws ec2 terminate-instances --instance-ids ", + "aws ec2 terminate-instances --instance-ids i-UNKNOWN", + None + )] + #[case::path_var_captured( + "bash ", + "bash ./tests/run", + Some(HashMap::from([("test-script".to_string(), "./tests/run".to_string())])), + )] + fn var_captures_returns_expected( + #[case] pattern_str: &str, + #[case] command_str: &str, + #[case] expected: Option>, + ) { + let definitions = Definitions { + vars: Some(HashMap::from([ + ( + "instance-ids".to_string(), + crate::config::VarDefinition { + var_type: crate::config::VarType::Literal, + values: vec!["i-abc123".into(), "i-def456".into()], + }, + ), + ( + "test-script".to_string(), + crate::config::VarDefinition { + var_type: crate::config::VarType::Path, + values: vec!["./tests/run".into()], + }, + ), + ])), + ..Default::default() + }; + assert_eq!( + check_var_captures(pattern_str, command_str, &definitions), + expected, + ); + } + // ======================================== // matching in non-wrapper context // ======================================== diff --git a/src/rules/rule_engine.rs b/src/rules/rule_engine.rs index 28f7682..2db66f6 100644 --- a/src/rules/rule_engine.rs +++ b/src/rules/rule_engine.rs @@ -9,7 +9,7 @@ use crate::rules::command_parser::{ FlagSchema, ParsedCommand, extract_commands, parse_command, shell_quote_join, }; use crate::rules::expr_evaluator::{ExprContext, evaluate}; -use crate::rules::pattern_matcher::{extract_placeholder, matches_with_captures}; +use crate::rules::pattern_matcher::{MatchCaptures, extract_placeholder, matches_with_captures}; use crate::rules::pattern_parser::{Pattern, PatternToken, parse_multi}; /// Context for rule evaluation, providing environment variables and @@ -371,14 +371,15 @@ fn evaluate_simple_command( let schema = build_flag_schema(pattern); let parsed_command = parse_command(command, &schema)?; - let captures = matches_with_captures(pattern, &parsed_command, definitions); - if captures.is_none() { + let match_captures = matches_with_captures(pattern, &parsed_command, definitions); + let Some(match_captures) = match_captures else { continue; - } + }; // Evaluate when clause if present if let Some(when_expr) = &rule.when { - let expr_context = build_expr_context(&parsed_command, context, definitions); + let expr_context = + build_expr_context(&parsed_command, context, definitions, &match_captures); match evaluate(when_expr, &expr_context) { Ok(true) => {} Ok(false) => continue, @@ -389,7 +390,7 @@ fn evaluate_simple_command( match_infos.push(RuleMatchInfo { action_kind, pattern: pattern_str.to_string(), - matched_tokens: captures.unwrap_or_default(), + matched_tokens: match_captures.wildcards, }); matched.push(MatchedRule { @@ -641,6 +642,7 @@ fn build_expr_context( parsed_command: &ParsedCommand, eval_context: &EvalContext, definitions: &Definitions, + match_captures: &MatchCaptures, ) -> ExprContext { let flags: HashMap> = parsed_command .flags @@ -659,7 +661,7 @@ fn build_expr_context( flags, args: parsed_command.args.clone(), paths, - vars: HashMap::new(), + vars: match_captures.vars.clone(), } } diff --git a/tests/integration/var_ref_evaluation.rs b/tests/integration/var_ref_evaluation.rs index c2cd55b..372473e 100644 --- a/tests/integration/var_ref_evaluation.rs +++ b/tests/integration/var_ref_evaluation.rs @@ -229,3 +229,87 @@ fn var_ref_and_path_ref_coexist(empty_context: EvalContext) { runok::rules::rule_engine::Action::Allow ); } + +// ======================================== +// captured values in when clause via vars context +// ======================================== + +#[rstest] +#[case::var_value_matches_when_condition( + indoc! {" + definitions: + vars: + instance-ids: + values: + - i-abc123 + - i-prod-001 + rules: + - deny: aws ec2 terminate-instances --instance-ids + when: \"vars['instance-ids'] == 'i-prod-001'\" + - allow: aws ec2 terminate-instances --instance-ids + "}, + "aws ec2 terminate-instances --instance-ids i-prod-001", + assert_deny as ActionAssertion, +)] +#[case::var_value_does_not_match_when_falls_through( + indoc! {" + definitions: + vars: + instance-ids: + values: + - i-abc123 + - i-prod-001 + rules: + - deny: aws ec2 terminate-instances --instance-ids + when: \"vars['instance-ids'] == 'i-prod-001'\" + - allow: aws ec2 terminate-instances --instance-ids + "}, + "aws ec2 terminate-instances --instance-ids i-abc123", + assert_allow as ActionAssertion, +)] +#[case::var_with_has_guard_and_starts_with( + indoc! {" + definitions: + vars: + regions: + type: literal + values: + - us-east-1 + - eu-west-1 + - ap-southeast-1 + rules: + - deny: aws --region * + when: \"has(vars.regions) && vars.regions.startsWith('us-')\" + - allow: aws --region * + "}, + "aws --region us-east-1 s3 ls", + assert_deny as ActionAssertion, +)] +#[case::var_with_has_guard_non_us_region_allowed( + indoc! {" + definitions: + vars: + regions: + type: literal + values: + - us-east-1 + - eu-west-1 + - ap-southeast-1 + rules: + - deny: aws --region * + when: \"has(vars.regions) && vars.regions.startsWith('us-')\" + - allow: aws --region * + "}, + "aws --region eu-west-1 s3 ls", + assert_allow as ActionAssertion, +)] +fn var_ref_when_clause_with_vars( + #[case] yaml: &str, + #[case] command: &str, + #[case] expected: ActionAssertion, + empty_context: EvalContext, +) { + let config = parse_config(yaml).unwrap(); + let result = evaluate_command(&config, command, &empty_context).unwrap(); + expected(&result.action); +} From 2fae61a7d5be8447e3ed0d35bd93921ab9a800e8 Mon Sep 17 00:00:00 2001 From: Hayato Kawai Date: Sat, 14 Mar 2026 01:34:36 +0900 Subject: [PATCH 3/4] refactor(tests): merge similar validation and matching tests into parameterized cases Separate test functions that only differed in input values made it harder to see that they tested the same invariant. Consolidate `validate_rejects_var_ref_in_definitions_vars` / `validate_rejects_path_ref_in_definitions_vars` into a single parameterized `validate_rejects_placeholder_in_definitions_vars`, and merge three `match_var_ref` negative tests into `match_var_ref_negative_cases`. --- src/config/model.rs | 46 ++++++++++---------- src/rules/pattern_matcher/token_matching.rs | 48 +++++++++------------ 2 files changed, 43 insertions(+), 51 deletions(-) diff --git a/src/config/model.rs b/src/config/model.rs index f64c2c6..d1a9faa 100644 --- a/src/config/model.rs +++ b/src/config/model.rs @@ -2197,9 +2197,9 @@ mod tests { assert!(vars.contains_key("instance-ids")); } - #[test] - fn validate_rejects_var_ref_in_definitions_vars() { - let mut config = parse_config(indoc! {" + #[rstest] + #[case::var_ref( + indoc! {" definitions: vars: ids: @@ -2209,24 +2209,11 @@ mod tests { other-ids: values: - i-xyz999 - "}) - .unwrap(); - let err = config.validate().unwrap_err(); - assert!( - err.to_string().contains("definitions.vars.ids"), - "error should mention the var key: {}", - err - ); - assert!( - err.to_string().contains("concrete values, not references"), - "error should explain the constraint: {}", - err - ); - } - - #[test] - fn validate_rejects_path_ref_in_definitions_vars() { - let mut config = parse_config(indoc! {" + "}, + "definitions.vars.ids", + )] + #[case::path_ref( + indoc! {" definitions: vars: scripts: @@ -2237,14 +2224,25 @@ mod tests { paths: sensitive: - /etc/passwd - "}) - .unwrap(); + "}, + "definitions.vars.scripts", + )] + fn validate_rejects_placeholder_in_definitions_vars( + #[case] yaml: &str, + #[case] expected_key_msg: &str, + ) { + let mut config = parse_config(yaml).unwrap(); let err = config.validate().unwrap_err(); assert!( - err.to_string().contains("definitions.vars.scripts"), + err.to_string().contains(expected_key_msg), "error should mention the var key: {}", err ); + assert!( + err.to_string().contains("concrete values, not references"), + "error should explain the constraint: {}", + err + ); } #[test] diff --git a/src/rules/pattern_matcher/token_matching.rs b/src/rules/pattern_matcher/token_matching.rs index 4463f3c..fad093a 100644 --- a/src/rules/pattern_matcher/token_matching.rs +++ b/src/rules/pattern_matcher/token_matching.rs @@ -356,16 +356,6 @@ mod tests { ); } - #[test] - fn match_var_ref_undefined() { - let definitions = Definitions::default(); - assert!(!match_single_token( - &PatternToken::VarRef("nonexistent".into()), - "anything", - &definitions, - )); - } - #[rstest] #[case::exact_match("tests/run", true)] #[case::dot_prefix("./tests/run", true)] @@ -391,9 +381,10 @@ mod tests { ); } - #[test] - fn match_var_ref_empty_values() { - let definitions = Definitions { + #[rstest] + #[case::undefined_var(Definitions::default(), "nonexistent", "anything")] + #[case::empty_values( + Definitions { vars: Some(HashMap::from([( "empty".to_string(), VarDefinition { @@ -402,23 +393,26 @@ mod tests { }, )])), ..Default::default() - }; - assert!(!match_single_token( - &PatternToken::VarRef("empty".into()), - "anything", - &definitions, - )); - } - - #[test] - fn match_var_ref_no_vars_section() { - let definitions = Definitions { + }, + "empty", + "anything", + )] + #[case::no_vars_section( + Definitions { vars: None, ..Default::default() - }; + }, + "anything", + "value", + )] + fn match_var_ref_negative_cases( + #[case] definitions: Definitions, + #[case] var_name: &str, + #[case] cmd_token: &str, + ) { assert!(!match_single_token( - &PatternToken::VarRef("anything".into()), - "value", + &PatternToken::VarRef(var_name.into()), + cmd_token, &definitions, )); } From aa25f808a3dae535971c9fe25177c10fc8fd1525 Mon Sep 17 00:00:00 2001 From: Hayato Kawai Date: Sat, 14 Mar 2026 01:49:02 +0900 Subject: [PATCH 4/4] fix(rules): restore `var_captures` on backtrack to prevent stale entries When pattern matching backtracked (e.g., Optional "with" branch fails and falls back to "without"), wildcard captures and `after_double_dash` were properly saved/restored, but `var_captures` was not. This caused `` captures from failed branches to persist, contaminating the `vars` map passed to `when` clause CEL expressions. Save and restore `var_captures` at all six backtracking points in `match_engine`: Wildcard, Alternation (direct and `=`-joined), FlagWithValue (space-separated and `=`-joined), and Optional. --- src/rules/pattern_matcher/mod.rs | 46 ++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/rules/pattern_matcher/mod.rs b/src/rules/pattern_matcher/mod.rs index afc2375..ba95078 100644 --- a/src/rules/pattern_matcher/mod.rs +++ b/src/rules/pattern_matcher/mod.rs @@ -275,6 +275,7 @@ fn match_engine<'a>( PatternToken::Wildcard => { for skip in 0..=cmd_tokens.len() { let saved_dd = after_double_dash.get(); + let saved_vc = var_captures.borrow().clone(); if let Some((captured, all_candidates)) = &mut extract { match_engine( rest, @@ -315,6 +316,7 @@ fn match_engine<'a>( return Ok(true); } after_double_dash.set(saved_dd); + *var_captures.borrow_mut() = saved_vc; } Ok(false) } @@ -422,6 +424,7 @@ fn match_engine<'a>( if alts.iter().any(|a| literal_matches(a, cmd_tokens[i])) { let remaining = remove_indices(cmd_tokens, &[i]); let saved_dd = after_double_dash.get(); + let saved_vc = var_captures.borrow().clone(); if let Some(caps) = &mut captures { let saved_len = caps.len(); if match_engine( @@ -450,6 +453,7 @@ fn match_engine<'a>( return Ok(true); } after_double_dash.set(saved_dd); + *var_captures.borrow_mut() = saved_vc; } // `=`-joined match: e.g. `--output=/tmp/out` where @@ -461,6 +465,7 @@ fn match_engine<'a>( let mut remaining = remove_indices(cmd_tokens, &[i]); remaining.insert(i.min(remaining.len()), value_part); let saved_dd = after_double_dash.get(); + let saved_vc = var_captures.borrow().clone(); if let Some(caps) = &mut captures { let saved_len = caps.len(); if match_engine( @@ -489,6 +494,7 @@ fn match_engine<'a>( return Ok(true); } after_double_dash.set(saved_dd); + *var_captures.borrow_mut() = saved_vc; } } return Ok(false); @@ -548,6 +554,7 @@ fn match_engine<'a>( let capture_val = matches!(**value, PatternToken::Wildcard).then_some(cmd_tokens[i + 1]); let saved_dd = after_double_dash.get(); + let saved_vc = var_captures.borrow().clone(); if try_recurse_flag_value( rest, &remaining, @@ -562,6 +569,7 @@ fn match_engine<'a>( return Ok(true); } after_double_dash.set(saved_dd); + *var_captures.borrow_mut() = saved_vc; } // Case 2: `=`-joined flag and value (e.g. `--sort=value`) @@ -573,6 +581,7 @@ fn match_engine<'a>( let capture_val = matches!(**value, PatternToken::Wildcard).then_some(value_part); let saved_dd = after_double_dash.get(); + let saved_vc = var_captures.borrow().clone(); if try_recurse_flag_value( rest, &remaining, @@ -587,6 +596,7 @@ fn match_engine<'a>( return Ok(true); } after_double_dash.set(saved_dd); + *var_captures.borrow_mut() = saved_vc; } } Ok(false) @@ -675,6 +685,7 @@ fn match_engine<'a>( .chain(rest.iter().cloned()) .collect(); let saved_dd = after_double_dash.get(); + let saved_vc = var_captures.borrow().clone(); // extract is always None here (early return above for is_extract) if let Some(caps) = &mut captures { let saved_len = caps.len(); @@ -704,6 +715,7 @@ fn match_engine<'a>( return Ok(true); } after_double_dash.set(saved_dd); + *var_captures.borrow_mut() = saved_vc; // Try without the optional tokens if optional_flags_absent(inner_tokens, cmd_tokens) { return match_engine( @@ -1679,6 +1691,40 @@ mod tests { ); } + // ======================================== + // var_captures backtracking: stale entries must not persist + // ======================================== + + #[test] + fn var_captures_not_stale_after_optional_backtrack() { + // Pattern: `cmd [] other` + // Command: `cmd other` + // + // Optional "with" branch tries ` other` against ["other"]. + // matches "other" (it's in values) and captures name=other, + // but then the remaining `other` pattern has no tokens left → fails. + // On backtrack, the stale capture name=other must be removed. + // Optional "without" branch matches `other` against ["other"] → success. + // Final vars should be empty (no was matched in the + // successful branch). + let definitions = Definitions { + vars: Some(HashMap::from([( + "name".to_string(), + crate::config::VarDefinition { + var_type: crate::config::VarType::Literal, + values: vec!["other".into(), "val".into()], + }, + )])), + ..Default::default() + }; + let result = check_var_captures("cmd [] other", "cmd other", &definitions); + assert_eq!( + result, + Some(HashMap::new()), + "var_captures should be empty when only matched in a backtracked branch", + ); + } + // ======================================== // matching in non-wrapper context // ========================================