Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions src/rules/command_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,46 @@ fn collect_commands(node: tree_sitter::Node, source: &[u8], commands: &mut Vec<S
}
}

/// Render a token list as a single space-separated command line.
///
/// Every token is passed through `shell_quote`, so tokens made up solely of
/// shell-safe characters appear unchanged while anything else is wrapped in
/// single quotes. An empty slice produces an empty string.
pub fn shell_quote_join(tokens: &[String]) -> String {
    let mut joined = String::new();
    for (position, token) in tokens.iter().enumerate() {
        // A separator goes between tokens only, never before the first one.
        if position > 0 {
            joined.push(' ');
        }
        joined.push_str(&shell_quote(token));
    }
    joined
}

/// Produce a representation of `token` that a POSIX shell reads back as one
/// literal word.
///
/// * An empty token becomes `''` so that it survives as an (empty) argument.
/// * A token consisting only of ASCII alphanumerics and the punctuation
///   `- _ . / : = @ % + ,` is returned unchanged.
/// * Any other token is wrapped in single quotes; each embedded single quote
///   is rewritten as `'\''` (close the quote, emit an escaped quote, reopen).
fn shell_quote(token: &str) -> String {
    // Punctuation that is emitted without quoting.
    const SAFE_PUNCTUATION: &[char] = &['-', '_', '.', '/', ':', '=', '@', '%', '+', ','];

    if token.is_empty() {
        return String::from("''");
    }

    let needs_quoting = token
        .chars()
        .any(|c| !(c.is_ascii_alphanumeric() || SAFE_PUNCTUATION.contains(&c)));
    if !needs_quoting {
        return token.to_owned();
    }

    // Inside single quotes only `'` itself is special, so it is the sole
    // character that has to be rewritten.
    format!("'{}'", token.replace('\'', "'\\''"))
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -766,4 +806,20 @@ mod tests {
vec!["curl", "-X", "POST", "https://example.com"]
);
}

// ========================================
// shell_quote_join
// ========================================

// Table-driven check of `shell_quote_join`: each case supplies the input
// tokens and the exact command line expected after quoting and joining.
#[rstest]
#[case::simple(&["echo", "hello"], "echo hello")]
#[case::space_in_token(&["echo", "hello world"], "echo 'hello world'")]
#[case::empty_token(&["echo", ""], "echo ''")]
#[case::single_quote_in_token(&["echo", "it's"], "echo 'it'\\''s'")]
#[case::flags_and_paths(&["rm", "-rf", "/tmp/dir"], "rm -rf /tmp/dir")]
#[case::single_token(&["ls"], "ls")]
fn shell_quote_join_cases(#[case] tokens: &[&str], #[case] expected: &str) {
    // The function under test takes owned strings, so convert the literals.
    let owned_tokens = tokens
        .iter()
        .map(|&token| String::from(token))
        .collect::<Vec<String>>();
    let joined = shell_quote_join(&owned_tokens);
    assert_eq!(joined, expected);
}
}
2 changes: 2 additions & 0 deletions src/rules/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ pub enum RuleError {
ExprEval(#[from] ExprError),
#[error("recursion depth exceeded (max: {0})")]
RecursionDepthExceeded(usize),
#[error("unsupported token in wrapper pattern: {0}")]
UnsupportedWrapperToken(String),
}

#[derive(Debug, thiserror::Error)]
Expand Down
179 changes: 179 additions & 0 deletions src/rules/pattern_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use std::cell::Cell;
use std::path::{Component, Path};

use crate::config::Definitions;
use crate::rules::RuleError;
use crate::rules::command_parser::ParsedCommand;
use crate::rules::pattern_parser::{Pattern, PatternToken};

Expand Down Expand Up @@ -152,6 +153,184 @@ fn match_tokens_inner(
}
}

/// Try to match a wrapper pattern against a command and extract the tokens
/// captured by the `<cmd>` placeholder.
///
/// The pattern's tokens are matched against every raw token of `command`
/// after the first one; the first raw token is skipped here and the command
/// name is compared separately via `pattern.command`.
///
/// # Returns
///
/// * `Ok(Some(tokens))` if the pattern matches and `<cmd>` captured at least
///   one token. `tokens` are owned copies of the captured command tokens, in
///   order.
/// * `Ok(None)` if the command name differs, the pattern does not match,
///   the matcher gives up after exceeding `MAX_MATCH_STEPS`, or the pattern
///   matched but nothing was captured (no `<cmd>` placeholder, or a trailing
///   `<cmd>` with no tokens left to capture).
///
/// # Errors
///
/// Returns `RuleError::UnsupportedWrapperToken` if matching reaches an
/// `Optional` or `PathRef` pattern token, which wrapper patterns do not
/// support.
pub fn extract_placeholder(
    pattern: &Pattern,
    command: &ParsedCommand,
    definitions: &Definitions,
) -> Result<Option<Vec<String>>, RuleError> {
    if pattern.command != command.command {
        return Ok(None);
    }

    // Skip the leading raw token instead of slicing with `[1..]`: slicing
    // panics when `raw_tokens` is empty, whereas `skip(1)` simply yields no
    // argument tokens in that case.
    let cmd_tokens: Vec<&str> = command
        .raw_tokens
        .iter()
        .skip(1)
        .map(|s| s.as_str())
        .collect();

    // Step counter shared across the recursion; it bounds total backtracking.
    let steps = Cell::new(0usize);
    let mut captured = Vec::new();
    let matched = extract_placeholder_inner(
        &pattern.tokens,
        &cmd_tokens,
        definitions,
        &steps,
        &mut captured,
    )?;

    // A match that captured nothing is reported the same way as no match.
    if !matched || captured.is_empty() {
        return Ok(None);
    }
    Ok(Some(captured.into_iter().map(str::to_owned).collect()))
}

/// Recursive backtracking matcher behind `extract_placeholder`.
///
/// Consumes `pattern_tokens` from the front, matching them against
/// `cmd_tokens`, and appends to `captured` every command token that lines up
/// with a `Placeholder("cmd")`. Placeholders with any other name take part in
/// matching but never contribute to `captured`.
///
/// `steps` is incremented on every invocation; once it exceeds
/// `MAX_MATCH_STEPS` the call reports a non-match (`Ok(false)`).
///
/// Returns `Ok(true)` when the whole pattern matches the whole token list,
/// `Ok(false)` when it does not (in which case any tentative captures made
/// by this call have been rolled back), and `Err` as soon as an `Optional`
/// or `PathRef` pattern token is reached, since wrapper patterns do not
/// support them.
fn extract_placeholder_inner<'a>(
    pattern_tokens: &[PatternToken],
    cmd_tokens: &[&'a str],
    definitions: &Definitions,
    steps: &Cell<usize>,
    captured: &mut Vec<&'a str>,
) -> Result<bool, RuleError> {
    // Charge this invocation against the shared step budget.
    steps.set(steps.get() + 1);
    if steps.get() > MAX_MATCH_STEPS {
        return Ok(false);
    }

    // An exhausted pattern matches only an exhausted token list.
    let (head, tail) = match pattern_tokens.split_first() {
        Some(parts) => parts,
        None => return Ok(cmd_tokens.is_empty()),
    };

    match head {
        PatternToken::Placeholder(name) => {
            let capturing = name == "cmd";

            if tail.is_empty() {
                if !capturing {
                    // A trailing non-<cmd> placeholder stands for exactly one token.
                    return Ok(cmd_tokens.len() == 1);
                }
                // A trailing <cmd> swallows everything that is left.
                captured.extend_from_slice(cmd_tokens);
                return Ok(true);
            }

            if cmd_tokens.is_empty() {
                return Ok(false);
            }

            // More pattern follows: let the placeholder span 1, 2, ... tokens
            // and backtrack until the rest of the pattern fits. Each failed
            // attempt restores `captured` to this mark.
            let mark = captured.len();
            for split in 1..=cmd_tokens.len() {
                let (taken, remaining) = cmd_tokens.split_at(split);
                if capturing {
                    captured.extend_from_slice(taken);
                }
                if extract_placeholder_inner(tail, remaining, definitions, steps, captured)? {
                    return Ok(true);
                }
                captured.truncate(mark);
            }
            Ok(false)
        }

        // The next command token must equal the literal exactly.
        PatternToken::Literal(expected) => match cmd_tokens.split_first() {
            Some((&token, remaining)) if token == expected.as_str() => {
                extract_placeholder_inner(tail, remaining, definitions, steps, captured)
            }
            _ => Ok(false),
        },

        // The next command token must equal one of the alternatives.
        PatternToken::Alternation(alts) => match cmd_tokens.split_first() {
            Some((&token, remaining)) if alts.iter().any(|alt| alt.as_str() == token) => {
                extract_placeholder_inner(tail, remaining, definitions, steps, captured)
            }
            _ => Ok(false),
        },

        PatternToken::FlagWithValue { aliases, value } => {
            // The flag may sit anywhere in the remaining tokens. Each adjacent
            // (flag, value) pair is a candidate; an accepted pair is removed
            // before the rest of the pattern is matched.
            let mark = captured.len();
            for (flag_idx, pair) in cmd_tokens.windows(2).enumerate() {
                let (flag, flag_value) = (pair[0], pair[1]);
                let alias_hit = aliases.iter().any(|alias| alias.as_str() == flag);
                if !alias_hit || !match_single_token(value, flag_value, definitions) {
                    continue;
                }
                let remaining = remove_indices(cmd_tokens, &[flag_idx, flag_idx + 1]);
                if extract_placeholder_inner(tail, &remaining, definitions, steps, captured)? {
                    return Ok(true);
                }
                captured.truncate(mark);
            }
            Ok(false)
        }

        PatternToken::Wildcard => {
            // Skip 0, 1, ... up to all remaining tokens, shortest skip first.
            let mark = captured.len();
            for skipped in 0..=cmd_tokens.len() {
                let remaining = &cmd_tokens[skipped..];
                if extract_placeholder_inner(tail, remaining, definitions, steps, captured)? {
                    return Ok(true);
                }
                captured.truncate(mark);
            }
            Ok(false)
        }

        // Consumes one token, provided it does NOT match the inner pattern.
        PatternToken::Negation(inner) => match cmd_tokens.split_first() {
            Some((&token, remaining)) if !match_single_token(inner, token, definitions) => {
                extract_placeholder_inner(tail, remaining, definitions, steps, captured)
            }
            _ => Ok(false),
        },

        PatternToken::Optional(_) => Err(RuleError::UnsupportedWrapperToken(String::from(
            "Optional ([...])",
        ))),

        PatternToken::PathRef(name) => Err(RuleError::UnsupportedWrapperToken(format!(
            "PathRef (<path:{name}>)"
        ))),
    }
}

/// Check that flags referenced by the optional group are not present in
/// the command tokens. When we take the "absent" path for an Optional,
/// the flag itself must not appear in the remaining command tokens.
Expand Down
Loading
Loading