Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions python-dsl/codepathfinder/matchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ class CallMatcher:
calls("request.*") # Wildcard (any request.* call)
calls("*.json") # Wildcard (any *.json call)
calls("app.run", match_name={"debug": True}) # Keyword argument matching
calls("socket.bind", match_position={0: "0.0.0.0"}) # Positional argument matching
calls("open", match_position={1: "w"}) # Positional argument matching
calls("socket.bind", match_position={"0[0]": "0.0.0.0"}) # Tuple indexing
calls("connect", match_position={"0[0]": "192.168.*"}) # Wildcard + tuple
"""

def __init__(
Expand All @@ -32,9 +34,15 @@ def __init__(
"""
Args:
*patterns: Function names to match. Supports wildcards (*).
match_position: Match positional arguments by index {position: value}
match_position: Match positional arguments by index or tuple index.
Examples: {0: "value"}, {1: ["a", "b"]}, {"0[0]": "0.0.0.0"}
match_name: Match named/keyword arguments {name: value}

Position indexing:
- Simple: {0: "value"} matches first argument
- Tuple: {"0[0]": "value"} matches first element of first argument tuple
- Wildcard: {"0[0]": "192.168.*"} matches with wildcard pattern

Raises:
ValueError: If no patterns provided or pattern is empty
"""
Expand Down
7 changes: 5 additions & 2 deletions sourcecode-parser/cmd/ci.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@ var ciCmd = &cobra.Command{
Outputs results in SARIF or JSON format for consumption by CI tools.

Examples:
# Generate SARIF report
# Generate SARIF report with single rules file
pathfinder ci --rules rules/owasp_top10.py --project . --output sarif > results.sarif

# Generate SARIF report with rules directory
pathfinder ci --rules rules/ --project . --output sarif > results.sarif

# Generate JSON report
pathfinder ci --rules rules/owasp_top10.py --project . --output json > results.json`,
RunE: func(cmd *cobra.Command, args []string) error {
Expand Down Expand Up @@ -264,7 +267,7 @@ var osExit = os.Exit

func init() {
rootCmd.AddCommand(ciCmd)
ciCmd.Flags().StringP("rules", "r", "", "Path to Python DSL rules file (required)")
ciCmd.Flags().StringP("rules", "r", "", "Path to Python DSL rules file or directory (required)")
ciCmd.Flags().StringP("project", "p", "", "Path to project directory to scan (required)")
ciCmd.Flags().StringP("output", "o", "sarif", "Output format: sarif or json (default: sarif)")
ciCmd.MarkFlagRequired("rules")
Expand Down
7 changes: 5 additions & 2 deletions sourcecode-parser/cmd/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ var scanCmd = &cobra.Command{
Long: `Scan codebase using Python DSL security rules.

Examples:
# Scan with OWASP rules
# Scan with a single rules file
pathfinder scan --rules rules/owasp_top10.py --project /path/to/project

# Scan with a directory of rules
pathfinder scan --rules rules/ --project /path/to/project

# Scan with custom rules
pathfinder scan --rules my_rules.py --project .`,
RunE: func(cmd *cobra.Command, args []string) error {
Expand Down Expand Up @@ -138,7 +141,7 @@ func printDetections(rule dsl.RuleIR, detections []dsl.DataflowDetection) {

func init() {
rootCmd.AddCommand(scanCmd)
scanCmd.Flags().StringP("rules", "r", "", "Path to Python DSL rules file (required)")
scanCmd.Flags().StringP("rules", "r", "", "Path to Python DSL rules file or directory (required)")
scanCmd.Flags().StringP("project", "p", "", "Path to project directory to scan (required)")
scanCmd.MarkFlagRequired("rules")
scanCmd.MarkFlagRequired("project")
Expand Down
142 changes: 135 additions & 7 deletions sourcecode-parser/dsl/call_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,133 @@ func (e *CallMatcherExecutor) matchesArguments(cs *core.CallSite) bool {
return true // All constraints satisfied!
}

// parseTupleIndex parses a positional-argument key with optional tuple indexing.
//
// Supported forms:
//   - Simple position: "0"    → (0, 0, false, true)
//   - Tuple indexing:  "0[1]" → (0, 1, true, true)
//   - Unclosed bracket: "0[1" → (0, 0, false, true); the bracket part is
//     ignored and the key is treated as a simple position
//   - Anything else (non-numeric, negative, trailing garbage) → (0, 0, false, false)
//
// Parameters:
//   - posStr: position string from IR (e.g., "0", "0[1]")
//
// Returns, in order:
//   - pos: argument position (0-indexed, never negative when valid)
//   - tupleIdx: tuple element index (0-indexed, only meaningful if isTupleIndex)
//   - isTupleIndex: whether well-formed tuple indexing syntax was used
//   - valid: whether posStr could be parsed; when false the other results are zero
func parseTupleIndex(posStr string) (int, int, bool, bool) {
	open := strings.Index(posStr, "[")

	// Simple position (no opening bracket at all).
	if open < 0 {
		pos, err := strconv.Atoi(posStr)
		if err != nil || pos < 0 {
			return 0, 0, false, false
		}
		return pos, 0, false, true
	}

	// Everything before the first '[' is the argument position.
	// Negative positions are rejected here because callers use the value
	// directly as a slice index.
	pos, err := strconv.Atoi(posStr[:open])
	if err != nil || pos < 0 {
		return 0, 0, false, false
	}

	// Malformed: '[' without any ']'. Fall back to the simple position
	// rather than failing the whole constraint.
	if !strings.Contains(posStr, "]") {
		return pos, 0, false, true
	}

	// Extract the index from "[N]". Only a single trailing ']' is removed, so
	// inputs such as "0[1]x" or "0[1]]" fail the integer parse below.
	idx, err := strconv.Atoi(strings.TrimSuffix(posStr[open+1:], "]"))
	if err != nil || idx < 0 {
		return 0, 0, false, false
	}

	return pos, idx, true, true
}

// extractTupleElement extracts the element at index from the textual form of
// a tuple or list literal.
//
// Algorithm:
//  1. Trim whitespace and check whether the string starts with '(' or '['
//  2. Strip the opening bracket and, if present, the matching closing bracket
//  3. Split on top-level commas (commas inside quotes or nested
//     brackets do not split)
//  4. Pick the element at index
//  5. Trim surrounding whitespace and quote characters
//
// Examples:
//   - extractTupleElement("(\"0.0.0.0\", 8080)", 0) → "0.0.0.0"
//   - extractTupleElement("(\"0.0.0.0\", 8080)", 1) → "8080"
//   - extractTupleElement("(\"a,b\", 1)", 0) → "a,b"
//   - extractTupleElement("((1, 2), 3)", 1) → "3"
//   - extractTupleElement("not_a_tuple", 0) → "not_a_tuple"
//
// Limitations:
//   - Works on source text, not on a parsed expression; string prefixes such
//     as b"..." are left in place
//
// Parameters:
//   - tupleStr: string representation of tuple from AST
//   - index: 0-indexed position of element to extract
//
// Returns:
//   - Extracted element as string, or empty string if index is negative or
//     out of bounds
func extractTupleElement(tupleStr string, index int) string {
	// A negative index can never address an element; bail out before it is
	// used as a slice index.
	if index < 0 {
		return ""
	}

	tupleStr = strings.TrimSpace(tupleStr)

	// Pick the closing bracket that pairs with the opening one.
	var closer string
	switch {
	case strings.HasPrefix(tupleStr, "("):
		closer = ")"
	case strings.HasPrefix(tupleStr, "["):
		closer = "]"
	default:
		// Not a tuple/list: the value itself is the only "element".
		if index == 0 {
			return strings.Trim(tupleStr, `"'`)
		}
		return ""
	}

	// Strip the opening bracket, then the closing one only when it is really
	// there. This keeps a lone "(" from slicing out of range and keeps
	// unclosed input from losing its last character.
	inner := strings.TrimSuffix(tupleStr[1:], closer)

	elements := splitTupleElements(inner)
	if index >= len(elements) {
		return ""
	}

	// Remove whitespace, then quotes (handles both single and double quotes).
	return strings.Trim(strings.TrimSpace(elements[index]), `"'`)
}

// splitTupleElements splits the inside of a tuple/list literal on commas that
// are neither inside a quoted string nor inside nested (), [] or {}.
// Like strings.Split, it always returns at least one (possibly empty) element.
func splitTupleElements(inner string) []string {
	var (
		elements []string
		depth    int  // nesting level of brackets opened inside inner
		quote    byte // active quote character, 0 when outside a string
		start    int  // byte offset where the current element begins
	)

	// Byte-wise scan is safe: every delimiter is ASCII and never occurs
	// inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(inner); i++ {
		c := inner[i]

		if quote != 0 {
			switch c {
			case '\\':
				i++ // skip the escaped character so \" does not end the string
			case quote:
				quote = 0
			}
			continue
		}

		switch c {
		case '"', '\'':
			quote = c
		case '(', '[', '{':
			depth++
		case ')', ']', '}':
			if depth > 0 {
				depth--
			}
		case ',':
			if depth == 0 {
				elements = append(elements, inner[start:i])
				start = i + 1
			}
		}
	}

	return append(elements, inner[start:])
}

// matchesPositionalArguments checks positional argument constraints.
//
// Algorithm:
// 1. If no positional constraints, return true
// 2. For each position constraint:
// a. Convert position string to int
// a. Parse position string (supports tuple indexing: "0[1]")
// b. Check if position exists in arguments
// c. Extract and match argument value
// c. Extract tuple element if tuple indexing used
// d. Match argument value against constraint
//
// Supports:
// - Simple positional: {0: "value"} matches args[0] == "value"
// - Tuple indexing: {"0[1]": "value"} matches args[0] tuple element 1 == "value"
//
// Performance: O(P) where P = number of positional constraints.
func (e *CallMatcherExecutor) matchesPositionalArguments(args []core.Argument) bool {
Expand All @@ -244,11 +363,12 @@ func (e *CallMatcherExecutor) matchesPositionalArguments(args []core.Argument) b
}

for posStr, constraint := range e.IR.PositionalArgs {
// Convert position string to int
pos, err := strconv.Atoi(posStr)
if err != nil {
// Invalid position string - should not happen with valid IR
return false
// Parse position string (supports tuple indexing)
pos, tupleIdx, isTupleIndex, valid := parseTupleIndex(posStr)

// Check if position string was valid
if !valid {
return false // Invalid position string
}

// Check if position exists in arguments
Expand All @@ -259,6 +379,14 @@ func (e *CallMatcherExecutor) matchesPositionalArguments(args []core.Argument) b
// Get actual argument value
actualValue := args[pos].Value

// Extract tuple element if tuple indexing used
if isTupleIndex {
actualValue = extractTupleElement(actualValue, tupleIdx)
if actualValue == "" {
return false // Tuple index out of bounds
}
}

// Match against constraint
if !e.matchesArgumentValue(actualValue, constraint) {
return false
Expand Down
Loading
Loading