diff --git a/sourcecode-parser/cmd/analyze.go b/sourcecode-parser/cmd/analyze.go index 4828d60e..82044452 100644 --- a/sourcecode-parser/cmd/analyze.go +++ b/sourcecode-parser/cmd/analyze.go @@ -33,21 +33,6 @@ var analyzeCmd = &cobra.Command{ fmt.Printf("Call graph built successfully: %d functions indexed\n", len(cg.Functions)) fmt.Printf("Module registry: %d modules\n", len(registry.Modules)) - // Debug: Print call graph details (commented out for production) - // fmt.Printf("\nDEBUG: Call graph statistics:\n") - // fmt.Printf(" Functions indexed: %d\n", len(cg.Functions)) - // for fqn := range cg.Functions { - // fmt.Printf(" - %s\n", fqn) - // } - // fmt.Printf(" Call sites: %d callers\n", len(cg.CallSites)) - // for caller, sites := range cg.CallSites { - // fmt.Printf(" %s makes %d calls:\n", caller, len(sites)) - // for _, site := range sites { - // fmt.Printf(" - Target: %s, TargetFQN: %s, Resolved: %v\n", site.Target, site.TargetFQN, site.Resolved) - // } - // } - // fmt.Println() - // Run security analysis matches := callgraph.AnalyzePatterns(cg, patternRegistry) diff --git a/sourcecode-parser/graph/callgraph/patterns.go b/sourcecode-parser/graph/callgraph/patterns.go index 5706a687..23b9f488 100644 --- a/sourcecode-parser/graph/callgraph/patterns.go +++ b/sourcecode-parser/graph/callgraph/patterns.go @@ -1,6 +1,7 @@ package callgraph import ( + "log" "strings" ) @@ -118,12 +119,13 @@ func (pr *PatternRegistry) MatchPattern(pattern *Pattern, callGraph *CallGraph) // PatternMatchDetails contains detailed information about a pattern match. type PatternMatchDetails struct { - Matched bool - SourceFQN string // Fully qualified name of function containing the source call - SourceCall string // The actual dangerous call (e.g., "input", "request.GET") - SinkFQN string // Fully qualified name of function containing the sink call - SinkCall string // The actual dangerous call (e.g., "eval", "exec") - DataFlowPath []string // Complete path from source to sink + Matched bool + IsIntraProcedural bool // true if source and sink are in the same function + SourceFQN string // Fully qualified name of function containing the source call + SourceCall string // The actual dangerous call (e.g., "input", "request.GET") + SinkFQN string // Fully qualified name of function containing the sink call + SinkCall string // The actual dangerous call (e.g., "eval", "exec") + DataFlowPath []string // Complete path from source to sink } // matchDangerousFunction checks if any dangerous function is called. @@ -195,9 +197,13 @@ func (pr *PatternRegistry) matchMissingSanitizer(pattern *Pattern, callGraph *Ca for _, source := range sourceCalls { for _, sink := range sinkCalls { - // Skip false positives where source and sink are in the same function + // Check intra-procedural taint flow using on-demand taint analysis if source.caller == sink.caller { - continue + intraMatch := pr.checkIntraProceduralTaint(source, sink, callGraph, pattern) + if intraMatch != nil { + return intraMatch // Vulnerability found! + } + continue // No taint flow, skip } path := pr.findPath(source.caller, sink.caller, callGraph) @@ -218,12 +224,13 @@ func (pr *PatternRegistry) matchMissingSanitizer(pattern *Pattern, callGraph *Ca } if !hasSanitizer { return &PatternMatchDetails{ - Matched: true, - SourceFQN: source.caller, - SourceCall: source.target, - SinkFQN: sink.caller, - SinkCall: sink.target, - DataFlowPath: path, + Matched: true, + IsIntraProcedural: false, // Explicit flag for inter-procedural + SourceFQN: source.caller, + SourceCall: source.target, + SinkFQN: sink.caller, + SinkCall: sink.target, + DataFlowPath: path, } } } @@ -350,28 +357,34 @@ func sortCallInfo(calls []callInfo) { // - "request.GET.get" matches pattern "request.GET" (prefix match for sources) // - "vulnerable_app.eval" matches pattern "eval" (last component match) func matchesFunctionName(fqn, pattern string) bool { + // Strip everything after ( from fqn if present (e.g., "input(...)" -> "input") + cleanFqn := fqn + if idx := strings.Index(fqn, "("); idx >= 0 { + cleanFqn = fqn[:idx] + } + // Exact match: "eval" == "eval" - if fqn == pattern { + if cleanFqn == pattern { return true } // Suffix match: "builtins.eval" ends with ".eval" - if strings.HasSuffix(fqn, "."+pattern) { + if strings.HasSuffix(cleanFqn, "."+pattern) { return true } // Prefix match: "request.GET.get" starts with "request.GET." // This handles attribute access chains for sources - if strings.HasPrefix(fqn, pattern+".") { + if strings.HasPrefix(cleanFqn, pattern+".") { return true } // Extract last component after last dot and compare // This handles cases like "vulnerable_app.eval" → "eval" // but avoids matching "executor" against "exec" - lastDot := strings.LastIndex(fqn, ".") - if lastDot >= 0 && lastDot < len(fqn)-1 { - lastComponent := fqn[lastDot+1:] + lastDot := strings.LastIndex(cleanFqn, ".") + if lastDot >= 0 && lastDot < len(cleanFqn)-1 { + lastComponent := cleanFqn[lastDot+1:] if lastComponent == pattern { return true } @@ -379,3 +392,84 @@ func matchesFunctionName(fqn, pattern string) bool { return false } + +// checkIntraProceduralTaint checks if source and sink in same function have taint flow. +// Uses on-demand taint analysis with pattern-specific sources/sinks to verify actual data flow. +// Returns non-nil PatternMatchDetails if vulnerable, nil otherwise. +func (pr *PatternRegistry) checkIntraProceduralTaint( + source callInfo, + sink callInfo, + callGraph *CallGraph, + pattern *Pattern, +) *PatternMatchDetails { + functionFQN := source.caller // Same as sink.caller by precondition + + // Get the function node + funcNode, ok := callGraph.Functions[functionFQN] + if !ok { + log.Printf("Function %s not found in call graph", functionFQN) + return nil + } + + // Read the source file + sourceCode, err := readFileBytes(funcNode.File) + if err != nil { + log.Printf("Failed to read file %s: %v", funcNode.File, err) + return nil + } + + // Parse the file to get AST + tree, err := ParsePythonFile(sourceCode) + if err != nil { + log.Printf("Failed to parse file %s: %v", funcNode.File, err) + return nil + } + defer tree.Close() + + // Find the function node at the line number + functionNode := findFunctionAtLine(tree.RootNode(), funcNode.LineNumber) + if functionNode == nil { + log.Printf("Could not find function at line %d in %s", funcNode.LineNumber, funcNode.File) + return nil + } + + // Extract statements from the function + statements, err := ExtractStatements(funcNode.File, sourceCode, functionNode) + if err != nil { + log.Printf("Failed to extract statements from %s: %v", functionFQN, err) + return nil + } + + // Build def-use chains + defUseChain := BuildDefUseChains(statements) + + // Run taint analysis with pattern-specific sources/sinks + summary := AnalyzeIntraProceduralTaint( + functionFQN, + statements, + defUseChain, + pattern.Sources, // Use pattern's sources + pattern.Sinks, // Use pattern's sinks + pattern.Sanitizers, // Use pattern's sanitizers + ) + + // Check if taint analysis found vulnerabilities + if !summary.HasDetections() { + return nil // No taint flow detected + } + + // ✅ Vulnerability confirmed via taint analysis! + log.Printf("Intra-procedural vulnerability detected in %s: %d detection(s)", + functionFQN, summary.GetDetectionCount()) + + // Build match details + return &PatternMatchDetails{ + Matched: true, + IsIntraProcedural: true, + SourceFQN: functionFQN, + SourceCall: source.target, + SinkFQN: functionFQN, + SinkCall: sink.target, + DataFlowPath: []string{functionFQN}, // Single function in path + } +} diff --git a/sourcecode-parser/graph/callgraph/patterns_test.go b/sourcecode-parser/graph/callgraph/patterns_test.go index 02708bab..5b066020 100644 --- a/sourcecode-parser/graph/callgraph/patterns_test.go +++ b/sourcecode-parser/graph/callgraph/patterns_test.go @@ -1,8 +1,11 @@ package callgraph import ( + "os" + "path/filepath" "testing" + "github.com/shivasurya/code-pathfinder/sourcecode-parser/graph" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -312,3 +315,205 @@ func TestPatternTypeConstants(t *testing.T) { assert.Equal(t, PatternType("missing-sanitizer"), PatternTypeMissingSanitizer) assert.Equal(t, PatternType("dangerous-function"), PatternTypeDangerousFunction) } + +// ========== INTRA-PROCEDURAL TAINT DETECTION TESTS (PR #6) ========== + +func TestMatchMissingSanitizer_IntraProceduralSimple(t *testing.T) { + // Test basic intra-procedural vulnerability detection using real file + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "test.py") + + sourceCode := ` +def vulnerable(): + x = input() + eval(x) +` + + err := os.WriteFile(testFile, []byte(sourceCode), 0644) + assert.NoError(t, err) + + funcNode := &graph.Node{ + ID: "test.vulnerable", + Name: "vulnerable", + File: testFile, + LineNumber: 2, + } + + callGraph := &CallGraph{ + Functions: map[string]*graph.Node{ + "test.vulnerable": funcNode, + }, + CallSites: map[string][]CallSite{ + "test.vulnerable": { + {Target: "input", TargetFQN: "builtins.input"}, + {Target: "eval", TargetFQN: "builtins.eval"}, + }, + }, + Summaries: make(map[string]*TaintSummary), + Edges: make(map[string][]string), + ReverseEdges: make(map[string][]string), + } + + pattern := &Pattern{ + ID: "CODE-INJECTION-001", + Sources: []string{"input"}, + Sinks: []string{"eval"}, + Sanitizers: []string{}, + } + + registry := NewPatternRegistry() + match := registry.matchMissingSanitizer(pattern, callGraph) + + // Assertions + assert.NotNil(t, match) + assert.True(t, match.Matched) + assert.True(t, match.IsIntraProcedural) + assert.Equal(t, "test.vulnerable", match.SourceFQN) + assert.Equal(t, "test.vulnerable", match.SinkFQN) + assert.Equal(t, []string{"test.vulnerable"}, match.DataFlowPath) + assert.Contains(t, match.SourceCall, "input") + assert.Contains(t, match.SinkCall, "eval") +} + +func TestMatchMissingSanitizer_IntraProceduralNoFile(t *testing.T) { + // Test graceful handling when file cannot be read + callGraph := &CallGraph{ + Functions: map[string]*graph.Node{ + "test.unknown": { + ID: "test.unknown", + Name: "unknown", + File: "/nonexistent/file.py", + LineNumber: 1, + }, + }, + CallSites: map[string][]CallSite{ + "test.unknown": { + {Target: "request.GET", TargetFQN: "django.http.request.GET"}, + {Target: "eval", TargetFQN: "builtins.eval"}, + }, + }, + Summaries: map[string]*TaintSummary{}, + Edges: make(map[string][]string), + ReverseEdges: make(map[string][]string), + } + + pattern := &Pattern{ + Sources: []string{"request.GET"}, + Sinks: []string{"eval"}, + } + + registry := NewPatternRegistry() + match := registry.matchMissingSanitizer(pattern, callGraph) + + // Should not match if file cannot be read (graceful degradation) + assert.False(t, match.Matched) +} + +func TestMatchMissingSanitizer_IntraProceduralWithSanitizer(t *testing.T) { + // Test that sanitizers are respected (no false positive) + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "test.py") + + sourceCode := ` +def safe_function(): + user_input = input() + sanitized = sanitize(user_input) + eval(sanitized) +` + + err := os.WriteFile(testFile, []byte(sourceCode), 0644) + assert.NoError(t, err) + + funcNode := &graph.Node{ + ID: "test.safe_function", + Name: "safe_function", + File: testFile, + LineNumber: 2, + } + + callGraph := &CallGraph{ + Functions: map[string]*graph.Node{ + "test.safe_function": funcNode, + }, + CallSites: map[string][]CallSite{ + "test.safe_function": { + {Target: "input", TargetFQN: "builtins.input"}, + {Target: "sanitize", TargetFQN: "test.sanitize"}, + {Target: "eval", TargetFQN: "builtins.eval"}, + }, + }, + Summaries: make(map[string]*TaintSummary), + Edges: make(map[string][]string), + ReverseEdges: make(map[string][]string), + } + + pattern := &Pattern{ + Sources: []string{"input"}, + Sinks: []string{"eval"}, + Sanitizers: []string{"sanitize"}, + } + + registry := NewPatternRegistry() + match := registry.matchMissingSanitizer(pattern, callGraph) + + // Should not match because sanitizer breaks taint flow + assert.False(t, match.Matched) +} + +func TestMatchMissingSanitizer_InterProceduralUnchanged(t *testing.T) { + // Test that inter-procedural detection still works + callGraph := &CallGraph{ + Functions: map[string]*graph.Node{ + "test.source_func": {ID: "test.source_func", Name: "source_func"}, + "test.sink_func": {ID: "test.sink_func", Name: "sink_func"}, + }, + CallSites: map[string][]CallSite{ + "test.source_func": { + {Target: "request.GET", TargetFQN: "django.http.request.GET"}, + }, + "test.sink_func": { + {Target: "eval", TargetFQN: "builtins.eval"}, + }, + }, + Edges: map[string][]string{ + "test.source_func": {"test.sink_func"}, + }, + ReverseEdges: map[string][]string{ + "test.sink_func": {"test.source_func"}, + }, + Summaries: map[string]*TaintSummary{ + "test.source_func": {FunctionFQN: "test.source_func"}, + "test.sink_func": {FunctionFQN: "test.sink_func"}, + }, + } + + pattern := &Pattern{ + Sources: []string{"request.GET"}, + Sinks: []string{"eval"}, + } + + registry := NewPatternRegistry() + match := registry.matchMissingSanitizer(pattern, callGraph) + + // Should detect inter-procedural + assert.True(t, match.Matched) + assert.False(t, match.IsIntraProcedural) // Inter-procedural + assert.Equal(t, "test.source_func", match.SourceFQN) + assert.Equal(t, "test.sink_func", match.SinkFQN) + assert.True(t, len(match.DataFlowPath) > 1) +} + +func TestPatternMatchDetails_BackwardCompatibility(t *testing.T) { + // Test that old code works with new schema + match := &PatternMatchDetails{ + Matched: true, + SourceFQN: "test.source", + SinkFQN: "test.sink", + DataFlowPath: []string{"test.source", "test.sink"}, + // IsIntraProcedural not set - should default to false + } + + // Should work correctly + assert.True(t, match.Matched) + assert.False(t, match.IsIntraProcedural) // Default value +}