Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 0 additions & 15 deletions sourcecode-parser/cmd/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,6 @@ var analyzeCmd = &cobra.Command{
fmt.Printf("Call graph built successfully: %d functions indexed\n", len(cg.Functions))
fmt.Printf("Module registry: %d modules\n", len(registry.Modules))

// Debug: Print call graph details (commented out for production)
// fmt.Printf("\nDEBUG: Call graph statistics:\n")
// fmt.Printf(" Functions indexed: %d\n", len(cg.Functions))
// for fqn := range cg.Functions {
// fmt.Printf(" - %s\n", fqn)
// }
// fmt.Printf(" Call sites: %d callers\n", len(cg.CallSites))
// for caller, sites := range cg.CallSites {
// fmt.Printf(" %s makes %d calls:\n", caller, len(sites))
// for _, site := range sites {
// fmt.Printf(" - Target: %s, TargetFQN: %s, Resolved: %v\n", site.Target, site.TargetFQN, site.Resolved)
// }
// }
// fmt.Println()

// Run security analysis
matches := callgraph.AnalyzePatterns(cg, patternRegistry)

Expand Down
134 changes: 114 additions & 20 deletions sourcecode-parser/graph/callgraph/patterns.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package callgraph

import (
"log"
"strings"
)

Expand Down Expand Up @@ -118,12 +119,13 @@ func (pr *PatternRegistry) MatchPattern(pattern *Pattern, callGraph *CallGraph)

// PatternMatchDetails contains detailed information about a pattern match.
//
// Each field is declared exactly once; IsIntraProcedural distinguishes matches
// whose source and sink live in the same function from matches that span a
// call path between two functions.
type PatternMatchDetails struct {
	Matched           bool     // true when the pattern matched
	IsIntraProcedural bool     // true if source and sink are in the same function
	SourceFQN         string   // Fully qualified name of function containing the source call
	SourceCall        string   // The actual dangerous call (e.g., "input", "request.GET")
	SinkFQN           string   // Fully qualified name of function containing the sink call
	SinkCall          string   // The actual dangerous call (e.g., "eval", "exec")
	DataFlowPath      []string // Complete path from source to sink
}

// matchDangerousFunction checks if any dangerous function is called.
Expand Down Expand Up @@ -195,9 +197,13 @@ func (pr *PatternRegistry) matchMissingSanitizer(pattern *Pattern, callGraph *Ca

for _, source := range sourceCalls {
for _, sink := range sinkCalls {
// Skip false positives where source and sink are in the same function
// Check intra-procedural taint flow using on-demand taint analysis
if source.caller == sink.caller {
continue
intraMatch := pr.checkIntraProceduralTaint(source, sink, callGraph, pattern)
if intraMatch != nil {
return intraMatch // Vulnerability found!
}
continue // No taint flow, skip
}

path := pr.findPath(source.caller, sink.caller, callGraph)
Expand All @@ -218,12 +224,13 @@ func (pr *PatternRegistry) matchMissingSanitizer(pattern *Pattern, callGraph *Ca
}
if !hasSanitizer {
return &PatternMatchDetails{
Matched: true,
SourceFQN: source.caller,
SourceCall: source.target,
SinkFQN: sink.caller,
SinkCall: sink.target,
DataFlowPath: path,
Matched: true,
IsIntraProcedural: false, // Explicit flag for inter-procedural
SourceFQN: source.caller,
SourceCall: source.target,
SinkFQN: sink.caller,
SinkCall: sink.target,
DataFlowPath: path,
}
}
}
Expand Down Expand Up @@ -350,32 +357,119 @@ func sortCallInfo(calls []callInfo) {
// - "request.GET.get" matches pattern "request.GET" (prefix match for sources)
// - "vulnerable_app.eval" matches pattern "eval" (last component match)
func matchesFunctionName(fqn, pattern string) bool {
	// Strip everything from the first "(" onward so that call-shaped names
	// compare by name only (e.g., "input(...)" -> "input").
	cleanFqn := fqn
	if idx := strings.Index(fqn, "("); idx >= 0 {
		cleanFqn = fqn[:idx]
	}

	// Exact match: "eval" == "eval"
	if cleanFqn == pattern {
		return true
	}

	// Suffix match on a component boundary: "builtins.eval" ends with ".eval".
	// Requiring the leading dot is what keeps "executor" from matching "exec".
	// This also covers the "last component equals pattern" case
	// ("vulnerable_app.eval" -> "eval"), so no separate check is needed.
	if strings.HasSuffix(cleanFqn, "."+pattern) {
		return true
	}

	// Prefix match on a component boundary: "request.GET.get" starts with
	// "request.GET.". This handles attribute access chains for sources.
	return strings.HasPrefix(cleanFqn, pattern+".")
}

// checkIntraProceduralTaint checks whether a source call and a sink call that
// live in the same function are connected by a taint flow.
//
// It looks the function up in the call graph, re-reads and re-parses the file
// that declares it, extracts the function's statements, builds def-use chains,
// and runs AnalyzeIntraProceduralTaint with the pattern's own sources, sinks
// and sanitizers.
//
// Parameters:
//   - source, sink: the call sites to check; both must have the same caller.
//   - callGraph: supplies the function node (file and line number) of the caller.
//   - pattern: supplies the Sources, Sinks and Sanitizers used by the analysis.
//
// Returns a PatternMatchDetails with IsIntraProcedural set when the analysis
// reports at least one detection. Returns nil when callGraph or pattern is nil,
// when source and sink do not share a caller, when any lookup/read/parse step
// fails (each such failure is logged), or when no detection is reported.
//
// NOTE(review): the analysis is driven by the pattern's full source/sink lists,
// not by this specific source/sink pair, while the returned SourceCall/SinkCall
// are copied from the arguments — confirm that a detection always corresponds
// to the pair being reported.
func (pr *PatternRegistry) checkIntraProceduralTaint(
	source callInfo,
	sink callInfo,
	callGraph *CallGraph,
	pattern *Pattern,
) *PatternMatchDetails {
	// Guard the inputs that are dereferenced below; a nil here would panic.
	if callGraph == nil || pattern == nil {
		return nil
	}

	// Enforce the documented precondition instead of assuming it: reporting a
	// same-function match for calls in different functions would be wrong.
	if source.caller != sink.caller {
		return nil
	}
	functionFQN := source.caller

	// Get the function node
	funcNode, ok := callGraph.Functions[functionFQN]
	if !ok {
		log.Printf("Function %s not found in call graph", functionFQN)
		return nil
	}

	// Read the source file
	sourceCode, err := readFileBytes(funcNode.File)
	if err != nil {
		log.Printf("Failed to read file %s: %v", funcNode.File, err)
		return nil
	}

	// Parse the file to get AST
	tree, err := ParsePythonFile(sourceCode)
	if err != nil {
		log.Printf("Failed to parse file %s: %v", funcNode.File, err)
		return nil
	}
	// Release the parse tree on every return path below.
	defer tree.Close()

	// Find the function node at the line number
	functionNode := findFunctionAtLine(tree.RootNode(), funcNode.LineNumber)
	if functionNode == nil {
		log.Printf("Could not find function at line %d in %s", funcNode.LineNumber, funcNode.File)
		return nil
	}

	// Extract statements from the function
	statements, err := ExtractStatements(funcNode.File, sourceCode, functionNode)
	if err != nil {
		log.Printf("Failed to extract statements from %s: %v", functionFQN, err)
		return nil
	}

	// Build def-use chains
	defUseChain := BuildDefUseChains(statements)

	// Run taint analysis with pattern-specific sources/sinks/sanitizers
	summary := AnalyzeIntraProceduralTaint(
		functionFQN,
		statements,
		defUseChain,
		pattern.Sources,
		pattern.Sinks,
		pattern.Sanitizers,
	)

	// No detections means no taint flow was found inside this function.
	if !summary.HasDetections() {
		return nil
	}

	log.Printf("Intra-procedural vulnerability detected in %s: %d detection(s)",
		functionFQN, summary.GetDetectionCount())

	// Source and sink share one function, so the path is that single function.
	return &PatternMatchDetails{
		Matched:           true,
		IsIntraProcedural: true,
		SourceFQN:         functionFQN,
		SourceCall:        source.target,
		SinkFQN:           functionFQN,
		SinkCall:          sink.target,
		DataFlowPath:      []string{functionFQN},
	}
}
Loading
Loading