diff --git a/sourcecode-parser/graph/callgraph/cfg.go b/sourcecode-parser/graph/callgraph/cfg.go new file mode 100644 index 00000000..63efa821 --- /dev/null +++ b/sourcecode-parser/graph/callgraph/cfg.go @@ -0,0 +1,364 @@ +package callgraph + +// BlockType represents the type of basic block in a control flow graph. +// Different block types enable different security analysis patterns. +type BlockType string + +const ( + // BlockTypeEntry represents the entry point of a function. + // Every function has exactly one entry block. + BlockTypeEntry BlockType = "entry" + + // BlockTypeExit represents the exit point of a function. + // Every function has exactly one exit block where all return paths converge. + BlockTypeExit BlockType = "exit" + + // BlockTypeNormal represents a regular basic block with sequential execution. + // Contains straight-line code with no branches. + BlockTypeNormal BlockType = "normal" + + // BlockTypeConditional represents a conditional branch block. + // Has multiple successor blocks (true/false branches). + // Examples: if statements, ternary operators, short-circuit logic. + BlockTypeConditional BlockType = "conditional" + + // BlockTypeLoop represents a loop header block. + // Has back-edges for loop iteration. + // Examples: while loops, for loops, do-while loops. + BlockTypeLoop BlockType = "loop" + + // BlockTypeSwitch represents a switch/match statement block. + // Has multiple successor blocks (one per case). + BlockTypeSwitch BlockType = "switch" + + // BlockTypeTry represents a try block in exception handling. + // Has normal successor and exception handler successors. + BlockTypeTry BlockType = "try" + + // BlockTypeCatch represents a catch/except block in exception handling. + // Handles exceptions from try blocks. + BlockTypeCatch BlockType = "catch" + + // BlockTypeFinally represents a finally block in exception handling. + // Always executes regardless of exceptions. + BlockTypeFinally BlockType = "finally" +) + +// BasicBlock represents a basic block in a control flow graph. +// A basic block is a maximal sequence of instructions with: +// - Single entry point (at the beginning) +// - Single exit point (at the end) +// - No internal branches +// +// Basic blocks are the nodes in a CFG, connected by edges representing +// control flow between blocks. +type BasicBlock struct { + // ID uniquely identifies this block within the CFG + ID string + + // Type categorizes the block for analysis purposes + Type BlockType + + // StartLine is the first line of code in this block (1-indexed) + StartLine int + + // EndLine is the last line of code in this block (1-indexed) + EndLine int + + // Instructions contains the call sites within this block. + // Call sites represent function/method invocations that occur + // during execution of this block. + Instructions []CallSite + + // Successors are the blocks that can execute after this block. + // For normal blocks: single successor + // For conditional blocks: two successors (true/false branches) + // For switch blocks: multiple successors (one per case) + // For exit blocks: empty (no successors) + Successors []string + + // Predecessors are the blocks that can execute before this block. + // Used for backward analysis and dominance calculations. + Predecessors []string + + // Condition stores the condition expression for conditional blocks. + // Empty for non-conditional blocks. + // Examples: "x > 0", "user.is_admin()", "data is not None" + Condition string + + // Dominators are the blocks that always execute before this block + // on any path from entry. Used for security analysis to determine + // if sanitization always occurs before usage. + Dominators []string +} + +// ControlFlowGraph represents the control flow graph of a function. +// A CFG models all possible execution paths through a function, enabling +// data flow and taint analysis for security vulnerabilities. +// +// Example: +// +// def process_user(user_id): +// user = get_user(user_id) # Block 1 (entry) +// if user.is_admin(): # Block 2 (conditional) +// grant_access() # Block 3 (true branch) +// else: +// deny_access() # Block 4 (false branch) +// log_action(user) # Block 5 (merge point) +// return # Block 6 (exit) +// +// CFG Structure: +// +// Entry → Block1 → Block2 → Block3 → Block5 → Exit +// → Block4 ↗ +type ControlFlowGraph struct { + // FunctionFQN is the fully qualified name of the function this CFG represents + FunctionFQN string + + // Blocks maps block IDs to BasicBlock objects + Blocks map[string]*BasicBlock + + // EntryBlockID identifies the entry block + EntryBlockID string + + // ExitBlockID identifies the exit block + ExitBlockID string + + // CallGraph reference for resolving inter-procedural flows + CallGraph *CallGraph +} + +// NewControlFlowGraph creates and initializes a new CFG for a function. +func NewControlFlowGraph(functionFQN string) *ControlFlowGraph { + cfg := &ControlFlowGraph{ + FunctionFQN: functionFQN, + Blocks: make(map[string]*BasicBlock), + } + + // Create entry and exit blocks + entryBlock := &BasicBlock{ + ID: functionFQN + ":entry", + Type: BlockTypeEntry, + Successors: []string{}, + Predecessors: []string{}, + Instructions: []CallSite{}, + } + + exitBlock := &BasicBlock{ + ID: functionFQN + ":exit", + Type: BlockTypeExit, + Successors: []string{}, + Predecessors: []string{}, + Instructions: []CallSite{}, + } + + cfg.Blocks[entryBlock.ID] = entryBlock + cfg.Blocks[exitBlock.ID] = exitBlock + cfg.EntryBlockID = entryBlock.ID + cfg.ExitBlockID = exitBlock.ID + + return cfg +} + +// AddBlock adds a basic block to the CFG. +func (cfg *ControlFlowGraph) AddBlock(block *BasicBlock) { + cfg.Blocks[block.ID] = block +} + +// AddEdge adds a control flow edge from one block to another. +// Automatically updates both successors and predecessors. +func (cfg *ControlFlowGraph) AddEdge(fromBlockID, toBlockID string) { + fromBlock, fromExists := cfg.Blocks[fromBlockID] + toBlock, toExists := cfg.Blocks[toBlockID] + + if !fromExists || !toExists { + return + } + + // Add to successors if not already present + if !containsString(fromBlock.Successors, toBlockID) { + fromBlock.Successors = append(fromBlock.Successors, toBlockID) + } + + // Add to predecessors if not already present + if !containsString(toBlock.Predecessors, fromBlockID) { + toBlock.Predecessors = append(toBlock.Predecessors, fromBlockID) + } +} + +// GetBlock retrieves a block by ID. +func (cfg *ControlFlowGraph) GetBlock(blockID string) (*BasicBlock, bool) { + block, exists := cfg.Blocks[blockID] + return block, exists +} + +// GetSuccessors returns the successor blocks of a given block. +func (cfg *ControlFlowGraph) GetSuccessors(blockID string) []*BasicBlock { + block, exists := cfg.Blocks[blockID] + if !exists { + return nil + } + + successors := make([]*BasicBlock, 0, len(block.Successors)) + for _, succID := range block.Successors { + if succBlock, ok := cfg.Blocks[succID]; ok { + successors = append(successors, succBlock) + } + } + return successors +} + +// GetPredecessors returns the predecessor blocks of a given block. +func (cfg *ControlFlowGraph) GetPredecessors(blockID string) []*BasicBlock { + block, exists := cfg.Blocks[blockID] + if !exists { + return nil + } + + predecessors := make([]*BasicBlock, 0, len(block.Predecessors)) + for _, predID := range block.Predecessors { + if predBlock, ok := cfg.Blocks[predID]; ok { + predecessors = append(predecessors, predBlock) + } + } + return predecessors +} + +// ComputeDominators calculates dominator sets for all blocks. +// A block X dominates block Y if every path from entry to Y must go through X. +// This is essential for determining if sanitization always occurs before usage. +// +// Algorithm: Iterative data flow analysis +// 1. Initialize: Entry dominates only itself, all others dominated by all blocks +// 2. Iterate until fixed point: +// For each block B (except entry): +// Dom(B) = {B} ∪ (intersection of Dom(P) for all predecessors P of B) +func (cfg *ControlFlowGraph) ComputeDominators() { + // Initialize dominator sets + allBlockIDs := make([]string, 0, len(cfg.Blocks)) + for blockID := range cfg.Blocks { + allBlockIDs = append(allBlockIDs, blockID) + } + + // Entry block dominates only itself + entryBlock := cfg.Blocks[cfg.EntryBlockID] + entryBlock.Dominators = []string{cfg.EntryBlockID} + + // All other blocks initially dominated by all blocks + for blockID, block := range cfg.Blocks { + if blockID != cfg.EntryBlockID { + block.Dominators = append([]string{}, allBlockIDs...) + } + } + + // Iterate until no changes + changed := true + for changed { + changed = false + + for blockID, block := range cfg.Blocks { + if blockID == cfg.EntryBlockID { + continue + } + + // Compute intersection of predecessors' dominators + var newDominators []string + if len(block.Predecessors) > 0 { + // Start with first predecessor's dominators + firstPred := cfg.Blocks[block.Predecessors[0]] + newDominators = append([]string{}, firstPred.Dominators...) + + // Intersect with other predecessors + for i := 1; i < len(block.Predecessors); i++ { + pred := cfg.Blocks[block.Predecessors[i]] + newDominators = intersect(newDominators, pred.Dominators) + } + } + + // Add block itself to dominator set + if !containsString(newDominators, blockID) { + newDominators = append(newDominators, blockID) + } + + // Check if dominators changed + if !slicesEqual(block.Dominators, newDominators) { + block.Dominators = newDominators + changed = true + } + } + } +} + +// IsDominator returns true if dominator dominates dominated. +// Used to check if sanitization (in dominator) always occurs before usage (in dominated). +func (cfg *ControlFlowGraph) IsDominator(dominator, dominated string) bool { + block, exists := cfg.Blocks[dominated] + if !exists { + return false + } + return containsString(block.Dominators, dominator) +} + +// GetAllPaths returns all execution paths from entry to exit. +// Used for exhaustive security analysis. +// WARNING: Can be exponential in size for complex CFGs with loops. +func (cfg *ControlFlowGraph) GetAllPaths() [][]string { + var paths [][]string + var currentPath []string + visited := make(map[string]bool) + + cfg.dfsAllPaths(cfg.EntryBlockID, currentPath, visited, &paths) + return paths +} + +// dfsAllPaths performs depth-first search to enumerate all paths. +func (cfg *ControlFlowGraph) dfsAllPaths(blockID string, currentPath []string, visited map[string]bool, paths *[][]string) { + // Avoid infinite loops in cyclic CFGs + if visited[blockID] { + return + } + + // Add current block to path + currentPath = append(currentPath, blockID) + visited[blockID] = true + + // If we reached exit, save this path + if blockID == cfg.ExitBlockID { + pathCopy := make([]string, len(currentPath)) + copy(pathCopy, currentPath) + *paths = append(*paths, pathCopy) + } else { + // Recurse on successors + block := cfg.Blocks[blockID] + for _, succID := range block.Successors { + cfg.dfsAllPaths(succID, currentPath, visited, paths) + } + } + + // Backtrack + visited[blockID] = false +} + +// Helper function to compute intersection of two string slices. +func intersect(a, b []string) []string { + result := []string{} + for _, item := range a { + if containsString(b, item) { + result = append(result, item) + } + } + return result +} + +// Helper function to check if two string slices are equal. +func slicesEqual(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/sourcecode-parser/graph/callgraph/cfg_test.go b/sourcecode-parser/graph/callgraph/cfg_test.go new file mode 100644 index 00000000..167e8b69 --- /dev/null +++ b/sourcecode-parser/graph/callgraph/cfg_test.go @@ -0,0 +1,563 @@ +package callgraph + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewControlFlowGraph(t *testing.T) { + cfg := NewControlFlowGraph("myapp.views.get_user") + + assert.NotNil(t, cfg) + assert.Equal(t, "myapp.views.get_user", cfg.FunctionFQN) + assert.NotNil(t, cfg.Blocks) + assert.Len(t, cfg.Blocks, 2) // Entry and exit blocks + + // Verify entry block + entryBlock, exists := cfg.Blocks[cfg.EntryBlockID] + require.True(t, exists) + assert.Equal(t, BlockTypeEntry, entryBlock.Type) + assert.Equal(t, "myapp.views.get_user:entry", entryBlock.ID) + + // Verify exit block + exitBlock, exists := cfg.Blocks[cfg.ExitBlockID] + require.True(t, exists) + assert.Equal(t, BlockTypeExit, exitBlock.Type) + assert.Equal(t, "myapp.views.get_user:exit", exitBlock.ID) +} + +func TestBasicBlock_Creation(t *testing.T) { + block := &BasicBlock{ + ID: "block1", + Type: BlockTypeNormal, + StartLine: 10, + EndLine: 15, + Instructions: []CallSite{}, + Successors: []string{"block2"}, + Predecessors: []string{"entry"}, + } + + assert.Equal(t, "block1", block.ID) + assert.Equal(t, BlockTypeNormal, block.Type) + assert.Equal(t, 10, block.StartLine) + assert.Equal(t, 15, block.EndLine) + assert.Len(t, block.Successors, 1) + assert.Len(t, block.Predecessors, 1) +} + +func TestCFG_AddBlock(t *testing.T) { + cfg := NewControlFlowGraph("myapp.test") + + block := &BasicBlock{ + ID: "block1", + Type: BlockTypeNormal, + } + + cfg.AddBlock(block) + + assert.Len(t, cfg.Blocks, 3) // Entry, exit, and new block + retrievedBlock, exists := cfg.GetBlock("block1") + assert.True(t, exists) + assert.Equal(t, block, retrievedBlock) +} + +func TestCFG_AddEdge(t *testing.T) { + cfg := NewControlFlowGraph("myapp.test") + + block1 := &BasicBlock{ID: "block1", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + block2 := &BasicBlock{ID: "block2", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + + cfg.AddBlock(block1) + cfg.AddBlock(block2) + + cfg.AddEdge("block1", "block2") + + // Verify successors + assert.Contains(t, block1.Successors, "block2") + + // Verify predecessors + assert.Contains(t, block2.Predecessors, "block1") +} + +func TestCFG_AddEdge_Duplicate(t *testing.T) { + cfg := NewControlFlowGraph("myapp.test") + + block1 := &BasicBlock{ID: "block1", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + block2 := &BasicBlock{ID: "block2", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + + cfg.AddBlock(block1) + cfg.AddBlock(block2) + + // Add edge twice + cfg.AddEdge("block1", "block2") + cfg.AddEdge("block1", "block2") + + // Should only appear once + assert.Len(t, block1.Successors, 1) + assert.Len(t, block2.Predecessors, 1) +} + +func TestCFG_AddEdge_NonExistentBlocks(t *testing.T) { + cfg := NewControlFlowGraph("myapp.test") + + // Try to add edge between non-existent blocks + cfg.AddEdge("nonexistent1", "nonexistent2") + + // Should not crash, just silently ignore + assert.Len(t, cfg.Blocks, 2) // Only entry and exit +} + +func TestCFG_GetBlock(t *testing.T) { + cfg := NewControlFlowGraph("myapp.test") + + block := &BasicBlock{ID: "block1", Type: BlockTypeNormal} + cfg.AddBlock(block) + + // Existing block + retrieved, exists := cfg.GetBlock("block1") + assert.True(t, exists) + assert.Equal(t, block, retrieved) + + // Non-existent block + _, exists = cfg.GetBlock("nonexistent") + assert.False(t, exists) +} + +func TestCFG_GetSuccessors(t *testing.T) { + cfg := NewControlFlowGraph("myapp.test") + + block1 := &BasicBlock{ID: "block1", Type: BlockTypeConditional, Successors: []string{}, Predecessors: []string{}} + block2 := &BasicBlock{ID: "block2", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + block3 := &BasicBlock{ID: "block3", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + + cfg.AddBlock(block1) + cfg.AddBlock(block2) + cfg.AddBlock(block3) + + cfg.AddEdge("block1", "block2") + cfg.AddEdge("block1", "block3") + + successors := cfg.GetSuccessors("block1") + assert.Len(t, successors, 2) + + successorIDs := []string{successors[0].ID, successors[1].ID} + assert.Contains(t, successorIDs, "block2") + assert.Contains(t, successorIDs, "block3") +} + +func TestCFG_GetPredecessors(t *testing.T) { + cfg := NewControlFlowGraph("myapp.test") + + block1 := &BasicBlock{ID: "block1", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + block2 := &BasicBlock{ID: "block2", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + block3 := &BasicBlock{ID: "block3", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + + cfg.AddBlock(block1) + cfg.AddBlock(block2) + cfg.AddBlock(block3) + + cfg.AddEdge("block1", "block3") + cfg.AddEdge("block2", "block3") + + predecessors := cfg.GetPredecessors("block3") + assert.Len(t, predecessors, 2) + + predecessorIDs := []string{predecessors[0].ID, predecessors[1].ID} + assert.Contains(t, predecessorIDs, "block1") + assert.Contains(t, predecessorIDs, "block2") +} + +func TestCFG_ComputeDominators_Linear(t *testing.T) { + // Test linear CFG: Entry → Block1 → Block2 → Exit + cfg := NewControlFlowGraph("myapp.test") + + block1 := &BasicBlock{ID: "block1", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + block2 := &BasicBlock{ID: "block2", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + + cfg.AddBlock(block1) + cfg.AddBlock(block2) + + cfg.AddEdge(cfg.EntryBlockID, "block1") + cfg.AddEdge("block1", "block2") + cfg.AddEdge("block2", cfg.ExitBlockID) + + cfg.ComputeDominators() + + // Entry dominates itself + assert.Contains(t, cfg.Blocks[cfg.EntryBlockID].Dominators, cfg.EntryBlockID) + assert.Len(t, cfg.Blocks[cfg.EntryBlockID].Dominators, 1) + + // Block1 dominated by entry and itself + assert.Contains(t, block1.Dominators, cfg.EntryBlockID) + assert.Contains(t, block1.Dominators, "block1") + + // Block2 dominated by entry, block1, and itself + assert.Contains(t, block2.Dominators, cfg.EntryBlockID) + assert.Contains(t, block2.Dominators, "block1") + assert.Contains(t, block2.Dominators, "block2") + + // Exit dominated by all blocks + assert.Contains(t, cfg.Blocks[cfg.ExitBlockID].Dominators, cfg.EntryBlockID) + assert.Contains(t, cfg.Blocks[cfg.ExitBlockID].Dominators, "block1") + assert.Contains(t, cfg.Blocks[cfg.ExitBlockID].Dominators, "block2") + assert.Contains(t, cfg.Blocks[cfg.ExitBlockID].Dominators, cfg.ExitBlockID) +} + +func TestCFG_ComputeDominators_Branch(t *testing.T) { + // Test branching CFG: + // Entry → Block1 → Block2 → Block4 → Exit + // → Block3 ↗ + cfg := NewControlFlowGraph("myapp.test") + + block1 := &BasicBlock{ID: "block1", Type: BlockTypeConditional, Successors: []string{}, Predecessors: []string{}} + block2 := &BasicBlock{ID: "block2", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + block3 := &BasicBlock{ID: "block3", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + block4 := &BasicBlock{ID: "block4", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + + cfg.AddBlock(block1) + cfg.AddBlock(block2) + cfg.AddBlock(block3) + cfg.AddBlock(block4) + + cfg.AddEdge(cfg.EntryBlockID, "block1") + cfg.AddEdge("block1", "block2") + cfg.AddEdge("block1", "block3") + cfg.AddEdge("block2", "block4") + cfg.AddEdge("block3", "block4") + cfg.AddEdge("block4", cfg.ExitBlockID) + + cfg.ComputeDominators() + + // Block1 dominates block2 and block3 + assert.Contains(t, block2.Dominators, "block1") + assert.Contains(t, block3.Dominators, "block1") + + // Block4 dominated by entry, block1, and itself (NOT by block2 or block3) + assert.Contains(t, block4.Dominators, cfg.EntryBlockID) + assert.Contains(t, block4.Dominators, "block1") + assert.Contains(t, block4.Dominators, "block4") + // Block4 should NOT be dominated by block2 or block3 (can reach via either path) + assert.NotContains(t, block4.Dominators, "block2") + assert.NotContains(t, block4.Dominators, "block3") +} + +func TestCFG_IsDominator(t *testing.T) { + // Linear CFG: Entry → Block1 → Block2 → Exit + cfg := NewControlFlowGraph("myapp.test") + + block1 := &BasicBlock{ID: "block1", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + block2 := &BasicBlock{ID: "block2", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + + cfg.AddBlock(block1) + cfg.AddBlock(block2) + + cfg.AddEdge(cfg.EntryBlockID, "block1") + cfg.AddEdge("block1", "block2") + cfg.AddEdge("block2", cfg.ExitBlockID) + + cfg.ComputeDominators() + + // Block1 dominates block2 + assert.True(t, cfg.IsDominator("block1", "block2")) + + // Entry dominates block1 + assert.True(t, cfg.IsDominator(cfg.EntryBlockID, "block1")) + + // Block2 does NOT dominate block1 + assert.False(t, cfg.IsDominator("block2", "block1")) +} + +func TestCFG_GetAllPaths_Linear(t *testing.T) { + // Linear CFG: Entry → Block1 → Exit + cfg := NewControlFlowGraph("myapp.test") + + block1 := &BasicBlock{ID: "block1", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + cfg.AddBlock(block1) + + cfg.AddEdge(cfg.EntryBlockID, "block1") + cfg.AddEdge("block1", cfg.ExitBlockID) + + paths := cfg.GetAllPaths() + + require.Len(t, paths, 1) + assert.Equal(t, []string{cfg.EntryBlockID, "block1", cfg.ExitBlockID}, paths[0]) +} + +func TestCFG_GetAllPaths_Branch(t *testing.T) { + // Branching CFG: + // Entry → Block1 → Block2 → Exit + // → Block3 ↗ + cfg := NewControlFlowGraph("myapp.test") + + block1 := &BasicBlock{ID: "block1", Type: BlockTypeConditional, Successors: []string{}, Predecessors: []string{}} + block2 := &BasicBlock{ID: "block2", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + block3 := &BasicBlock{ID: "block3", Type: BlockTypeNormal, Successors: []string{}, Predecessors: []string{}} + + cfg.AddBlock(block1) + cfg.AddBlock(block2) + cfg.AddBlock(block3) + + cfg.AddEdge(cfg.EntryBlockID, "block1") + cfg.AddEdge("block1", "block2") + cfg.AddEdge("block1", "block3") + cfg.AddEdge("block2", cfg.ExitBlockID) + cfg.AddEdge("block3", cfg.ExitBlockID) + + paths := cfg.GetAllPaths() + + require.Len(t, paths, 2) + + // Convert paths to comparable format + path1 := []string{cfg.EntryBlockID, "block1", "block2", cfg.ExitBlockID} + path2 := []string{cfg.EntryBlockID, "block1", "block3", cfg.ExitBlockID} + + assert.Contains(t, paths, path1) + assert.Contains(t, paths, path2) +} + +func TestBlockType_Constants(t *testing.T) { + assert.Equal(t, BlockType("entry"), BlockTypeEntry) + assert.Equal(t, BlockType("exit"), BlockTypeExit) + assert.Equal(t, BlockType("normal"), BlockTypeNormal) + assert.Equal(t, BlockType("conditional"), BlockTypeConditional) + assert.Equal(t, BlockType("loop"), BlockTypeLoop) + assert.Equal(t, BlockType("switch"), BlockTypeSwitch) + assert.Equal(t, BlockType("try"), BlockTypeTry) + assert.Equal(t, BlockType("catch"), BlockTypeCatch) + assert.Equal(t, BlockType("finally"), BlockTypeFinally) +} + +func TestBasicBlock_WithInstructions(t *testing.T) { + callSite := CallSite{ + Target: "sanitize", + Location: Location{ + File: "/test/file.py", + Line: 10, + Column: 5, + }, + Arguments: []Argument{ + {Value: "data", IsVariable: true, Position: 0}, + }, + Resolved: true, + TargetFQN: "myapp.utils.sanitize", + } + + block := &BasicBlock{ + ID: "block1", + Type: BlockTypeNormal, + StartLine: 10, + EndLine: 12, + Instructions: []CallSite{callSite}, + } + + assert.Len(t, block.Instructions, 1) + assert.Equal(t, "sanitize", block.Instructions[0].Target) + assert.Equal(t, "myapp.utils.sanitize", block.Instructions[0].TargetFQN) +} + +func TestBasicBlock_ConditionalWithCondition(t *testing.T) { + block := &BasicBlock{ + ID: "block1", + Type: BlockTypeConditional, + Condition: "user.is_admin()", + Successors: []string{"true_branch", "false_branch"}, + } + + assert.Equal(t, BlockTypeConditional, block.Type) + assert.Equal(t, "user.is_admin()", block.Condition) + assert.Len(t, block.Successors, 2) +} + +func TestIntersect(t *testing.T) { + tests := []struct { + name string + a []string + b []string + expected []string + }{ + { + name: "Common elements", + a: []string{"a", "b", "c"}, + b: []string{"b", "c", "d"}, + expected: []string{"b", "c"}, + }, + { + name: "No common elements", + a: []string{"a", "b"}, + b: []string{"c", "d"}, + expected: []string{}, + }, + { + name: "One empty slice", + a: []string{"a", "b"}, + b: []string{}, + expected: []string{}, + }, + { + name: "Identical slices", + a: []string{"a", "b", "c"}, + b: []string{"a", "b", "c"}, + expected: []string{"a", "b", "c"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := intersect(tt.a, tt.b) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestSlicesEqual(t *testing.T) { + tests := []struct { + name string + a []string + b []string + expected bool + }{ + { + name: "Equal slices", + a: []string{"a", "b", "c"}, + b: []string{"a", "b", "c"}, + expected: true, + }, + { + name: "Different length", + a: []string{"a", "b"}, + b: []string{"a", "b", "c"}, + expected: false, + }, + { + name: "Different order", + a: []string{"a", "b", "c"}, + b: []string{"a", "c", "b"}, + expected: false, + }, + { + name: "Empty slices", + a: []string{}, + b: []string{}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := slicesEqual(tt.a, tt.b) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestCFG_ComplexExample(t *testing.T) { + // Test a more realistic CFG structure representing: + // def process_user(user_id): + // user = get_user(user_id) # Block 1 + // if user.is_admin(): # Block 2 (conditional) + // grant_access() # Block 3 (true branch) + // else: + // deny_access() # Block 4 (false branch) + // log_action(user) # Block 5 (merge point) + // return # Exit + + cfg := NewControlFlowGraph("myapp.process_user") + + block1 := &BasicBlock{ + ID: "block1", + Type: BlockTypeNormal, + StartLine: 2, + EndLine: 2, + Instructions: []CallSite{ + {Target: "get_user", TargetFQN: "myapp.db.get_user"}, + }, + Successors: []string{}, + Predecessors: []string{}, + } + + block2 := &BasicBlock{ + ID: "block2", + Type: BlockTypeConditional, + StartLine: 3, + EndLine: 3, + Condition: "user.is_admin()", + Successors: []string{}, + Predecessors: []string{}, + } + + block3 := &BasicBlock{ + ID: "block3", + Type: BlockTypeNormal, + StartLine: 4, + EndLine: 4, + Instructions: []CallSite{ + {Target: "grant_access", TargetFQN: "myapp.auth.grant_access"}, + }, + Successors: []string{}, + Predecessors: []string{}, + } + + block4 := &BasicBlock{ + ID: "block4", + Type: BlockTypeNormal, + StartLine: 6, + EndLine: 6, + Instructions: []CallSite{ + {Target: "deny_access", TargetFQN: "myapp.auth.deny_access"}, + }, + Successors: []string{}, + Predecessors: []string{}, + } + + block5 := &BasicBlock{ + ID: "block5", + Type: BlockTypeNormal, + StartLine: 7, + EndLine: 7, + Instructions: []CallSite{ + {Target: "log_action", TargetFQN: "myapp.logging.log_action"}, + }, + Successors: []string{}, + Predecessors: []string{}, + } + + cfg.AddBlock(block1) + cfg.AddBlock(block2) + cfg.AddBlock(block3) + cfg.AddBlock(block4) + cfg.AddBlock(block5) + + // Build edges + cfg.AddEdge(cfg.EntryBlockID, "block1") + cfg.AddEdge("block1", "block2") + cfg.AddEdge("block2", "block3") // True branch + cfg.AddEdge("block2", "block4") // False branch + cfg.AddEdge("block3", "block5") // Merge + cfg.AddEdge("block4", "block5") // Merge + cfg.AddEdge("block5", cfg.ExitBlockID) + + // Compute dominators + cfg.ComputeDominators() + + // Verify structure + assert.Len(t, cfg.Blocks, 7) // Entry, 5 blocks, Exit + + // Verify paths + paths := cfg.GetAllPaths() + assert.Len(t, paths, 2) // Two paths (admin and non-admin) + + // Verify dominators + // Block1 should dominate block5 (always executed before block5) + assert.True(t, cfg.IsDominator("block1", "block5")) + + // Block2 should dominate block5 (always executed before block5) + assert.True(t, cfg.IsDominator("block2", "block5")) + + // Block3 should NOT dominate block5 (only on true path) + assert.False(t, cfg.IsDominator("block3", "block5")) + + // Block4 should NOT dominate block5 (only on false path) + assert.False(t, cfg.IsDominator("block4", "block5")) +}