From e3a70b60c4a8995c66cfd1f55cbd29fb1ed44171 Mon Sep 17 00:00:00 2001 From: shivasurya Date: Thu, 23 Oct 2025 23:21:18 -0400 Subject: [PATCH] added performance measure tools and optimized memory usage --- class_memory.csv | 1 + perf_tools/.gitignore | 25 ++ perf_tools/README.md | 226 +++++++++++++++++ perf_tools/fast_monitor.sh | 16 ++ sourcecode-parser/cmd/query.go | 4 +- sourcecode-parser/graph/parser_java.go | 16 +- sourcecode-parser/graph/parser_python.go | 54 +++- sourcecode-parser/graph/parser_statements.go | 20 +- sourcecode-parser/graph/query.go | 244 ++++++++++++++++++- sourcecode-parser/graph/types.go | 30 ++- test_memory.csv | 1 + 11 files changed, 603 insertions(+), 34 deletions(-) create mode 100644 class_memory.csv create mode 100644 perf_tools/.gitignore create mode 100644 perf_tools/README.md create mode 100755 perf_tools/fast_monitor.sh create mode 100644 test_memory.csv diff --git a/class_memory.csv b/class_memory.csv new file mode 100644 index 00000000..2cbab90c --- /dev/null +++ b/class_memory.csv @@ -0,0 +1 @@ +timestamp,rss_mb,vsz_mb diff --git a/perf_tools/.gitignore b/perf_tools/.gitignore new file mode 100644 index 00000000..0c48a39a --- /dev/null +++ b/perf_tools/.gitignore @@ -0,0 +1,25 @@ +# Ignore all benchmark output files +*.csv +*.png +*.log + +# Ignore specific benchmark runs +benchmark.* +*_benchmark.* +*_memory.* +class_* +function_* +test_* + +# Allow example files if we add them later +!example_output.png +!example_data.csv + +# Python cache +__pycache__/ +*.py[cod] +*$py.class + +# OS specific +.DS_Store +Thumbs.db diff --git a/perf_tools/README.md b/perf_tools/README.md new file mode 100644 index 00000000..36fa52d3 --- /dev/null +++ b/perf_tools/README.md @@ -0,0 +1,226 @@ +# Performance Profiling Tools for Code-Pathfinder + +A collection of tools to measure and visualize memory usage and performance of code-pathfinder queries. + +## Quick Start + +### 1. 
Basic Usage (Easiest) + +```bash +cd perf_tools +./benchmark.sh +``` + +This runs a default benchmark on the SaltStack codebase with a function definition query. + +### 2. Custom Query + +```bash +./benchmark.sh -q "FROM class_definition AS cd SELECT cd" -o class_results +``` + +### 3. Different Project + +```bash +./benchmark.sh -p /path/to/your/project -q "FROM function_definition AS fd SELECT fd" +``` + +## Command Line Options + +``` +Usage: ./benchmark.sh [options] + +Options: + -p, --project DIR Project directory to analyze (default: ~/src/shivasurya/salt) + -q, --query QUERY Query to run (default: 'FROM function_definition AS fd SELECT fd') + -o, --output NAME Output file prefix (default: 'benchmark') + -b, --binary PATH Path to pathfinder binary (default: ../sourcecode-parser/build/go/pathfinder) + -h, --help Show this help message +``` + +## Examples + +### Compare Class vs Function Queries + +```bash +# Run class definition benchmark +./benchmark.sh -q "FROM class_definition AS cd SELECT cd" -o class_benchmark + +# Run function definition benchmark +./benchmark.sh -q "FROM function_definition AS fd SELECT fd" -o function_benchmark + +# Compare the PNG graphs! +open class_benchmark.png function_benchmark.png +``` + +### Test Different Codebases + +```bash +# Test on your own project +./benchmark.sh -p ~/myproject -o myproject_benchmark + +# Test on multiple projects +for proj in project1 project2 project3; do + ./benchmark.sh -p ~/repos/$proj -o ${proj}_benchmark +done +``` + +## Output Files + +Each benchmark run creates 3 files: + +1. **`{name}.csv`** - Raw memory usage data (timestamp, RSS, VSZ) +2. **`{name}.png`** - Memory usage graph with timeline +3. 
**`{name}.log`** - Query execution log + +Example: +``` +benchmark.csv - Memory data points +benchmark.png - Visual graph +benchmark.log - Execution log +``` + +## Understanding the Results + +### Memory Metrics + +- **RSS (Resident Set Size)**: Actual physical memory used (most important) +- **VSZ (Virtual Memory Size)**: Total virtual memory allocated + +### Graph Interpretation + +``` +Memory Usage Over Time +│ +│ Peak: 2943.6 MB +│ Avg: 2813.4 MB +│ +│ 3000 MB ├─────────────────────── Flat line (good!) +│ │ ╱──────────────── +│ 2000 MB │ ╱ +│ │ ╱ Parsing phase +│ 1000 MB │ ╱ +│ │ ╱ +│ 0 MB └──────────────────────► + 0s 20s 40s 60s 80s +``` + +**Good patterns:** +- ✅ Rapid rise then flat = efficient memory use +- ✅ Stable plateau = no memory leaks + +**Bad patterns:** +- ❌ Continuous rise = possible memory leak +- ❌ Spikes during query = inefficient allocations + +## Requirements + +### Required +- Bash shell +- Built pathfinder binary (run `cd ../sourcecode-parser && gradle buildGo`) + +### Optional +- Python 3 with matplotlib and pandas for graph generation + ```bash + pip3 install matplotlib pandas + ``` + +## Manual Mode (Advanced) + +If you want more control, use the individual scripts: + +### 1. Run Query with Monitoring + +```bash +# Terminal 1: Start query +../sourcecode-parser/build/go/pathfinder query --project ~/salt --query "..." & +PID=$! + +# Terminal 2: Monitor memory +./fast_monitor.sh $PID memory_data.csv +``` + +### 2. 
Generate Graph + +```bash +python3 plot_memory.py memory_data.csv +# Creates: memory_data.png +``` + +## Scripts Overview + +| Script | Purpose | +|--------|---------| +| `benchmark.sh` | **Main tool** - Easy-to-use wrapper | +| `fast_monitor.sh` | Monitors process memory (100ms sampling) | +| `monitor_memory.sh` | Slower monitoring (500ms sampling) | +| `plot_memory.py` | Generates memory usage graphs | + +## Comparing Optimizations + +To measure the impact of performance optimizations: + +```bash +# Before optimization +git checkout main +cd ../sourcecode-parser && gradle clean buildGo && cd ../perf_tools +./benchmark.sh -o before_optimization + +# After optimization +git checkout feature-branch +cd ../sourcecode-parser && gradle clean buildGo && cd ../perf_tools +./benchmark.sh -o after_optimization + +# Compare results +echo "Before: $(awk -F, 'NR > 1 && $2 > max { max = $2 } END { print max }' before_optimization.csv) MB peak RSS" +echo "After: $(awk -F, 'NR > 1 && $2 > max { max = $2 } END { print max }' after_optimization.csv) MB peak RSS" +``` + +## Troubleshooting + +### "Pathfinder binary not found" + +Build the binary first: +```bash +cd ../sourcecode-parser +gradle clean buildGo +cd ../perf_tools +``` + +### "Python3 not found" + +The CSV data is still generated. You can: +1. Install Python: `brew install python3` +2. Use the CSV data with your own tools +3. Run without graphs (CSV has all the data) + +### "Project directory not found" + +Specify the correct path: +```bash +./benchmark.sh -p /absolute/path/to/your/project +``` + +## Tips + +1. **Run multiple times**: Results can vary due to system load. Run 3 times and compare. + +2. **Close other apps**: For accurate results, close memory-heavy applications. + +3. **Use full paths**: When in doubt, use absolute paths for `-p` and `-b` options. + +4. **Compare similar queries**: Compare "class vs class" or "function vs function" for fair comparisons. + +## Contributing + +Found a bug or want to improve these tools?
The scripts are simple bash/Python: + +- `benchmark.sh` - Main orchestration script +- `fast_monitor.sh` - Memory sampling loop +- `plot_memory.py` - matplotlib graphing + +Feel free to modify and improve! + +## License + +Same as code-pathfinder project (AGPL-3.0). diff --git a/perf_tools/fast_monitor.sh b/perf_tools/fast_monitor.sh new file mode 100755 index 00000000..e5e6a2ad --- /dev/null +++ b/perf_tools/fast_monitor.sh @@ -0,0 +1,16 @@ +#!/bin/bash +PID=$1 # PID of the process to sample +OUTPUT=${2:-memory_usage.csv} # CSV destination; defaults to memory_usage.csv + +echo "timestamp,rss_mb,vsz_mb" > "$OUTPUT" + +while kill -0 "$PID" 2>/dev/null; do # kill -0 sends no signal; it fails once the process is gone + TIMESTAMP=$(date +%s.%N) + MEM=$(ps -p "$PID" -o rss=,vsz= 2>/dev/null | awk '{print $1/1024","$2/1024}') # ps values divided by 1024 to fill the *_mb columns + if [ -n "$MEM" ]; then + echo "$TIMESTAMP,$MEM" >> "$OUTPUT" + fi + sleep 0.1 # Sample every 100ms instead of 500ms +done + +echo "Memory monitoring complete. Data saved to $OUTPUT" diff --git a/sourcecode-parser/cmd/query.go b/sourcecode-parser/cmd/query.go index 6f91895c..ec8693f7 100644 --- a/sourcecode-parser/cmd/query.go +++ b/sourcecode-parser/cmd/query.go @@ -139,7 +139,7 @@ func processQuery(input string, codeGraph *graph.CodeGraph, output string) (stri result := make(map[string]interface{}) result["file"] = entityObject.File result["line"] = entityObject.LineNumber - result["code"] = entityObject.CodeSnippet + result["code"] = entityObject.GetCodeSnippet() results["result_set"] = append(results["result_set"].([]map[string]interface{}), result) //nolint:all } @@ -167,7 +167,7 @@ func processQuery(input string, codeGraph *graph.CodeGraph, output string) (stri header += output + "\n" result += header result += "\n" - codeSnippetArray := strings.Split(entityObject.CodeSnippet, "\n") + codeSnippetArray := strings.Split(entityObject.GetCodeSnippet(), "\n") for i := 0; i < len(codeSnippetArray); i++ { lineNumber := color.New(color.FgCyan).SprintfFunc()("%4d", int(entityObject.LineNumber)+i) result += fmt.Sprintf("%s%s %s %s\n", strings.Repeat("\t", 2), lineNumber, verticalLine,
yellowCode(codeSnippetArray[i])) diff --git a/sourcecode-parser/graph/parser_java.go b/sourcecode-parser/graph/parser_java.go index b8994be5..1e265f16 100644 --- a/sourcecode-parser/graph/parser_java.go +++ b/sourcecode-parser/graph/parser_java.go @@ -61,7 +61,7 @@ func parseJavaBinaryExpression(node *sitter.Node, sourceCode []byte, graph *Code ID: GenerateSha256(exprType + node.Content(sourceCode)), Type: exprType, Name: node.Content(sourceCode), - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, File: file, isJavaSourceFile: isJavaSourceFile, @@ -73,7 +73,7 @@ func parseJavaBinaryExpression(node *sitter.Node, sourceCode []byte, graph *Code ID: GenerateSha256("binary_expression" + node.Content(sourceCode)), Type: "binary_expression", Name: node.Content(sourceCode), - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, File: file, isJavaSourceFile: isJavaSourceFile, @@ -138,7 +138,7 @@ func parseJavaMethodDeclaration(node *sitter.Node, sourceCode []byte, graph *Cod ID: methodID, Type: "method_declaration", Name: methodName, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, Modifier: extractVisibilityModifier(modifiers), ReturnType: returnType, @@ -183,7 +183,7 @@ func parseJavaMethodInvocation(node *sitter.Node, sourceCode []byte, graph *Code Type: "method_invocation", Name: methodName, IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, MethodArgumentsValue: arguments, File: file, @@ -241,7 +241,7 @@ func parseJavaClassDeclaration(node 
*sitter.Node, sourceCode []byte, graph *Code ID: GenerateMethodID("class:"+className, []string{}, file), Type: "class_declaration", Name: className, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, PackageName: packageName, Modifier: extractVisibilityModifier(accessModifier), @@ -264,7 +264,7 @@ func parseJavaBlockComment(node *sitter.Node, sourceCode []byte, graph *CodeGrap commentNode := &Node{ ID: GenerateMethodID(node.Content(sourceCode), []string{}, file), Type: "block_comment", - CodeSnippet: commentContent, + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, File: file, isJavaSourceFile: true, @@ -316,7 +316,7 @@ func parseJavaVariableDeclaration(node *sitter.Node, sourceCode []byte, graph *C ID: GenerateMethodID(variableName, []string{}, file), Type: "variable_declaration", Name: variableName, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, Modifier: extractVisibilityModifier(variableModifier), DataType: variableType, @@ -364,7 +364,7 @@ func parseJavaObjectCreation(node *sitter.Node, sourceCode []byte, graph *CodeGr ID: GenerateMethodID(className, []string{strconv.Itoa(int(node.StartPoint().Row + 1))}, file), Type: "ClassInstanceExpr", Name: className, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, File: file, isJavaSourceFile: true, diff --git a/sourcecode-parser/graph/parser_python.go b/sourcecode-parser/graph/parser_python.go index 6f87906a..b7d8744f 100644 --- a/sourcecode-parser/graph/parser_python.go +++ b/sourcecode-parser/graph/parser_python.go @@ -33,7 +33,11 @@ func 
parsePythonFunctionDefinition(node *sitter.Node, sourceCode []byte, graph * ID: methodID, Type: "function_definition", Name: functionName, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{ + File: file, + StartByte: node.StartByte(), + EndByte: node.EndByte(), + }, LineNumber: node.StartPoint().Row + 1, MethodArgumentsValue: parameters, File: file, @@ -68,7 +72,11 @@ func parsePythonClassDefinition(node *sitter.Node, sourceCode []byte, graph *Cod ID: GenerateMethodID("class:"+className, []string{}, file), Type: "class_definition", Name: className, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{ + File: file, + StartByte: node.StartByte(), + EndByte: node.EndByte(), + }, LineNumber: node.StartPoint().Row + 1, Interface: superClasses, File: file, @@ -102,7 +110,11 @@ func parsePythonCall(node *sitter.Node, sourceCode []byte, graph *CodeGraph, cur Type: "call", Name: callName, IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{ + File: file, + StartByte: node.StartByte(), + EndByte: node.EndByte(), + }, LineNumber: node.StartPoint().Row + 1, MethodArgumentsValue: arguments, File: file, @@ -124,7 +136,11 @@ func parsePythonReturnStatement(node *sitter.Node, sourceCode []byte, graph *Cod LineNumber: node.StartPoint().Row + 1, Name: "ReturnStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{ + File: file, + StartByte: node.StartByte(), + EndByte: node.EndByte(), + }, File: file, isPythonSourceFile: true, ReturnStmt: returnNode, @@ -142,7 +158,11 @@ func parsePythonBreakStatement(node *sitter.Node, sourceCode []byte, graph *Code LineNumber: node.StartPoint().Row + 1, Name: "BreakStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{ + File: file, + StartByte: node.StartByte(), + EndByte: node.EndByte(), + }, File: file, isPythonSourceFile: true, BreakStmt: breakNode, @@ -160,7 +180,11 
@@ func parsePythonContinueStatement(node *sitter.Node, sourceCode []byte, graph *C LineNumber: node.StartPoint().Row + 1, Name: "ContinueStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{ + File: file, + StartByte: node.StartByte(), + EndByte: node.EndByte(), + }, File: file, isPythonSourceFile: true, ContinueStmt: continueNode, @@ -178,7 +202,11 @@ func parsePythonAssertStatement(node *sitter.Node, sourceCode []byte, graph *Cod LineNumber: node.StartPoint().Row + 1, Name: "AssertStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{ + File: file, + StartByte: node.StartByte(), + EndByte: node.EndByte(), + }, File: file, isPythonSourceFile: true, AssertStmt: assertNode, @@ -200,7 +228,11 @@ func parsePythonYieldExpression(node *sitter.Node, sourceCode []byte, graph *Cod LineNumber: child.StartPoint().Row + 1, Name: "YieldStmt", IsExternal: true, - CodeSnippet: child.Content(sourceCode), + SourceLocation: &SourceLocation{ + File: file, + StartByte: child.StartByte(), + EndByte: child.EndByte(), + }, File: file, isPythonSourceFile: true, YieldStmt: yieldNode, @@ -231,7 +263,11 @@ func parsePythonAssignment(node *sitter.Node, sourceCode []byte, graph *CodeGrap ID: GenerateMethodID(variableName, []string{}, file), Type: "variable_assignment", Name: variableName, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{ + File: file, + StartByte: node.StartByte(), + EndByte: node.EndByte(), + }, LineNumber: node.StartPoint().Row + 1, VariableValue: variableValue, Scope: "local", diff --git a/sourcecode-parser/graph/parser_statements.go b/sourcecode-parser/graph/parser_statements.go index 467a14d5..305616ce 100644 --- a/sourcecode-parser/graph/parser_statements.go +++ b/sourcecode-parser/graph/parser_statements.go @@ -18,7 +18,7 @@ func parseBlockStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph, LineNumber: node.StartPoint().Row + 1, Name: 
"BlockStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, File: file, isJavaSourceFile: isJavaSourceFile, BlockStmt: blockNode, @@ -39,7 +39,7 @@ func parseReturnStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph LineNumber: node.StartPoint().Row + 1, Name: "ReturnStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, File: file, isJavaSourceFile: isJava, ReturnStmt: returnNode, @@ -61,7 +61,7 @@ func parseBreakStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph, LineNumber: node.StartPoint().Row + 1, Name: "BreakStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, File: file, isJavaSourceFile: isJava, BreakStmt: breakNode, @@ -83,7 +83,7 @@ func parseContinueStatement(node *sitter.Node, sourceCode []byte, graph *CodeGra LineNumber: node.StartPoint().Row + 1, Name: "ContinueStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, File: file, isJavaSourceFile: isJava, ContinueStmt: continueNode, @@ -105,7 +105,7 @@ func parseAssertStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph LineNumber: node.StartPoint().Row + 1, Name: "AssertStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, File: file, isJavaSourceFile: isJava, AssertStmt: assertNode, @@ -124,7 +124,7 @@ func parseYieldStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph, LineNumber: node.StartPoint().Row + 1, Name: "YieldStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + 
SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, File: file, isJavaSourceFile: isJavaSourceFile, YieldStmt: yieldNode, @@ -154,7 +154,7 @@ func parseIfStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph, fi Type: "IfStmt", Name: "IfStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, File: file, isJavaSourceFile: isJavaSourceFile, @@ -176,7 +176,7 @@ func parseWhileStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph, Type: "WhileStmt", Name: "WhileStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, File: file, isJavaSourceFile: isJavaSourceFile, @@ -198,7 +198,7 @@ func parseDoStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph, fi Type: "DoStmt", Name: "DoStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, File: file, isJavaSourceFile: isJavaSourceFile, @@ -229,7 +229,7 @@ func parseForStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph, f Type: "ForStmt", Name: "ForStmt", IsExternal: true, - CodeSnippet: node.Content(sourceCode), + SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()}, LineNumber: node.StartPoint().Row + 1, File: file, isJavaSourceFile: isJavaSourceFile, diff --git a/sourcecode-parser/graph/query.go b/sourcecode-parser/graph/query.go index fb98d5d7..63a5cf3d 100644 --- a/sourcecode-parser/graph/query.go +++ b/sourcecode-parser/graph/query.go @@ -4,6 +4,7 @@ import ( "fmt" "log" "strings" + "sync" "github.com/expr-lang/expr" 
"github.com/shivasurya/code-pathfinder/sourcecode-parser/analytics" @@ -159,6 +160,238 @@ func (env *Env) GetBlockStmt() *model.BlockStmt { return env.Node.BlockStmt } +// Pool for small environment maps to reduce allocations. +var envMapPool = sync.Pool{ + New: func() interface{} { + return make(map[string]interface{}, 10) + }, +} + +// getEnvMapFromPool gets a map from the pool and clears it. +func getEnvMapFromPool() map[string]interface{} { + m := envMapPool.Get().(map[string]interface{}) + // Clear the map + for k := range m { + delete(m, k) + } + return m +} + +// returnEnvMapToPool returns a map to the pool. +func returnEnvMapToPool(m map[string]interface{}) { + if m != nil && len(m) < 100 { // Only pool reasonably-sized maps + envMapPool.Put(m) + } +} + +// buildEntityEnv creates the environment map for a specific entity type only. +// This avoids creating all 28 entity type maps when only 1 is needed. +func buildEntityEnv(proxyenv *Env, entityType string, _ string) map[string]interface{} { + switch entityType { + case "method_declaration": + return map[string]interface{}{ + "getVisibility": proxyenv.GetVisibility, + "getAnnotation": proxyenv.GetAnnotations, + "getReturnType": proxyenv.GetReturnType, + "getName": proxyenv.GetName, + "getArgumentType": proxyenv.GetArgumentTypes, + "getArgumentName": proxyenv.GetArgumentNames, + "getThrowsType": proxyenv.GetThrowsTypes, + "getDoc": proxyenv.GetDoc, + "toString": proxyenv.ToString, + } + case "class_declaration": + return map[string]interface{}{ + "getSuperClass": proxyenv.GetSuperClass, + "getName": proxyenv.GetName, + "getAnnotation": proxyenv.GetAnnotations, + "getVisibility": proxyenv.GetVisibility, + "getInterface": proxyenv.GetInterfaces, + "getDoc": proxyenv.GetDoc, + "toString": proxyenv.ToString, + } + case "method_invocation": + return map[string]interface{}{ + "getArgumentName": proxyenv.GetArgumentNames, + "getName": proxyenv.GetName, + "getDoc": proxyenv.GetDoc, + "toString": proxyenv.ToString, + 
} + case "variable_declaration": + return map[string]interface{}{ + "getName": proxyenv.GetName, + "getVisibility": proxyenv.GetVisibility, + "getVariableValue": proxyenv.GetVariableValue, + "getVariableDataType": proxyenv.GetVariableDataType, + "getScope": proxyenv.GetScope, + "getDoc": proxyenv.GetDoc, + "toString": proxyenv.ToString, + } + case "binary_expression": + return map[string]interface{}{ + "getLeftOperand": proxyenv.GetLeftOperand, + "getRightOperand": proxyenv.GetRightOperand, + "toString": proxyenv.ToString, + } + case "function_definition": + return map[string]interface{}{ + "getName": proxyenv.GetName, + "getArgumentName": proxyenv.GetArgumentNames, + "toString": proxyenv.ToString, + } + case "class_definition": + return map[string]interface{}{ + "getName": proxyenv.GetName, + "getInterface": proxyenv.GetInterfaces, + "toString": proxyenv.ToString, + } + case "add_expression", "sub_expression", "mul_expression", "div_expression": + op := "+" + switch entityType { + case "sub_expression": + op = "-" + case "mul_expression": + op = "*" + case "div_expression": + op = "/" + } + return map[string]interface{}{ + "getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": op, + "toString": proxyenv.ToString, + } + case "comparison_expression", "equal_expression": + return map[string]interface{}{ + "getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": "==", + "toString": proxyenv.ToString, + } + case "not_equal_expression": + return map[string]interface{}{ + "getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": "!=", + "toString": proxyenv.ToString, + } + case "remainder_expression": + return map[string]interface{}{ + "getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": "%", + "toString": proxyenv.ToString, + } + case "right_shift_expression": + return map[string]interface{}{ + "getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": ">>", + "toString": proxyenv.ToString, + } + case "left_shift_expression": + return map[string]interface{}{ + 
"getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": "<<", + "toString": proxyenv.ToString, + } + case "and_bitwise_expression": + return map[string]interface{}{ + "getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": "&", + "toString": proxyenv.ToString, + } + case "and_logical_expression": + return map[string]interface{}{ + "getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": "&&", + "toString": proxyenv.ToString, + } + case "or_logical_expression": + return map[string]interface{}{ + "getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": "||", + "toString": proxyenv.ToString, + } + case "or_bitwise_expression": + return map[string]interface{}{ + "getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": "|", + "toString": proxyenv.ToString, + } + case "unsigned_right_shift_expression": + return map[string]interface{}{ + "getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": ">>>", + "toString": proxyenv.ToString, + } + case "xor_bitwise_expression": + return map[string]interface{}{ + "getBinaryExpr": proxyenv.GetBinaryExpr, + "getOperator": "^", + "toString": proxyenv.ToString, + } + case "ClassInstanceExpr": + return map[string]interface{}{ + "getName": proxyenv.GetName, + "getDoc": proxyenv.GetDoc, + "toString": proxyenv.ToString, + "getClassInstanceExpr": proxyenv.GetClassInstanceExpr, + } + case "IfStmt": + return map[string]interface{}{ + "getIfStmt": proxyenv.GetIfStmt, + "toString": proxyenv.ToString, + } + case "WhileStmt": + return map[string]interface{}{ + "getWhileStmt": proxyenv.GetWhileStmt, + "toString": proxyenv.ToString, + } + case "DoStmt": + return map[string]interface{}{ + "getDoStmt": proxyenv.GetDoStmt, + "toString": proxyenv.ToString, + } + case "ForStmt": + return map[string]interface{}{ + "getForStmt": proxyenv.GetForStmt, + "toString": proxyenv.ToString, + } + case "BreakStmt": + return map[string]interface{}{ + "toString": proxyenv.ToString, + "getBreakStmt": proxyenv.GetBreakStmt, + } + case "ContinueStmt": + return 
map[string]interface{}{ + "toString": proxyenv.ToString, + "getContinueStmt": proxyenv.GetContinueStmt, + } + case "YieldStmt": + return map[string]interface{}{ + "toString": proxyenv.ToString, + "getYieldStmt": proxyenv.GetYieldStmt, + } + case "AssertStmt": + return map[string]interface{}{ + "toString": proxyenv.ToString, + "getAssertStmt": proxyenv.GetAssertStmt, + } + case "ReturnStmt": + return map[string]interface{}{ + "toString": proxyenv.ToString, + "getReturnStmt": proxyenv.GetReturnStmt, + } + case "BlockStmt": + return map[string]interface{}{ + "toString": proxyenv.ToString, + "getBlockStmt": proxyenv.GetBlockStmt, + } + default: + // Fallback for unknown types + return map[string]interface{}{ + "getName": proxyenv.GetName, + "toString": proxyenv.ToString, + } + } +} + func QueryEntities(graph *CodeGraph, query parser.Query) (nodes [][]*Node, output [][]interface{}) { result := make([][]*Node, 0) @@ -210,6 +443,7 @@ func generateOutput(nodeSet [][]*Node, query parser.Query) [][]interface{} { func evaluateExpression(node []*Node, expression string, query parser.Query) (interface{}, error) { env := generateProxyEnvForSet(node, query) + defer returnEnvMapToPool(env) // Return to pool when done program, err := expr.Compile(expression, expr.Env(env)) if err != nil { @@ -635,6 +869,7 @@ func FilterEntities(node []*Node, query parser.Query) bool { } env := generateProxyEnvForSet(node, query) + defer returnEnvMapToPool(env) // Return to pool when done expression = ReplacePredicateVariables(query) @@ -660,7 +895,7 @@ type classInstance struct { } func generateProxyEnvForSet(nodeSet []*Node, query parser.Query) map[string]interface{} { - env := make(map[string]interface{}) + env := getEnvMapFromPool() for i, entity := range query.SelectList { // Check if entity is a class type @@ -668,9 +903,10 @@ func generateProxyEnvForSet(nodeSet []*Node, query parser.Query) map[string]inte if classDecl != nil { env[entity.Alias] = createClassInstance(classDecl) } else { - // 
Handle existing node types - proxyEnv := generateProxyEnv(nodeSet[i], query) - env[entity.Alias] = proxyEnv[entity.Alias] + // OPTIMIZED: Only build the specific entity type map needed + // instead of creating all 28 entity type maps + proxyEnvWrapper := &Env{Node: nodeSet[i]} + env[entity.Alias] = buildEntityEnv(proxyEnvWrapper, entity.Entity, entity.Alias) } } return env diff --git a/sourcecode-parser/graph/types.go b/sourcecode-parser/graph/types.go index 39d0e606..99fac96d 100644 --- a/sourcecode-parser/graph/types.go +++ b/sourcecode-parser/graph/types.go @@ -2,13 +2,21 @@ package graph import "github.com/shivasurya/code-pathfinder/sourcecode-parser/model" +// SourceLocation stores the file location of a code snippet for lazy loading. +type SourceLocation struct { + File string + StartByte uint32 + EndByte uint32 +} + // Node represents a node in the code graph with various properties // describing code elements like classes, methods, variables, etc. type Node struct { ID string Type string Name string - CodeSnippet string + CodeSnippet string // DEPRECATED: Will be removed, use GetCodeSnippet() instead + SourceLocation *SourceLocation LineNumber uint32 OutgoingEdges []*Edge IsExternal bool @@ -44,6 +52,26 @@ type Node struct { BlockStmt *model.BlockStmt } +// GetCodeSnippet returns the code snippet for this node. +// If SourceLocation is set, it reads from the file (lazy loading). +// Otherwise, it returns the deprecated CodeSnippet field for backward compatibility. 
+func (n *Node) GetCodeSnippet() string { + // Nodes created without a SourceLocation only have the eagerly stored snippet. + if n.SourceLocation != nil { + content, err := readFile(n.SourceLocation.File) + if err != nil { + // Best effort: if the file cannot be read, fall back to the stored snippet. + return n.CodeSnippet + } + // Slice only when StartByte <= EndByte <= len(content); an inverted or out-of-range slice expression can panic. + if loc := n.SourceLocation; loc.StartByte <= loc.EndByte && uint64(loc.EndByte) <= uint64(len(content)) { + return string(content[loc.StartByte:loc.EndByte]) + } + } + // Fallback to deprecated CodeSnippet field + return n.CodeSnippet +} + // Edge represents a directed edge between two nodes in the code graph. type Edge struct { From *Node diff --git a/test_memory.csv b/test_memory.csv new file mode 100644 index 00000000..2cbab90c --- /dev/null +++ b/test_memory.csv @@ -0,0 +1 @@ +timestamp,rss_mb,vsz_mb