From e3a70b60c4a8995c66cfd1f55cbd29fb1ed44171 Mon Sep 17 00:00:00 2001
From: shivasurya <s.shivasurya@gmail.com>
Date: Thu, 23 Oct 2025 23:21:18 -0400
Subject: [PATCH] added performance measure tools and optimized memory usage

---
 class_memory.csv                             |   1 +
 perf_tools/.gitignore                        |  25 ++
 perf_tools/README.md                         | 226 +++++++++++++++++
 perf_tools/fast_monitor.sh                   |  16 ++
 sourcecode-parser/cmd/query.go               |   4 +-
 sourcecode-parser/graph/parser_java.go       |  16 +-
 sourcecode-parser/graph/parser_python.go     |  54 +++-
 sourcecode-parser/graph/parser_statements.go |  20 +-
 sourcecode-parser/graph/query.go             | 244 ++++++++++++++++++-
 sourcecode-parser/graph/types.go             |  30 ++-
 test_memory.csv                              |   1 +
 11 files changed, 603 insertions(+), 34 deletions(-)
 create mode 100644 class_memory.csv
 create mode 100644 perf_tools/.gitignore
 create mode 100644 perf_tools/README.md
 create mode 100755 perf_tools/fast_monitor.sh
 create mode 100644 test_memory.csv

diff --git a/class_memory.csv b/class_memory.csv
new file mode 100644
index 00000000..2cbab90c
--- /dev/null
+++ b/class_memory.csv
@@ -0,0 +1 @@
+timestamp,rss_mb,vsz_mb
diff --git a/perf_tools/.gitignore b/perf_tools/.gitignore
new file mode 100644
index 00000000..0c48a39a
--- /dev/null
+++ b/perf_tools/.gitignore
@@ -0,0 +1,25 @@
+# Ignore all benchmark output files
+*.csv
+*.png
+*.log
+
+# Ignore specific benchmark runs
+benchmark.*
+*_benchmark.*
+*_memory.*
+class_*
+function_*
+test_*
+# The patterns above also match benchmark.sh, monitor_memory.sh and plot_memory.py; keep scripts and example files tracked
+!*.sh
+!*.py
+!example_output.png
+!example_data.csv
+# Python cache
+__pycache__/
+*.py[cod]
+*$py.class
+
+# OS specific
+.DS_Store
+Thumbs.db
diff --git a/perf_tools/README.md b/perf_tools/README.md
new file mode 100644
index 00000000..36fa52d3
--- /dev/null
+++ b/perf_tools/README.md
@@ -0,0 +1,226 @@
+# Performance Profiling Tools for Code-Pathfinder
+
+A collection of tools to measure and visualize memory usage and performance of code-pathfinder queries.
+
+## Quick Start
+
+### 1. Basic Usage (Easiest)
+
+```bash
+cd perf_tools
+./benchmark.sh
+```
+
+This runs a default benchmark on the SaltStack codebase with a function definition query.
+
+### 2. Custom Query
+
+```bash
+./benchmark.sh -q "FROM class_definition AS cd SELECT cd" -o class_results
+```
+
+### 3. Different Project
+
+```bash
+./benchmark.sh -p /path/to/your/project -q "FROM function_definition AS fd SELECT fd"
+```
+
+## Command Line Options
+
+```
+Usage: ./benchmark.sh [options]
+
+Options:
+  -p, --project DIR     Project directory to analyze (default: ~/src/shivasurya/salt)
+  -q, --query QUERY     Query to run (default: 'FROM function_definition AS fd SELECT fd')
+  -o, --output NAME     Output file prefix (default: 'benchmark')
+  -b, --binary PATH     Path to pathfinder binary (default: ../sourcecode-parser/build/go/pathfinder)
+  -h, --help            Show this help message
+```
+
+## Examples
+
+### Compare Class vs Function Queries
+
+```bash
+# Run class definition benchmark
+./benchmark.sh -q "FROM class_definition AS cd SELECT cd" -o class_benchmark
+
+# Run function definition benchmark
+./benchmark.sh -q "FROM function_definition AS fd SELECT fd" -o function_benchmark
+
+# Compare the PNG graphs!
+open class_benchmark.png function_benchmark.png
+```
+
+### Test Different Codebases
+
+```bash
+# Test on your own project
+./benchmark.sh -p ~/myproject -o myproject_benchmark
+
+# Test on multiple projects
+for proj in project1 project2 project3; do
+    ./benchmark.sh -p ~/repos/$proj -o ${proj}_benchmark
+done
+```
+
+## Output Files
+
+Each benchmark run creates 3 files:
+
+1. **`{name}.csv`** - Raw memory usage data (timestamp, RSS, VSZ)
+2. **`{name}.png`** - Memory usage graph with timeline
+3. **`{name}.log`** - Query execution log
+
+Example:
+```
+benchmark.csv  - Memory data points
+benchmark.png  - Visual graph
+benchmark.log  - Execution log
+```
+
+## Understanding the Results
+
+### Memory Metrics
+
+- **RSS (Resident Set Size)**: Actual physical memory used (most important)
+- **VSZ (Virtual Memory Size)**: Total virtual memory allocated
+
+### Graph Interpretation
+
+```
+Memory Usage Over Time
+│
+│   Peak: 2943.6 MB
+│   Avg: 2813.4 MB
+│
+│ 3000 MB ├─────────────────────── Flat line (good!)
+│         │       ╱────────────────
+│ 2000 MB │      ╱
+│         │     ╱  Parsing phase
+│ 1000 MB │    ╱
+│         │   ╱
+│    0 MB └──────────────────────►
+          0s  20s  40s  60s  80s
+```
+
+**Good patterns:**
+- ✅ Rapid rise then flat = efficient memory use
+- ✅ Stable plateau = no memory leaks
+
+**Bad patterns:**
+- ❌ Continuous rise = possible memory leak
+- ❌ Spikes during query = inefficient allocations
+
+## Requirements
+
+### Required
+- Bash shell
+- Built pathfinder binary (run `cd ../sourcecode-parser && gradle buildGo`)
+
+### Optional
+- Python 3 with matplotlib and pandas for graph generation
+  ```bash
+  pip3 install matplotlib pandas
+  ```
+
+## Manual Mode (Advanced)
+
+If you want more control, use the individual scripts:
+
+### 1. Run Query with Monitoring
+
+```bash
+# Start the query in the background and capture its PID
+../sourcecode-parser/build/go/pathfinder query --project ~/salt --query "..." &
+PID=$!
+
+# In the same shell (so $PID is set), monitor memory until the query exits
+./fast_monitor.sh $PID memory_data.csv
+```
+
+### 2. Generate Graph
+
+```bash
+python3 plot_memory.py memory_data.csv
+# Creates: memory_data.png
+```
+
+## Scripts Overview
+
+| Script | Purpose |
+|--------|---------|
+| `benchmark.sh` | **Main tool** - Easy-to-use wrapper |
+| `fast_monitor.sh` | Monitors process memory (100ms sampling) |
+| `monitor_memory.sh` | Slower monitoring (500ms sampling) |
+| `plot_memory.py` | Generates memory usage graphs |
+
+## Comparing Optimizations
+
+To measure the impact of performance optimizations:
+
+```bash
+# Before optimization
+git checkout main
+cd sourcecode-parser && gradle clean buildGo && cd ../perf_tools
+./benchmark.sh -o before_optimization
+
+# After optimization
+git checkout feature-branch
+cd sourcecode-parser && gradle clean buildGo && cd ../perf_tools
+./benchmark.sh -o after_optimization
+
+# Compare results
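+echo "Before: $(awk -F, 'NR > 1 && $2+0 > max { max = $2+0 } END { print max " MB peak RSS" }' before_optimization.csv)"
+echo "After:  $(awk -F, 'NR > 1 && $2+0 > max { max = $2+0 } END { print max " MB peak RSS" }' after_optimization.csv)"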
+```
+
+## Troubleshooting
+
+### "Pathfinder binary not found"
+
+Build the binary first:
+```bash
+cd ../sourcecode-parser
+gradle clean buildGo
+cd ../perf_tools
+```
+
+### "Python3 not found"
+
+The CSV data is still generated. You can:
+1. Install Python: `brew install python3`
+2. Use the CSV data with your own tools
+3. Run without graphs (CSV has all the data)
+
+### "Project directory not found"
+
+Specify the correct path:
+```bash
+./benchmark.sh -p /absolute/path/to/your/project
+```
+
+## Tips
+
+1. **Run multiple times**: Results can vary due to system load. Run 3 times and compare.
+
+2. **Close other apps**: For accurate results, close memory-heavy applications.
+
+3. **Use full paths**: When in doubt, use absolute paths for `-p` and `-b` options.
+
+4. **Compare similar queries**: Compare "class vs class" or "function vs function" for fair comparisons.
+
+## Contributing
+
+Found a bug or want to improve these tools? The scripts are simple bash/Python:
+
+- `benchmark.sh` - Main orchestration script
+- `fast_monitor.sh` - Memory sampling loop
+- `plot_memory.py` - matplotlib graphing
+
+Feel free to modify and improve!
+
+## License
+
+Same as code-pathfinder project (AGPL-3.0).
diff --git a/perf_tools/fast_monitor.sh b/perf_tools/fast_monitor.sh
new file mode 100755
index 00000000..e5e6a2ad
--- /dev/null
+++ b/perf_tools/fast_monitor.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Usage: fast_monitor.sh PID [OUTPUT_CSV] - samples RSS/VSZ of PID every 100ms until the process exits.
+PID=${1:?usage: fast_monitor.sh PID [OUTPUT_CSV]}
+OUTPUT=${2:-memory_usage.csv}
+echo "timestamp,rss_mb,vsz_mb" > "$OUTPUT"
+
+while kill -0 "$PID" 2>/dev/null; do
+    TIMESTAMP=$(date +%s.%N)
+    # ps values are divided by 1024 to produce the rss_mb/vsz_mb columns.
+    MEM=$(ps -p "$PID" -o rss=,vsz= 2>/dev/null | awk '{print $1/1024","$2/1024}')
+    if [ -n "$MEM" ]; then
+        echo "$TIMESTAMP,$MEM" >> "$OUTPUT"
+    fi
+    sleep 0.1  # Sample every 100ms
+done
+echo "Memory monitoring complete. Data saved to $OUTPUT"
diff --git a/sourcecode-parser/cmd/query.go b/sourcecode-parser/cmd/query.go
index 6f91895c..ec8693f7 100644
--- a/sourcecode-parser/cmd/query.go
+++ b/sourcecode-parser/cmd/query.go
@@ -139,7 +139,7 @@ func processQuery(input string, codeGraph *graph.CodeGraph, output string) (stri
 				result := make(map[string]interface{})
 				result["file"] = entityObject.File
 				result["line"] = entityObject.LineNumber
-				result["code"] = entityObject.CodeSnippet
+				result["code"] = entityObject.GetCodeSnippet()
 
 				results["result_set"] = append(results["result_set"].([]map[string]interface{}), result) //nolint:all
 			}
@@ -167,7 +167,7 @@ func processQuery(input string, codeGraph *graph.CodeGraph, output string) (stri
 			header += output + "\n"
 			result += header
 			result += "\n"
-			codeSnippetArray := strings.Split(entityObject.CodeSnippet, "\n")
+			codeSnippetArray := strings.Split(entityObject.GetCodeSnippet(), "\n")
 			for i := 0; i < len(codeSnippetArray); i++ {
 				lineNumber := color.New(color.FgCyan).SprintfFunc()("%4d", int(entityObject.LineNumber)+i)
 				result += fmt.Sprintf("%s%s %s %s\n", strings.Repeat("\t", 2), lineNumber, verticalLine, yellowCode(codeSnippetArray[i]))
diff --git a/sourcecode-parser/graph/parser_java.go b/sourcecode-parser/graph/parser_java.go
index b8994be5..1e265f16 100644
--- a/sourcecode-parser/graph/parser_java.go
+++ b/sourcecode-parser/graph/parser_java.go
@@ -61,7 +61,7 @@ func parseJavaBinaryExpression(node *sitter.Node, sourceCode []byte, graph *Code
 		ID:               GenerateSha256(exprType + node.Content(sourceCode)),
 		Type:             exprType,
 		Name:             node.Content(sourceCode),
-		CodeSnippet:      node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		LineNumber:       node.StartPoint().Row + 1,
 		File:             file,
 		isJavaSourceFile: isJavaSourceFile,
@@ -73,7 +73,7 @@ func parseJavaBinaryExpression(node *sitter.Node, sourceCode []byte, graph *Code
 		ID:               GenerateSha256("binary_expression" + node.Content(sourceCode)),
 		Type:             "binary_expression",
 		Name:             node.Content(sourceCode),
-		CodeSnippet:      node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		LineNumber:       node.StartPoint().Row + 1,
 		File:             file,
 		isJavaSourceFile: isJavaSourceFile,
@@ -138,7 +138,7 @@ func parseJavaMethodDeclaration(node *sitter.Node, sourceCode []byte, graph *Cod
 		ID:                   methodID,
 		Type:                 "method_declaration",
 		Name:                 methodName,
-		CodeSnippet:          node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		LineNumber:           node.StartPoint().Row + 1,
 		Modifier:             extractVisibilityModifier(modifiers),
 		ReturnType:           returnType,
@@ -183,7 +183,7 @@ func parseJavaMethodInvocation(node *sitter.Node, sourceCode []byte, graph *Code
 		Type:                 "method_invocation",
 		Name:                 methodName,
 		IsExternal:           true,
-		CodeSnippet:          node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		LineNumber:           node.StartPoint().Row + 1,
 		MethodArgumentsValue: arguments,
 		File:                 file,
@@ -241,7 +241,7 @@ func parseJavaClassDeclaration(node *sitter.Node, sourceCode []byte, graph *Code
 		ID:               GenerateMethodID("class:"+className, []string{}, file),
 		Type:             "class_declaration",
 		Name:             className,
-		CodeSnippet:      node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		LineNumber:       node.StartPoint().Row + 1,
 		PackageName:      packageName,
 		Modifier:         extractVisibilityModifier(accessModifier),
@@ -264,7 +264,7 @@ func parseJavaBlockComment(node *sitter.Node, sourceCode []byte, graph *CodeGrap
 		commentNode := &Node{
 			ID:               GenerateMethodID(node.Content(sourceCode), []string{}, file),
 			Type:             "block_comment",
-			CodeSnippet:      commentContent,
+			SourceLocation:   &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 			LineNumber:       node.StartPoint().Row + 1,
 			File:             file,
 			isJavaSourceFile: true,
@@ -316,7 +316,7 @@ func parseJavaVariableDeclaration(node *sitter.Node, sourceCode []byte, graph *C
 		ID:               GenerateMethodID(variableName, []string{}, file),
 		Type:             "variable_declaration",
 		Name:             variableName,
-		CodeSnippet:      node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		LineNumber:       node.StartPoint().Row + 1,
 		Modifier:         extractVisibilityModifier(variableModifier),
 		DataType:         variableType,
@@ -364,7 +364,7 @@ func parseJavaObjectCreation(node *sitter.Node, sourceCode []byte, graph *CodeGr
 		ID:                GenerateMethodID(className, []string{strconv.Itoa(int(node.StartPoint().Row + 1))}, file),
 		Type:              "ClassInstanceExpr",
 		Name:              className,
-		CodeSnippet:       node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		LineNumber:        node.StartPoint().Row + 1,
 		File:              file,
 		isJavaSourceFile:  true,
diff --git a/sourcecode-parser/graph/parser_python.go b/sourcecode-parser/graph/parser_python.go
index 6f87906a..b7d8744f 100644
--- a/sourcecode-parser/graph/parser_python.go
+++ b/sourcecode-parser/graph/parser_python.go
@@ -33,7 +33,11 @@ func parsePythonFunctionDefinition(node *sitter.Node, sourceCode []byte, graph *
 		ID:                   methodID,
 		Type:                 "function_definition",
 		Name:                 functionName,
-		CodeSnippet:          node.Content(sourceCode),
+		SourceLocation:       &SourceLocation{
+			File:      file,
+			StartByte: node.StartByte(),
+			EndByte:   node.EndByte(),
+		},
 		LineNumber:           node.StartPoint().Row + 1,
 		MethodArgumentsValue: parameters,
 		File:                 file,
@@ -68,7 +72,11 @@ func parsePythonClassDefinition(node *sitter.Node, sourceCode []byte, graph *Cod
 		ID:                 GenerateMethodID("class:"+className, []string{}, file),
 		Type:               "class_definition",
 		Name:               className,
-		CodeSnippet:        node.Content(sourceCode),
+		SourceLocation:     &SourceLocation{
+			File:      file,
+			StartByte: node.StartByte(),
+			EndByte:   node.EndByte(),
+		},
 		LineNumber:         node.StartPoint().Row + 1,
 		Interface:          superClasses,
 		File:               file,
@@ -102,7 +110,11 @@ func parsePythonCall(node *sitter.Node, sourceCode []byte, graph *CodeGraph, cur
 		Type:                 "call",
 		Name:                 callName,
 		IsExternal:           true,
-		CodeSnippet:          node.Content(sourceCode),
+		SourceLocation:       &SourceLocation{
+			File:      file,
+			StartByte: node.StartByte(),
+			EndByte:   node.EndByte(),
+		},
 		LineNumber:           node.StartPoint().Row + 1,
 		MethodArgumentsValue: arguments,
 		File:                 file,
@@ -124,7 +136,11 @@ func parsePythonReturnStatement(node *sitter.Node, sourceCode []byte, graph *Cod
 		LineNumber:         node.StartPoint().Row + 1,
 		Name:               "ReturnStmt",
 		IsExternal:         true,
-		CodeSnippet:        node.Content(sourceCode),
+		SourceLocation:     &SourceLocation{
+			File:      file,
+			StartByte: node.StartByte(),
+			EndByte:   node.EndByte(),
+		},
 		File:               file,
 		isPythonSourceFile: true,
 		ReturnStmt:         returnNode,
@@ -142,7 +158,11 @@ func parsePythonBreakStatement(node *sitter.Node, sourceCode []byte, graph *Code
 		LineNumber:         node.StartPoint().Row + 1,
 		Name:               "BreakStmt",
 		IsExternal:         true,
-		CodeSnippet:        node.Content(sourceCode),
+		SourceLocation:     &SourceLocation{
+			File:      file,
+			StartByte: node.StartByte(),
+			EndByte:   node.EndByte(),
+		},
 		File:               file,
 		isPythonSourceFile: true,
 		BreakStmt:          breakNode,
@@ -160,7 +180,11 @@ func parsePythonContinueStatement(node *sitter.Node, sourceCode []byte, graph *C
 		LineNumber:         node.StartPoint().Row + 1,
 		Name:               "ContinueStmt",
 		IsExternal:         true,
-		CodeSnippet:        node.Content(sourceCode),
+		SourceLocation:     &SourceLocation{
+			File:      file,
+			StartByte: node.StartByte(),
+			EndByte:   node.EndByte(),
+		},
 		File:               file,
 		isPythonSourceFile: true,
 		ContinueStmt:       continueNode,
@@ -178,7 +202,11 @@ func parsePythonAssertStatement(node *sitter.Node, sourceCode []byte, graph *Cod
 		LineNumber:         node.StartPoint().Row + 1,
 		Name:               "AssertStmt",
 		IsExternal:         true,
-		CodeSnippet:        node.Content(sourceCode),
+		SourceLocation:     &SourceLocation{
+			File:      file,
+			StartByte: node.StartByte(),
+			EndByte:   node.EndByte(),
+		},
 		File:               file,
 		isPythonSourceFile: true,
 		AssertStmt:         assertNode,
@@ -200,7 +228,11 @@ func parsePythonYieldExpression(node *sitter.Node, sourceCode []byte, graph *Cod
 				LineNumber:         child.StartPoint().Row + 1,
 				Name:               "YieldStmt",
 				IsExternal:         true,
-				CodeSnippet:        child.Content(sourceCode),
+				SourceLocation:     &SourceLocation{
+					File:      file,
+					StartByte: child.StartByte(),
+					EndByte:   child.EndByte(),
+				},
 				File:               file,
 				isPythonSourceFile: true,
 				YieldStmt:          yieldNode,
@@ -231,7 +263,11 @@ func parsePythonAssignment(node *sitter.Node, sourceCode []byte, graph *CodeGrap
 		ID:                 GenerateMethodID(variableName, []string{}, file),
 		Type:               "variable_assignment",
 		Name:               variableName,
-		CodeSnippet:        node.Content(sourceCode),
+		SourceLocation:     &SourceLocation{
+			File:      file,
+			StartByte: node.StartByte(),
+			EndByte:   node.EndByte(),
+		},
 		LineNumber:         node.StartPoint().Row + 1,
 		VariableValue:      variableValue,
 		Scope:              "local",
diff --git a/sourcecode-parser/graph/parser_statements.go b/sourcecode-parser/graph/parser_statements.go
index 467a14d5..305616ce 100644
--- a/sourcecode-parser/graph/parser_statements.go
+++ b/sourcecode-parser/graph/parser_statements.go
@@ -18,7 +18,7 @@ func parseBlockStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph,
 		LineNumber:       node.StartPoint().Row + 1,
 		Name:             "BlockStmt",
 		IsExternal:       true,
-		CodeSnippet:      node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		File:             file,
 		isJavaSourceFile: isJavaSourceFile,
 		BlockStmt:        blockNode,
@@ -39,7 +39,7 @@ func parseReturnStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph
 			LineNumber:       node.StartPoint().Row + 1,
 			Name:             "ReturnStmt",
 			IsExternal:       true,
-			CodeSnippet:      node.Content(sourceCode),
+			SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 			File:             file,
 			isJavaSourceFile: isJava,
 			ReturnStmt:       returnNode,
@@ -61,7 +61,7 @@ func parseBreakStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph,
 			LineNumber:       node.StartPoint().Row + 1,
 			Name:             "BreakStmt",
 			IsExternal:       true,
-			CodeSnippet:      node.Content(sourceCode),
+			SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 			File:             file,
 			isJavaSourceFile: isJava,
 			BreakStmt:        breakNode,
@@ -83,7 +83,7 @@ func parseContinueStatement(node *sitter.Node, sourceCode []byte, graph *CodeGra
 			LineNumber:       node.StartPoint().Row + 1,
 			Name:             "ContinueStmt",
 			IsExternal:       true,
-			CodeSnippet:      node.Content(sourceCode),
+			SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 			File:             file,
 			isJavaSourceFile: isJava,
 			ContinueStmt:     continueNode,
@@ -105,7 +105,7 @@ func parseAssertStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph
 			LineNumber:       node.StartPoint().Row + 1,
 			Name:             "AssertStmt",
 			IsExternal:       true,
-			CodeSnippet:      node.Content(sourceCode),
+			SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 			File:             file,
 			isJavaSourceFile: isJava,
 			AssertStmt:       assertNode,
@@ -124,7 +124,7 @@ func parseYieldStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph,
 		LineNumber:       node.StartPoint().Row + 1,
 		Name:             "YieldStmt",
 		IsExternal:       true,
-		CodeSnippet:      node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		File:             file,
 		isJavaSourceFile: isJavaSourceFile,
 		YieldStmt:        yieldNode,
@@ -154,7 +154,7 @@ func parseIfStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph, fi
 		Type:             "IfStmt",
 		Name:             "IfStmt",
 		IsExternal:       true,
-		CodeSnippet:      node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		LineNumber:       node.StartPoint().Row + 1,
 		File:             file,
 		isJavaSourceFile: isJavaSourceFile,
@@ -176,7 +176,7 @@ func parseWhileStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph,
 		Type:             "WhileStmt",
 		Name:             "WhileStmt",
 		IsExternal:       true,
-		CodeSnippet:      node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		LineNumber:       node.StartPoint().Row + 1,
 		File:             file,
 		isJavaSourceFile: isJavaSourceFile,
@@ -198,7 +198,7 @@ func parseDoStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph, fi
 		Type:             "DoStmt",
 		Name:             "DoStmt",
 		IsExternal:       true,
-		CodeSnippet:      node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		LineNumber:       node.StartPoint().Row + 1,
 		File:             file,
 		isJavaSourceFile: isJavaSourceFile,
@@ -229,7 +229,7 @@ func parseForStatement(node *sitter.Node, sourceCode []byte, graph *CodeGraph, f
 		Type:             "ForStmt",
 		Name:             "ForStmt",
 		IsExternal:       true,
-		CodeSnippet:      node.Content(sourceCode),
+		SourceLocation: &SourceLocation{File: file, StartByte: node.StartByte(), EndByte: node.EndByte()},
 		LineNumber:       node.StartPoint().Row + 1,
 		File:             file,
 		isJavaSourceFile: isJavaSourceFile,
diff --git a/sourcecode-parser/graph/query.go b/sourcecode-parser/graph/query.go
index fb98d5d7..63a5cf3d 100644
--- a/sourcecode-parser/graph/query.go
+++ b/sourcecode-parser/graph/query.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"log"
 	"strings"
+	"sync"
 
 	"github.com/expr-lang/expr"
 	"github.com/shivasurya/code-pathfinder/sourcecode-parser/analytics"
@@ -159,6 +160,238 @@ func (env *Env) GetBlockStmt() *model.BlockStmt {
 	return env.Node.BlockStmt
 }
 
+// envMapPool recycles the per-evaluation environment maps to reduce allocations.
+var envMapPool = sync.Pool{
+	New: func() interface{} { return make(map[string]interface{}, 10) },
+}
+
+// getEnvMapFromPool returns an empty map from the pool. Maps are emptied in
+// returnEnvMapToPool before being pooled, so no clearing is needed here.
+func getEnvMapFromPool() map[string]interface{} {
+	return envMapPool.Get().(map[string]interface{})
+}
+
+// returnEnvMapToPool empties m and puts it back in the pool. Emptying on
+// return (not on the next Get) keeps idle pooled maps from referencing the
+// values stored in them. Nil maps and maps with 100+ entries are not pooled.
+func returnEnvMapToPool(m map[string]interface{}) {
+	if m == nil || len(m) >= 100 {
+		return
+	}
+	for k := range m {
+		delete(m, k)
+	}
+	envMapPool.Put(m)
+}
+
+// buildEntityEnv returns the environment map for one entity: accessor names mapped to proxyenv's bound methods (plus a constant "getOperator" string for operator expressions), chosen by entityType.
+// Building only the requested type's map avoids creating a map for every entity type; unknown types get getName/toString, and the third parameter is unused.
+func buildEntityEnv(proxyenv *Env, entityType string, _ string) map[string]interface{} {
+	switch entityType {
+	case "method_declaration":
+		return map[string]interface{}{
+			"getVisibility":   proxyenv.GetVisibility,
+			"getAnnotation":   proxyenv.GetAnnotations,
+			"getReturnType":   proxyenv.GetReturnType,
+			"getName":         proxyenv.GetName,
+			"getArgumentType": proxyenv.GetArgumentTypes,
+			"getArgumentName": proxyenv.GetArgumentNames,
+			"getThrowsType":   proxyenv.GetThrowsTypes,
+			"getDoc":          proxyenv.GetDoc,
+			"toString":        proxyenv.ToString,
+		}
+	case "class_declaration":
+		return map[string]interface{}{
+			"getSuperClass": proxyenv.GetSuperClass,
+			"getName":       proxyenv.GetName,
+			"getAnnotation": proxyenv.GetAnnotations,
+			"getVisibility": proxyenv.GetVisibility,
+			"getInterface":  proxyenv.GetInterfaces,
+			"getDoc":        proxyenv.GetDoc,
+			"toString":      proxyenv.ToString,
+		}
+	case "method_invocation":
+		return map[string]interface{}{
+			"getArgumentName": proxyenv.GetArgumentNames,
+			"getName":         proxyenv.GetName,
+			"getDoc":          proxyenv.GetDoc,
+			"toString":        proxyenv.ToString,
+		}
+	case "variable_declaration":
+		return map[string]interface{}{
+			"getName":             proxyenv.GetName,
+			"getVisibility":       proxyenv.GetVisibility,
+			"getVariableValue":    proxyenv.GetVariableValue,
+			"getVariableDataType": proxyenv.GetVariableDataType,
+			"getScope":            proxyenv.GetScope,
+			"getDoc":              proxyenv.GetDoc,
+			"toString":            proxyenv.ToString,
+		}
+	case "binary_expression":
+		return map[string]interface{}{
+			"getLeftOperand":  proxyenv.GetLeftOperand,
+			"getRightOperand": proxyenv.GetRightOperand,
+			"toString":        proxyenv.ToString,
+		}
+	case "function_definition":
+		return map[string]interface{}{
+			"getName":         proxyenv.GetName,
+			"getArgumentName": proxyenv.GetArgumentNames,
+			"toString":        proxyenv.ToString,
+		}
+	case "class_definition":
+		return map[string]interface{}{
+			"getName":      proxyenv.GetName,
+			"getInterface": proxyenv.GetInterfaces,
+			"toString":     proxyenv.ToString,
+		}
+	case "add_expression", "sub_expression", "mul_expression", "div_expression":
+		op := "+"
+		switch entityType {
+		case "sub_expression":
+			op = "-"
+		case "mul_expression":
+			op = "*"
+		case "div_expression":
+			op = "/"
+		}
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   op,
+			"toString":      proxyenv.ToString,
+		}
+	case "comparison_expression", "equal_expression":
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   "==",
+			"toString":      proxyenv.ToString,
+		}
+	case "not_equal_expression":
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   "!=",
+			"toString":      proxyenv.ToString,
+		}
+	case "remainder_expression":
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   "%",
+			"toString":      proxyenv.ToString,
+		}
+	case "right_shift_expression":
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   ">>",
+			"toString":      proxyenv.ToString,
+		}
+	case "left_shift_expression":
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   "<<",
+			"toString":      proxyenv.ToString,
+		}
+	case "and_bitwise_expression":
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   "&",
+			"toString":      proxyenv.ToString,
+		}
+	case "and_logical_expression":
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   "&&",
+			"toString":      proxyenv.ToString,
+		}
+	case "or_logical_expression":
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   "||",
+			"toString":      proxyenv.ToString,
+		}
+	case "or_bitwise_expression":
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   "|",
+			"toString":      proxyenv.ToString,
+		}
+	case "unsigned_right_shift_expression":
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   ">>>",
+			"toString":      proxyenv.ToString,
+		}
+	case "xor_bitwise_expression":
+		return map[string]interface{}{
+			"getBinaryExpr": proxyenv.GetBinaryExpr,
+			"getOperator":   "^",
+			"toString":      proxyenv.ToString,
+		}
+	case "ClassInstanceExpr":
+		return map[string]interface{}{
+			"getName":              proxyenv.GetName,
+			"getDoc":               proxyenv.GetDoc,
+			"toString":             proxyenv.ToString,
+			"getClassInstanceExpr": proxyenv.GetClassInstanceExpr,
+		}
+	case "IfStmt":
+		return map[string]interface{}{
+			"getIfStmt": proxyenv.GetIfStmt,
+			"toString":  proxyenv.ToString,
+		}
+	case "WhileStmt":
+		return map[string]interface{}{
+			"getWhileStmt": proxyenv.GetWhileStmt,
+			"toString":     proxyenv.ToString,
+		}
+	case "DoStmt":
+		return map[string]interface{}{
+			"getDoStmt": proxyenv.GetDoStmt,
+			"toString":  proxyenv.ToString,
+		}
+	case "ForStmt":
+		return map[string]interface{}{
+			"getForStmt": proxyenv.GetForStmt,
+			"toString":   proxyenv.ToString,
+		}
+	case "BreakStmt":
+		return map[string]interface{}{
+			"toString":     proxyenv.ToString,
+			"getBreakStmt": proxyenv.GetBreakStmt,
+		}
+	case "ContinueStmt":
+		return map[string]interface{}{
+			"toString":        proxyenv.ToString,
+			"getContinueStmt": proxyenv.GetContinueStmt,
+		}
+	case "YieldStmt":
+		return map[string]interface{}{
+			"toString":     proxyenv.ToString,
+			"getYieldStmt": proxyenv.GetYieldStmt,
+		}
+	case "AssertStmt":
+		return map[string]interface{}{
+			"toString":      proxyenv.ToString,
+			"getAssertStmt": proxyenv.GetAssertStmt,
+		}
+	case "ReturnStmt":
+		return map[string]interface{}{
+			"toString":      proxyenv.ToString,
+			"getReturnStmt": proxyenv.GetReturnStmt,
+		}
+	case "BlockStmt":
+		return map[string]interface{}{
+			"toString":     proxyenv.ToString,
+			"getBlockStmt": proxyenv.GetBlockStmt,
+		}
+	default:
+		// Fallback for unknown types
+		return map[string]interface{}{
+			"getName":  proxyenv.GetName,
+			"toString": proxyenv.ToString,
+		}
+	}
+}
+
 func QueryEntities(graph *CodeGraph, query parser.Query) (nodes [][]*Node, output [][]interface{}) {
 	result := make([][]*Node, 0)
 
@@ -210,6 +443,7 @@ func generateOutput(nodeSet [][]*Node, query parser.Query) [][]interface{} {
 
 func evaluateExpression(node []*Node, expression string, query parser.Query) (interface{}, error) {
 	env := generateProxyEnvForSet(node, query)
+	defer returnEnvMapToPool(env) // Return to pool when done
 
 	program, err := expr.Compile(expression, expr.Env(env))
 	if err != nil {
@@ -635,6 +869,7 @@ func FilterEntities(node []*Node, query parser.Query) bool {
 	}
 
 	env := generateProxyEnvForSet(node, query)
+	defer returnEnvMapToPool(env) // Return to pool when done
 
 	expression = ReplacePredicateVariables(query)
 
@@ -660,7 +895,7 @@ type classInstance struct {
 }
 
 func generateProxyEnvForSet(nodeSet []*Node, query parser.Query) map[string]interface{} {
-	env := make(map[string]interface{})
+	env := getEnvMapFromPool()
 
 	for i, entity := range query.SelectList {
 		// Check if entity is a class type
@@ -668,9 +903,10 @@ func generateProxyEnvForSet(nodeSet []*Node, query parser.Query) map[string]inte
 		if classDecl != nil {
 			env[entity.Alias] = createClassInstance(classDecl)
 		} else {
-			// Handle existing node types
-			proxyEnv := generateProxyEnv(nodeSet[i], query)
-			env[entity.Alias] = proxyEnv[entity.Alias]
+			// OPTIMIZED: Only build the specific entity type map needed
+			// instead of creating all 28 entity type maps
+			proxyEnvWrapper := &Env{Node: nodeSet[i]}
+			env[entity.Alias] = buildEntityEnv(proxyEnvWrapper, entity.Entity, entity.Alias)
 		}
 	}
 	return env
diff --git a/sourcecode-parser/graph/types.go b/sourcecode-parser/graph/types.go
index 39d0e606..99fac96d 100644
--- a/sourcecode-parser/graph/types.go
+++ b/sourcecode-parser/graph/types.go
@@ -2,13 +2,21 @@ package graph
 
 import "github.com/shivasurya/code-pathfinder/sourcecode-parser/model"
 
+// SourceLocation identifies the byte range [StartByte, EndByte) within File that holds a node's source text, so the snippet can be loaded lazily.
+type SourceLocation struct {
+	File      string
+	StartByte uint32
+	EndByte   uint32
+}
+
 // Node represents a node in the code graph with various properties
 // describing code elements like classes, methods, variables, etc.
 type Node struct {
 	ID                   string
 	Type                 string
 	Name                 string
-	CodeSnippet          string
+	CodeSnippet          string // DEPRECATED: Will be removed, use GetCodeSnippet() instead
+	SourceLocation       *SourceLocation
 	LineNumber           uint32
 	OutgoingEdges        []*Edge
 	IsExternal           bool
@@ -44,6 +52,26 @@ type Node struct {
 	BlockStmt            *model.BlockStmt
 }
 
+// GetCodeSnippet returns the source text for this node. When SourceLocation is
+// set, the file is read via readFile and bytes [StartByte, EndByte) are
+// returned (lazy loading). If the read fails or the range does not fit the
+// file contents, it falls back to the deprecated CodeSnippet field.
+func (n *Node) GetCodeSnippet() string {
+	loc := n.SourceLocation
+	if loc == nil {
+		return n.CodeSnippet
+	}
+	content, err := readFile(loc.File)
+	if err != nil {
+		return n.CodeSnippet
+	}
+	// Guard both bounds: slicing with StartByte > EndByte would panic.
+	if loc.StartByte > loc.EndByte || uint64(loc.EndByte) > uint64(len(content)) {
+		return n.CodeSnippet
+	}
+	return string(content[loc.StartByte:loc.EndByte])
+}
+
 // Edge represents a directed edge between two nodes in the code graph.
 type Edge struct {
 	From *Node
diff --git a/test_memory.csv b/test_memory.csv
new file mode 100644
index 00000000..2cbab90c
--- /dev/null
+++ b/test_memory.csv
@@ -0,0 +1 @@
+timestamp,rss_mb,vsz_mb