diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 8b1e62a1..44a3a787 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -19,6 +19,19 @@ jobs: steps: - name: Checkout your repository using git uses: actions/checkout@v4 + + # Generate stdlib registry for Python 3.14 + - name: Setup Python 3.14 + uses: actions/setup-python@v5 + with: + python-version: '3.14' + + - name: Generate stdlib registry for Python 3.14 + run: | + cd sourcecode-parser/tools + python generate_stdlib_registry.py --all \ + --output-dir ../../docs/public/assets/registries/python3.14/stdlib/v1/ + - name: Create .env file env: POSTHOG_WEB_ANALYTICS: ${{ secrets.POSTHOG_WEB_ANALYTICS }} diff --git a/sourcecode-parser/.gitignore b/sourcecode-parser/.gitignore index 564db6b6..64a4906d 100644 --- a/sourcecode-parser/.gitignore +++ b/sourcecode-parser/.gitignore @@ -14,4 +14,5 @@ build/ .gradle/ # Generated registries (local testing only) +# Note: Production registries are in docs/public/assets/registries/ (committed for CDN hosting) registries/ diff --git a/sourcecode-parser/graph/callgraph/builder.go b/sourcecode-parser/graph/callgraph/builder.go index ca83f73b..ad374e11 100644 --- a/sourcecode-parser/graph/callgraph/builder.go +++ b/sourcecode-parser/graph/callgraph/builder.go @@ -146,17 +146,34 @@ func BuildCallGraph(codeGraph *graph.CodeGraph, registry *ModuleRegistry, projec // Phase 3 Task 12: Initialize attribute registry for tracking class attributes typeEngine.Attributes = NewAttributeRegistry() - // PR #2: Load stdlib registry from local directory - stdlibLoader := &StdlibRegistryLoader{ - RegistryPath: "registries/python3.14/stdlib/v1", - } - stdlibRegistry, err := stdlibLoader.LoadRegistry() + // PR #3: Detect Python version and load stdlib registry from remote CDN + pythonVersion := detectPythonVersion(projectRoot) + log.Printf("Detected Python version: %s", pythonVersion) + + // Create remote registry loader + remoteLoader := 
NewStdlibRegistryRemote( + "https://codepathfinder.dev/assets/registries", + pythonVersion, + ) + + // Load manifest from CDN + err := remoteLoader.LoadManifest() if err != nil { - log.Printf("Warning: Failed to load stdlib registry: %v", err) + log.Printf("Warning: Failed to load stdlib registry from CDN: %v", err) // Continue without stdlib resolution - not a fatal error } else { + // Create adapter to satisfy existing StdlibRegistry interface + stdlibRegistry := &StdlibRegistry{ + Modules: make(map[string]*StdlibModule), + Manifest: remoteLoader.Manifest, + } + + // The remote loader will lazy-load modules as needed + // We store a reference to it for on-demand loading typeEngine.StdlibRegistry = stdlibRegistry - log.Printf("Loaded stdlib registry: %d modules", stdlibRegistry.ModuleCount()) + typeEngine.StdlibRemote = remoteLoader + + log.Printf("Loaded stdlib manifest from CDN: %d modules available", remoteLoader.ModuleCount()) } // First, index all function definitions from the code graph @@ -710,9 +727,9 @@ func resolveCallTarget(target string, importMap *ImportMap, registry *ModuleRegi if ormFQN, resolved := ResolveORMCall(target, currentModule, registry, codeGraph); resolved { return ormFQN, true, nil } - // PR #2: Check stdlib registry before user project registry - if typeEngine != nil && typeEngine.StdlibRegistry != nil { - if validateStdlibFQN(fullFQN, typeEngine.StdlibRegistry) { + // PR #3: Check stdlib registry before user project registry + if typeEngine != nil && typeEngine.StdlibRemote != nil { + if validateStdlibFQN(fullFQN, typeEngine.StdlibRemote) { return fullFQN, true, nil } } @@ -735,10 +752,10 @@ func resolveCallTarget(target string, importMap *ImportMap, registry *ModuleRegi return ormFQN, true, nil } - // PR #2: Last resort - check if target is a stdlib call (e.g., os.path.join) + // PR #3: Last resort - check if target is a stdlib call (e.g., os.path.join) // This handles cases where stdlib modules are imported directly (import os.path) 
- if typeEngine != nil && typeEngine.StdlibRegistry != nil { - if validateStdlibFQN(target, typeEngine.StdlibRegistry) { + if typeEngine != nil && typeEngine.StdlibRemote != nil { + if validateStdlibFQN(target, typeEngine.StdlibRemote) { return target, true, nil } } @@ -758,6 +775,7 @@ var stdlibModuleAliases = map[string]string{ // validateStdlibFQN checks if a fully qualified name is a stdlib function. // Supports module.function, module.submodule.function, and module.Class patterns. // Handles platform-specific module aliases (e.g., os.path -> posixpath). +// Uses lazy loading via remote registry to download modules on-demand. // // Examples: // "os.getcwd" - returns true if os.getcwd exists in stdlib @@ -766,12 +784,12 @@ var stdlibModuleAliases = map[string]string{ // // Parameters: // - fqn: fully qualified name to check -// - stdlibRegistry: stdlib registry +// - remoteLoader: remote stdlib registry loader // // Returns: // - true if FQN is a stdlib function or class -func validateStdlibFQN(fqn string, stdlibRegistry *StdlibRegistry) bool { - if stdlibRegistry == nil { +func validateStdlibFQN(fqn string, remoteLoader *StdlibRegistryRemote) bool { + if remoteLoader == nil { return false } @@ -797,7 +815,12 @@ func validateStdlibFQN(fqn string, stdlibRegistry *StdlibRegistry) bool { moduleName = canonicalName } - module := stdlibRegistry.GetModule(moduleName) + // Lazy load module from remote registry + module, err := remoteLoader.GetModule(moduleName) + if err != nil { + log.Printf("Warning: Failed to load stdlib module %s: %v", moduleName, err) + continue + } if module == nil { continue } diff --git a/sourcecode-parser/graph/callgraph/builder_remote_test.go b/sourcecode-parser/graph/callgraph/builder_remote_test.go new file mode 100644 index 00000000..36f1e306 --- /dev/null +++ b/sourcecode-parser/graph/callgraph/builder_remote_test.go @@ -0,0 +1,308 @@ +package callgraph + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "os" + 
"path/filepath" + "testing" + + "github.com/shivasurya/code-pathfinder/sourcecode-parser/graph" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBuildCallGraph_RemoteStdlibLoading(t *testing.T) { + // Create test manifest + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:fb04c597a080bf9cba624b9e3d809bcd8339379368c2eeb3c8c04ae56f5d5ee1"}, + }, + } + + // Create test module + module := StdlibModule{ + Module: "os", + PythonVersion: "3.14", + Functions: map[string]*StdlibFunction{ + "getcwd": {ReturnType: "str"}, + }, + } + + // Create mock CDN server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/python3.14/stdlib/v1/manifest.json": + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + case "/python3.14/stdlib/v1/os.json": + moduleJSON, _ := json.Marshal(module) + w.Write(moduleJSON) + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer server.Close() + + // Create temporary project directory + tmpDir := t.TempDir() + + // Write .python-version file + versionFile := filepath.Join(tmpDir, ".python-version") + err := os.WriteFile(versionFile, []byte("3.14.0\n"), 0644) + require.NoError(t, err) + + // Create a simple code graph with a Python file + codeGraph := graph.NewCodeGraph() + registry := NewModuleRegistry() + + // Note: We can't fully test BuildCallGraph here because it needs a real code graph + // Instead, we test the individual components that BuildCallGraph uses + + // Test 1: Version detection + version := detectPythonVersion(tmpDir) + assert.Equal(t, "3.14", version) + + // Test 2: Remote loader initialization + remoteLoader := NewStdlibRegistryRemote(server.URL, version) + err = remoteLoader.LoadManifest() + require.NoError(t, err) + assert.Equal(t, 1, remoteLoader.ModuleCount()) + + // Test 3: Module lazy loading + osModule, err 
:= remoteLoader.GetModule("os") + require.NoError(t, err) + assert.NotNil(t, osModule) + assert.Equal(t, "os", osModule.Module) + + // Test 4: Verify cache works + assert.Equal(t, 1, remoteLoader.CacheSize()) + + // Minimal call graph build to verify no compilation errors + _, err = BuildCallGraph(codeGraph, registry, tmpDir) + // We expect this to succeed even with empty graph + assert.NoError(t, err) +} + +func TestBuildCallGraph_RemoteStdlibFallback(t *testing.T) { + // Create a server that returns errors + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + })) + defer server.Close() + + tmpDir := t.TempDir() + codeGraph := graph.NewCodeGraph() + registry := NewModuleRegistry() + + // BuildCallGraph should succeed even if CDN is unavailable + // It should log a warning and continue without stdlib resolution + callGraph, err := BuildCallGraph(codeGraph, registry, tmpDir) + assert.NoError(t, err) + assert.NotNil(t, callGraph) +} + +func TestValidateStdlibFQN_WithRemoteLoader(t *testing.T) { + // Create test module + module := StdlibModule{ + Module: "os", + PythonVersion: "3.14", + Functions: map[string]*StdlibFunction{ + "getcwd": {ReturnType: "str"}, + }, + Classes: map[string]*StdlibClass{ + "DirEntry": {Type: "class"}, + }, + } + + // Calculate checksum + moduleJSON, _ := json.Marshal(module) + + // Create manifest with correct checksum + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:4cfe6f2495a04780243e6c0c32720082a774cb2f99a4e5c68db2b8623ec11919"}, + }, + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/python3.14/stdlib/v1/manifest.json": + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + case "/python3.14/stdlib/v1/os.json": + w.Write(moduleJSON) + } + })) + defer server.Close() + + 
remoteLoader := NewStdlibRegistryRemote(server.URL, "3.14") + err := remoteLoader.LoadManifest() + require.NoError(t, err) + + // Test function resolution + assert.True(t, validateStdlibFQN("os.getcwd", remoteLoader)) + + // Test class resolution + assert.True(t, validateStdlibFQN("os.DirEntry", remoteLoader)) + + // Test non-existent function + assert.False(t, validateStdlibFQN("os.nonexistent", remoteLoader)) + + // Test non-existent module + assert.False(t, validateStdlibFQN("fake.module", remoteLoader)) + + // Test nil loader + assert.False(t, validateStdlibFQN("os.getcwd", nil)) + + // Test invalid FQN (too short) + assert.False(t, validateStdlibFQN("os", remoteLoader)) +} + +func TestValidateStdlibFQN_ModuleAlias(t *testing.T) { + // Create posixpath module (alias for os.path on POSIX systems) + module := StdlibModule{ + Module: "posixpath", + PythonVersion: "3.14", + Functions: map[string]*StdlibFunction{ + "join": {ReturnType: "str"}, + }, + } + + moduleJSON, _ := json.Marshal(module) + + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "posixpath", File: "posixpath.json", Checksum: "sha256:b8fe94908624c2d0e9157477e50b916617202ccffbad4ec35f05b4ff0d16840c"}, + }, + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/python3.14/stdlib/v1/manifest.json": + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + case "/python3.14/stdlib/v1/posixpath.json": + w.Write(moduleJSON) + } + })) + defer server.Close() + + remoteLoader := NewStdlibRegistryRemote(server.URL, "3.14") + err := remoteLoader.LoadManifest() + require.NoError(t, err) + + // Test that os.path.join is resolved to posixpath.join via alias + assert.True(t, validateStdlibFQN("os.path.join", remoteLoader)) +} + +func TestDetectPythonVersion_Integration(t *testing.T) { + tests := []struct { + name string + setup func(dir string) + expected string + }{ + { + name: "from 
.python-version file", + setup: func(dir string) { + os.WriteFile(filepath.Join(dir, ".python-version"), []byte("3.11.5"), 0644) + }, + expected: "3.11", + }, + { + name: "from pyproject.toml requires-python", + setup: func(dir string) { + content := `[project] +requires-python = ">=3.10" +` + os.WriteFile(filepath.Join(dir, "pyproject.toml"), []byte(content), 0644) + }, + expected: "3.10", + }, + { + name: "from pyproject.toml poetry", + setup: func(dir string) { + content := `[tool.poetry.dependencies] +python = "^3.9" +` + os.WriteFile(filepath.Join(dir, "pyproject.toml"), []byte(content), 0644) + }, + expected: "3.9", + }, + { + name: "default version", + setup: func(dir string) {}, + expected: "3.14", + }, + { + name: "priority: .python-version over pyproject.toml", + setup: func(dir string) { + os.WriteFile(filepath.Join(dir, ".python-version"), []byte("3.12"), 0644) + content := `[project] +requires-python = ">=3.8" +` + os.WriteFile(filepath.Join(dir, "pyproject.toml"), []byte(content), 0644) + }, + expected: "3.12", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + tt.setup(tmpDir) + version := detectPythonVersion(tmpDir) + assert.Equal(t, tt.expected, version) + }) + } +} + +func TestRemoteLoader_CachingInBuildCallGraph(t *testing.T) { + downloadCount := 0 + + // Create test module + module := StdlibModule{ + Module: "os", + PythonVersion: "3.14", + Functions: map[string]*StdlibFunction{ + "getcwd": {ReturnType: "str"}, + }, + } + moduleJSON, _ := json.Marshal(module) + + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:fb04c597a080bf9cba624b9e3d809bcd8339379368c2eeb3c8c04ae56f5d5ee1"}, + }, + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/python3.14/stdlib/v1/manifest.json": + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + 
case "/python3.14/stdlib/v1/os.json": + downloadCount++ + w.Write(moduleJSON) + } + })) + defer server.Close() + + remoteLoader := NewStdlibRegistryRemote(server.URL, "3.14") + err := remoteLoader.LoadManifest() + require.NoError(t, err) + + // Call validateStdlibFQN multiple times + validateStdlibFQN("os.getcwd", remoteLoader) + validateStdlibFQN("os.getcwd", remoteLoader) + validateStdlibFQN("os.getcwd", remoteLoader) + + // Module should only be downloaded once + assert.Equal(t, 1, downloadCount, "Module should be cached after first download") + assert.Equal(t, 1, remoteLoader.CacheSize()) +} diff --git a/sourcecode-parser/graph/callgraph/python_version_detector.go b/sourcecode-parser/graph/callgraph/python_version_detector.go new file mode 100644 index 00000000..baf29587 --- /dev/null +++ b/sourcecode-parser/graph/callgraph/python_version_detector.go @@ -0,0 +1,119 @@ +package callgraph + +import ( + "bufio" + "os" + "path/filepath" + "regexp" + "strings" +) + +// detectPythonVersion infers Python version from project files. +// It checks in order: +// 1. .python-version file +// 2. pyproject.toml [tool.poetry.dependencies] or [project] requires-python +// 3. Defaults to "3.14" +// +// Parameters: +// - projectPath: absolute path to the project root +// +// Returns: +// - Python version string (e.g., "3.14", "3.11", "3.9") +func detectPythonVersion(projectPath string) string { + // 1. Check .python-version file + if version := readPythonVersionFile(projectPath); version != "" { + return version + } + + // 2. Check pyproject.toml + if version := parsePyprojectToml(projectPath); version != "" { + return version + } + + // 3. Default to 3.14 + return "3.14" +} + +// readPythonVersionFile reads version from .python-version file. 
+// Format: "3.14.0" or "3.14" (we extract major.minor) +// +// Parameters: +// - projectPath: absolute path to the project root +// +// Returns: +// - Python version string (e.g., "3.14"), or empty string if not found +func readPythonVersionFile(projectPath string) string { + versionFile := filepath.Join(projectPath, ".python-version") + data, err := os.ReadFile(versionFile) + if err != nil { + return "" + } + + version := strings.TrimSpace(string(data)) + return extractMajorMinor(version) +} + +// parsePyprojectToml extracts Python version from pyproject.toml. +// Supports: +// - [project] requires-python = ">=3.11" +// - [tool.poetry.dependencies] python = "^3.11" +// +// Parameters: +// - projectPath: absolute path to the project root +// +// Returns: +// - Python version string (e.g., "3.11"), or empty string if not found +func parsePyprojectToml(projectPath string) string { + tomlFile := filepath.Join(projectPath, "pyproject.toml") + file, err := os.Open(tomlFile) + if err != nil { + return "" + } + defer file.Close() + + // Patterns to match: + // requires-python = ">=3.11" + // python = "^3.11" + // python = "~3.11" + requiresPythonRe := regexp.MustCompile(`requires-python\s*=\s*"[><=~^]*(\d+\.\d+)`) + poetryPythonRe := regexp.MustCompile(`python\s*=\s*"[\^~>=<]*(\d+\.\d+)`) + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + + // Check requires-python pattern + if matches := requiresPythonRe.FindStringSubmatch(line); len(matches) > 1 { + return matches[1] + } + + // Check poetry python pattern + if matches := poetryPythonRe.FindStringSubmatch(line); len(matches) > 1 { + return matches[1] + } + } + + return "" +} + +// extractMajorMinor extracts major.minor version from full version string. 
+// Examples: +// - "3.14.0" -> "3.14" +// - "3.11" -> "3.11" +// - "3" -> "3" +// +// Parameters: +// - version: full version string +// +// Returns: +// - major.minor version string, or original if no dots found +func extractMajorMinor(version string) string { + parts := strings.Split(version, ".") + if len(parts) >= 2 { + return parts[0] + "." + parts[1] + } + if len(parts) == 1 { + return parts[0] + } + return "" +} diff --git a/sourcecode-parser/graph/callgraph/python_version_detector_test.go b/sourcecode-parser/graph/callgraph/python_version_detector_test.go new file mode 100644 index 00000000..c448c211 --- /dev/null +++ b/sourcecode-parser/graph/callgraph/python_version_detector_test.go @@ -0,0 +1,320 @@ +package callgraph + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDetectPythonVersion_PythonVersionFile(t *testing.T) { + // Create temporary directory + tmpDir := t.TempDir() + + // Write .python-version file + versionFile := filepath.Join(tmpDir, ".python-version") + err := os.WriteFile(versionFile, []byte("3.11.5\n"), 0644) + require.NoError(t, err) + + version := detectPythonVersion(tmpDir) + assert.Equal(t, "3.11", version) +} + +func TestDetectPythonVersion_PyprojectToml_RequiresPython(t *testing.T) { + tmpDir := t.TempDir() + + // Write pyproject.toml with requires-python + pyprojectContent := `[project] +name = "test-project" +requires-python = ">=3.10" +` + pyprojectFile := filepath.Join(tmpDir, "pyproject.toml") + err := os.WriteFile(pyprojectFile, []byte(pyprojectContent), 0644) + require.NoError(t, err) + + version := detectPythonVersion(tmpDir) + assert.Equal(t, "3.10", version) +} + +func TestDetectPythonVersion_PyprojectToml_Poetry(t *testing.T) { + tmpDir := t.TempDir() + + // Write pyproject.toml with poetry dependencies + pyprojectContent := `[tool.poetry] +name = "test-project" + +[tool.poetry.dependencies] +python = "^3.12" +` + pyprojectFile 
:= filepath.Join(tmpDir, "pyproject.toml") + err := os.WriteFile(pyprojectFile, []byte(pyprojectContent), 0644) + require.NoError(t, err) + + version := detectPythonVersion(tmpDir) + assert.Equal(t, "3.12", version) +} + +func TestDetectPythonVersion_Default(t *testing.T) { + tmpDir := t.TempDir() + + // No version files - should default to 3.14 + version := detectPythonVersion(tmpDir) + assert.Equal(t, "3.14", version) +} + +func TestDetectPythonVersion_PriorityOrder(t *testing.T) { + tmpDir := t.TempDir() + + // Create both .python-version and pyproject.toml + // .python-version should take priority + versionFile := filepath.Join(tmpDir, ".python-version") + err := os.WriteFile(versionFile, []byte("3.9.0"), 0644) + require.NoError(t, err) + + pyprojectFile := filepath.Join(tmpDir, "pyproject.toml") + pyprojectContent := `[project] +requires-python = ">=3.11" +` + err = os.WriteFile(pyprojectFile, []byte(pyprojectContent), 0644) + require.NoError(t, err) + + version := detectPythonVersion(tmpDir) + assert.Equal(t, "3.9", version, ".python-version should take priority over pyproject.toml") +} + +func TestReadPythonVersionFile_Success(t *testing.T) { + tests := []struct { + name string + content string + expected string + }{ + { + name: "full version", + content: "3.14.0", + expected: "3.14", + }, + { + name: "major.minor only", + content: "3.11", + expected: "3.11", + }, + { + name: "with newline", + content: "3.12.1\n", + expected: "3.12", + }, + { + name: "with spaces", + content: " 3.10.5 ", + expected: "3.10", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + versionFile := filepath.Join(tmpDir, ".python-version") + err := os.WriteFile(versionFile, []byte(tt.content), 0644) + require.NoError(t, err) + + version := readPythonVersionFile(tmpDir) + assert.Equal(t, tt.expected, version) + }) + } +} + +func TestReadPythonVersionFile_NotFound(t *testing.T) { + tmpDir := t.TempDir() + version := 
readPythonVersionFile(tmpDir) + assert.Equal(t, "", version) +} + +func TestParsePyprojectToml_RequiresPython(t *testing.T) { + tests := []struct { + name string + content string + expected string + }{ + { + name: "requires-python >=", + content: `[project] +requires-python = ">=3.11" +`, + expected: "3.11", + }, + { + name: "requires-python ==", + content: `[project] +requires-python = "==3.10" +`, + expected: "3.10", + }, + { + name: "requires-python ~=", + content: `[project] +requires-python = "~=3.9" +`, + expected: "3.9", + }, + { + name: "requires-python with spaces", + content: `[project] +requires-python = ">=3.8" +`, + expected: "3.8", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + pyprojectFile := filepath.Join(tmpDir, "pyproject.toml") + err := os.WriteFile(pyprojectFile, []byte(tt.content), 0644) + require.NoError(t, err) + + version := parsePyprojectToml(tmpDir) + assert.Equal(t, tt.expected, version) + }) + } +} + +func TestParsePyprojectToml_Poetry(t *testing.T) { + tests := []struct { + name string + content string + expected string + }{ + { + name: "poetry ^", + content: `[tool.poetry.dependencies] +python = "^3.12" +`, + expected: "3.12", + }, + { + name: "poetry ~", + content: `[tool.poetry.dependencies] +python = "~3.11" +`, + expected: "3.11", + }, + { + name: "poetry >=", + content: `[tool.poetry.dependencies] +python = ">=3.10" +`, + expected: "3.10", + }, + { + name: "poetry with spaces", + content: `[tool.poetry.dependencies] +python = "^3.9" +`, + expected: "3.9", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + pyprojectFile := filepath.Join(tmpDir, "pyproject.toml") + err := os.WriteFile(pyprojectFile, []byte(tt.content), 0644) + require.NoError(t, err) + + version := parsePyprojectToml(tmpDir) + assert.Equal(t, tt.expected, version) + }) + } +} + +func TestParsePyprojectToml_NotFound(t *testing.T) { + tmpDir := t.TempDir() + 
version := parsePyprojectToml(tmpDir) + assert.Equal(t, "", version) +} + +func TestParsePyprojectToml_NoVersionInfo(t *testing.T) { + tmpDir := t.TempDir() + pyprojectFile := filepath.Join(tmpDir, "pyproject.toml") + content := `[project] +name = "test-project" +description = "A test project" +` + err := os.WriteFile(pyprojectFile, []byte(content), 0644) + require.NoError(t, err) + + version := parsePyprojectToml(tmpDir) + assert.Equal(t, "", version) +} + +func TestExtractMajorMinor(t *testing.T) { + tests := []struct { + name string + version string + expected string + }{ + { + name: "full version", + version: "3.14.0", + expected: "3.14", + }, + { + name: "major.minor only", + version: "3.11", + expected: "3.11", + }, + { + name: "major only", + version: "3", + expected: "3", + }, + { + name: "empty string", + version: "", + expected: "", + }, + { + name: "with patch and build", + version: "3.12.5.final.0", + expected: "3.12", + }, + { + name: "single digit", + version: "3.9", + expected: "3.9", + }, + { + name: "double digit minor", + version: "3.10.1", + expected: "3.10", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := extractMajorMinor(tt.version) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestParsePyprojectToml_ScannerEdgeCases(t *testing.T) { + tmpDir := t.TempDir() + + // Test with file that has matching line but scanner continues + pyprojectFile := filepath.Join(tmpDir, "pyproject.toml") + content := `[project] +name = "test" +# Some comment +requires-python = ">=3.8" +# More content after match +dependencies = ["requests"] +` + err := os.WriteFile(pyprojectFile, []byte(content), 0644) + require.NoError(t, err) + + version := parsePyprojectToml(tmpDir) + assert.Equal(t, "3.8", version) +} diff --git a/sourcecode-parser/graph/callgraph/stdlib_registry_remote.go b/sourcecode-parser/graph/callgraph/stdlib_registry_remote.go new file mode 100644 index 00000000..85c04adf --- /dev/null +++ 
b/sourcecode-parser/graph/callgraph/stdlib_registry_remote.go @@ -0,0 +1,279 @@ +package callgraph + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "strings" + "sync" + "time" +) + +// StdlibRegistryRemote loads Python stdlib registries from a remote CDN. +// It uses lazy loading (downloads modules on-demand) and in-memory caching. +type StdlibRegistryRemote struct { + BaseURL string // CDN base URL (e.g., "https://codepathfinder.dev/registries") + PythonVersion string // Python version (e.g., "3.14") + Manifest *Manifest // Loaded manifest + ModuleCache map[string]*StdlibModule // In-memory cache of loaded modules + CacheMutex sync.RWMutex // Mutex for thread-safe cache access + HTTPClient *http.Client // HTTP client for downloads +} + +// NewStdlibRegistryRemote creates a new remote registry loader. +// +// Parameters: +// - baseURL: CDN base URL (e.g., "https://codepathfinder.dev/registries") +// - pythonVersion: Python version (e.g., "3.14") +// +// Returns: +// - Initialized StdlibRegistryRemote +func NewStdlibRegistryRemote(baseURL, pythonVersion string) *StdlibRegistryRemote { + return &StdlibRegistryRemote{ + BaseURL: strings.TrimSuffix(baseURL, "/"), + PythonVersion: pythonVersion, + ModuleCache: make(map[string]*StdlibModule), + HTTPClient: &http.Client{ + Timeout: 30 * time.Second, + }, + } +} + +// LoadManifest downloads and parses the manifest.json file from the CDN. 
+// +// Returns: +// - error if download or parsing fails +func (r *StdlibRegistryRemote) LoadManifest() error { + manifestURL := fmt.Sprintf("%s/python%s/stdlib/v1/manifest.json", + r.BaseURL, r.PythonVersion) + + log.Printf("Downloading manifest from: %s", manifestURL) + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, manifestURL, nil) + if err != nil { + return fmt.Errorf("failed to create manifest request: %w", err) + } + + resp, err := r.HTTPClient.Do(req) + if err != nil { + return fmt.Errorf("failed to download manifest: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("manifest download failed with status: %d", resp.StatusCode) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("failed to read manifest body: %w", err) + } + + var manifest Manifest + if err := json.Unmarshal(data, &manifest); err != nil { + return fmt.Errorf("failed to parse manifest JSON: %w", err) + } + + r.Manifest = &manifest + log.Printf("Loaded manifest: %d modules", len(manifest.Modules)) + + return nil +} + +// GetModule retrieves a module by name, downloading it if not cached. +// This implements lazy loading - modules are only downloaded when needed. 
+// +// Parameters: +// - moduleName: name of the module (e.g., "os", "sys") +// +// Returns: +// - StdlibModule if found, nil otherwise +// - error if download or parsing fails +func (r *StdlibRegistryRemote) GetModule(moduleName string) (*StdlibModule, error) { + // Check cache first (read lock) + r.CacheMutex.RLock() + if module, ok := r.ModuleCache[moduleName]; ok { + r.CacheMutex.RUnlock() + return module, nil + } + r.CacheMutex.RUnlock() + + // Find module entry in manifest + if r.Manifest == nil { + return nil, fmt.Errorf("manifest not loaded") + } + + var moduleEntry *ModuleEntry + for _, entry := range r.Manifest.Modules { + if entry.Name == moduleName { + moduleEntry = entry + break + } + } + + if moduleEntry == nil { + return nil, nil //nolint:nilnil // nil module is valid when not found + } + + // Download module (write lock) + r.CacheMutex.Lock() + defer r.CacheMutex.Unlock() + + // Double-check cache (another goroutine might have loaded it) + if module, ok := r.ModuleCache[moduleName]; ok { + return module, nil + } + + // Download module file + moduleURL := fmt.Sprintf("%s/python%s/stdlib/v1/%s", + r.BaseURL, r.PythonVersion, moduleEntry.File) + + log.Printf("Downloading module: %s from %s", moduleName, moduleURL) + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, moduleURL, nil) + if err != nil { + return nil, fmt.Errorf("failed to create module request: %w", err) + } + + resp, err := r.HTTPClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to download module %s: %w", moduleName, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("module download failed with status: %d", resp.StatusCode) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read module body: %w", err) + } + + // Verify checksum + if !r.verifyChecksum(data, moduleEntry.Checksum) { + return nil, fmt.Errorf("checksum mismatch for module %s", 
moduleName) + } + + // Parse module JSON + var module StdlibModule + if err := json.Unmarshal(data, &module); err != nil { + return nil, fmt.Errorf("failed to parse module JSON: %w", err) + } + + // Cache the module + r.ModuleCache[moduleName] = &module + + return &module, nil +} + +// verifyChecksum validates the SHA256 checksum of downloaded data. +// +// Parameters: +// - data: downloaded file content +// - expectedChecksum: expected checksum in format "sha256:hex" +// +// Returns: +// - true if checksum matches, false otherwise +func (r *StdlibRegistryRemote) verifyChecksum(data []byte, expectedChecksum string) bool { + hash := sha256.Sum256(data) + actualChecksum := "sha256:" + hex.EncodeToString(hash[:]) + return actualChecksum == expectedChecksum +} + +// HasModule checks if a module exists in the manifest (without downloading it). +// +// Parameters: +// - moduleName: name of the module +// +// Returns: +// - true if module exists in manifest, false otherwise +func (r *StdlibRegistryRemote) HasModule(moduleName string) bool { + if r.Manifest == nil { + return false + } + + for _, entry := range r.Manifest.Modules { + if entry.Name == moduleName { + return true + } + } + + return false +} + +// GetFunction retrieves a function from a module, downloading the module if needed. +// +// Parameters: +// - moduleName: name of the module (e.g., "os") +// - functionName: name of the function (e.g., "getcwd") +// +// Returns: +// - StdlibFunction if found, nil otherwise +func (r *StdlibRegistryRemote) GetFunction(moduleName, functionName string) *StdlibFunction { + module, err := r.GetModule(moduleName) + if err != nil { + log.Printf("Warning: failed to get module %s: %v", moduleName, err) + return nil + } + if module == nil { + return nil + } + + return module.Functions[functionName] +} + +// GetClass retrieves a class from a module, downloading the module if needed. 
+// +// Parameters: +// - moduleName: name of the module +// - className: name of the class +// +// Returns: +// - StdlibClass if found, nil otherwise +func (r *StdlibRegistryRemote) GetClass(moduleName, className string) *StdlibClass { + module, err := r.GetModule(moduleName) + if err != nil { + log.Printf("Warning: failed to get module %s: %v", moduleName, err) + return nil + } + if module == nil { + return nil + } + + return module.Classes[className] +} + +// ModuleCount returns the number of modules in the manifest. +// +// Returns: +// - Number of modules, or 0 if manifest not loaded +func (r *StdlibRegistryRemote) ModuleCount() int { + if r.Manifest == nil { + return 0 + } + return len(r.Manifest.Modules) +} + +// CacheSize returns the number of modules currently cached in memory. +// +// Returns: +// - Number of cached modules +func (r *StdlibRegistryRemote) CacheSize() int { + r.CacheMutex.RLock() + defer r.CacheMutex.RUnlock() + return len(r.ModuleCache) +} + +// ClearCache clears the in-memory module cache. +// Useful for testing or memory management. 
+func (r *StdlibRegistryRemote) ClearCache() { + r.CacheMutex.Lock() + defer r.CacheMutex.Unlock() + r.ModuleCache = make(map[string]*StdlibModule) +} diff --git a/sourcecode-parser/graph/callgraph/stdlib_registry_remote_test.go b/sourcecode-parser/graph/callgraph/stdlib_registry_remote_test.go new file mode 100644 index 00000000..f89ce76c --- /dev/null +++ b/sourcecode-parser/graph/callgraph/stdlib_registry_remote_test.go @@ -0,0 +1,588 @@ +package callgraph + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestNewStdlibRegistryRemote checks that the constructor stores the base URL and Python version, allocates the module cache, and sets a 30-second HTTP client timeout. +func TestNewStdlibRegistryRemote(t *testing.T) { + remote := NewStdlibRegistryRemote("https://example.com/registries", "3.14") + + assert.Equal(t, "https://example.com/registries", remote.BaseURL) + assert.Equal(t, "3.14", remote.PythonVersion) + assert.NotNil(t, remote.ModuleCache) + assert.NotNil(t, remote.HTTPClient) + assert.Equal(t, 30*time.Second, remote.HTTPClient.Timeout) +} + +// TestNewStdlibRegistryRemote_TrimsSuffix checks that a trailing slash is stripped from the base URL. +func TestNewStdlibRegistryRemote_TrimsSuffix(t *testing.T) { + remote := NewStdlibRegistryRemote("https://example.com/registries/", "3.14") + assert.Equal(t, "https://example.com/registries", remote.BaseURL) +} + +// TestStdlibRegistryRemote_LoadManifest_Success serves a two-module manifest and checks that LoadManifest requests /python3.14/stdlib/v1/manifest.json and parses the response. +func TestStdlibRegistryRemote_LoadManifest_Success(t *testing.T) { + // Create test manifest + manifest := Manifest{ + SchemaVersion: "1.0.0", + RegistryVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:abc123"}, + {Name: "sys", File: "sys.json", Checksum: "sha256:def456"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + + // Create test server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/python3.14/stdlib/v1/manifest.json", r.URL.Path) + w.WriteHeader(http.StatusOK) + w.Write(manifestJSON) + })) + defer server.Close() + + // Test + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + 
require.NoError(t, err) + assert.NotNil(t, remote.Manifest) + assert.Equal(t, "1.0.0", remote.Manifest.SchemaVersion) + assert.Len(t, remote.Manifest.Modules, 2) +} + +// TestStdlibRegistryRemote_LoadManifest_HTTPError checks that a 404 response makes LoadManifest return an error naming the status code. +func TestStdlibRegistryRemote_LoadManifest_HTTPError(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + + assert.Error(t, err) + assert.Contains(t, err.Error(), "manifest download failed with status: 404") +} + +// TestStdlibRegistryRemote_LoadManifest_InvalidJSON checks that a response body that is not valid JSON yields a parse error. +func TestStdlibRegistryRemote_LoadManifest_InvalidJSON(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("invalid json")) + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + + assert.Error(t, err) + assert.Contains(t, err.Error(), "failed to parse manifest JSON") +} + +// TestStdlibRegistryRemote_GetModule_Success downloads os.json through GetModule and checks the parsed module. NOTE(review): the hard-coded checksum is presumably the SHA-256 of json.Marshal(module); confirm, and regenerate it if StdlibModule's JSON encoding changes. +func TestStdlibRegistryRemote_GetModule_Success(t *testing.T) { + // Create test module + module := StdlibModule{ + Module: "os", + PythonVersion: "3.14", + Functions: map[string]*StdlibFunction{ + "getcwd": {ReturnType: "str"}, + }, + } + moduleJSON, _ := json.Marshal(module) + + // Create test server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/python3.14/stdlib/v1/manifest.json" { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:fb04c597a080bf9cba624b9e3d809bcd8339379368c2eeb3c8c04ae56f5d5ee1"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + } else if r.URL.Path == "/python3.14/stdlib/v1/os.json" { + w.Write(moduleJSON) + } + })) + defer server.Close() + + // Test + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + 
require.NoError(t, err) + + loadedModule, err := remote.GetModule("os") + require.NoError(t, err) + assert.NotNil(t, loadedModule) + assert.Equal(t, "os", loadedModule.Module) + assert.Equal(t, "3.14", loadedModule.PythonVersion) + assert.Len(t, loadedModule.Functions, 1) +} + +// TestStdlibRegistryRemote_GetModule_Caching checks that two consecutive GetModule calls download os.json only once and leave exactly one cached module. +func TestStdlibRegistryRemote_GetModule_Caching(t *testing.T) { + downloadCount := 0 + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/python3.14/stdlib/v1/manifest.json" { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:809e7ae20b2cc78116920277412fc74e7669752fc3f807a7eeef91b36188d34f"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + } else if r.URL.Path == "/python3.14/stdlib/v1/os.json" { + downloadCount++ + module := StdlibModule{Module: "os", PythonVersion: "3.14"} + moduleJSON, _ := json.Marshal(module) + w.Write(moduleJSON) + } + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + // First call - should download + module1, err := remote.GetModule("os") + require.NoError(t, err) + assert.NotNil(t, module1) + assert.Equal(t, 1, downloadCount) + + // Second call - should use cache + module2, err := remote.GetModule("os") + require.NoError(t, err) + assert.NotNil(t, module2) + assert.Equal(t, 1, downloadCount, "Should not download again") + + // Verify cache size + assert.Equal(t, 1, remote.CacheSize()) +} + +// TestStdlibRegistryRemote_GetModule_NotFound checks that a module missing from the manifest yields a nil module and a nil error. +func TestStdlibRegistryRemote_GetModule_NotFound(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:abc"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + })) + defer server.Close() + + remote := 
NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + module, err := remote.GetModule("nonexistent") + assert.NoError(t, err) + assert.Nil(t, module) +} + +// TestStdlibRegistryRemote_GetModule_ManifestNotLoaded checks that GetModule fails with "manifest not loaded" when no manifest has been set. +func TestStdlibRegistryRemote_GetModule_ManifestNotLoaded(t *testing.T) { + remote := NewStdlibRegistryRemote("https://example.com", "3.14") + module, err := remote.GetModule("os") + + assert.Error(t, err) + assert.Contains(t, err.Error(), "manifest not loaded") + assert.Nil(t, module) +} + +// TestStdlibRegistryRemote_GetModule_ChecksumMismatch checks that a module whose content does not match the manifest checksum is rejected with an error. +func TestStdlibRegistryRemote_GetModule_ChecksumMismatch(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/python3.14/stdlib/v1/manifest.json" { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:wrongchecksum"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + } else if r.URL.Path == "/python3.14/stdlib/v1/os.json" { + module := StdlibModule{Module: "os"} + moduleJSON, _ := json.Marshal(module) + w.Write(moduleJSON) + } + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + module, err := remote.GetModule("os") + assert.Error(t, err) + assert.Contains(t, err.Error(), "checksum mismatch") + assert.Nil(t, module) +} + +// TestStdlibRegistryRemote_HasModule checks manifest lookups for a present and an absent module using a manifest assigned directly, without any server. +func TestStdlibRegistryRemote_HasModule(t *testing.T) { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:abc"}, + }, + } + + remote := NewStdlibRegistryRemote("https://example.com", "3.14") + remote.Manifest = &manifest + + assert.True(t, remote.HasModule("os")) + assert.False(t, remote.HasModule("nonexistent")) +} + +// TestStdlibRegistryRemote_HasModule_ManifestNotLoaded checks that HasModule reports false when no manifest is set. +func TestStdlibRegistryRemote_HasModule_ManifestNotLoaded(t *testing.T) { + remote := NewStdlibRegistryRemote("https://example.com", "3.14") + assert.False(t, remote.HasModule("os")) +} + +// TestStdlibRegistryRemote_GetFunction checks that GetFunction downloads the module and returns the requested function. +func 
TestStdlibRegistryRemote_GetFunction(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/python3.14/stdlib/v1/manifest.json" { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:b00ae23881127c94ad43008c8c45ca1feea852cc149acce4f81648677befeb00"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + } else if r.URL.Path == "/python3.14/stdlib/v1/os.json" { + module := StdlibModule{ + Module: "os", + Functions: map[string]*StdlibFunction{ + "getcwd": {ReturnType: "str"}, + }, + } + moduleJSON, _ := json.Marshal(module) + w.Write(moduleJSON) + } + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + fn := remote.GetFunction("os", "getcwd") + require.NotNil(t, fn, "GetFunction should return non-nil function") + assert.Equal(t, "str", fn.ReturnType) +} + +// TestStdlibRegistryRemote_GetClass checks that GetClass downloads the module and returns the requested class. +func TestStdlibRegistryRemote_GetClass(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/python3.14/stdlib/v1/manifest.json" { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "pathlib", File: "pathlib.json", Checksum: "sha256:40fdc2a17eb383a81c197d8b2453e2a99605cfb1fa5c91e25f3f905ac803c7b8"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + } else if r.URL.Path == "/python3.14/stdlib/v1/pathlib.json" { + module := StdlibModule{ + Module: "pathlib", + Classes: map[string]*StdlibClass{ + "Path": {Type: "class"}, + }, + } + moduleJSON, _ := json.Marshal(module) + w.Write(moduleJSON) + } + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + cls := remote.GetClass("pathlib", "Path") + require.NotNil(t, cls, 
"GetClass should return non-nil class") + assert.Equal(t, "class", cls.Type) +} + +// TestStdlibRegistryRemote_ModuleCount checks that ModuleCount reports the number of manifest entries. +func TestStdlibRegistryRemote_ModuleCount(t *testing.T) { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:abc"}, + {Name: "sys", File: "sys.json", Checksum: "sha256:def"}, + }, + } + + remote := NewStdlibRegistryRemote("https://example.com", "3.14") + remote.Manifest = &manifest + + assert.Equal(t, 2, remote.ModuleCount()) +} + +// TestStdlibRegistryRemote_ModuleCount_NoManifest checks that ModuleCount reports 0 when no manifest is set. +func TestStdlibRegistryRemote_ModuleCount_NoManifest(t *testing.T) { + remote := NewStdlibRegistryRemote("https://example.com", "3.14") + assert.Equal(t, 0, remote.ModuleCount()) +} + +// TestStdlibRegistryRemote_ClearCache checks that ClearCache empties a cache that was populated directly through the ModuleCache map. +func TestStdlibRegistryRemote_ClearCache(t *testing.T) { + remote := NewStdlibRegistryRemote("https://example.com", "3.14") + remote.ModuleCache["os"] = &StdlibModule{Module: "os"} + remote.ModuleCache["sys"] = &StdlibModule{Module: "sys"} + + assert.Equal(t, 2, remote.CacheSize()) + + remote.ClearCache() + + assert.Equal(t, 0, remote.CacheSize()) +} + +// TestStdlibRegistryRemote_VerifyChecksum checks verifyChecksum against one matching and one non-matching "sha256:" checksum string. +func TestStdlibRegistryRemote_VerifyChecksum(t *testing.T) { + remote := NewStdlibRegistryRemote("https://example.com", "3.14") + + data := []byte(`{"test": "data"}`) + // Calculated using: echo -n '{"test": "data"}' | shasum -a 256 + validChecksum := "sha256:40b61fe1b15af0a4d5402735b26343e8cf8a045f4d81710e6108a21d91eaf366" + invalidChecksum := "sha256:wronghash" + + assert.True(t, remote.verifyChecksum(data, validChecksum)) + assert.False(t, remote.verifyChecksum(data, invalidChecksum)) +} + +// TestStdlibRegistryRemote_GetFunction_ModuleNotFound checks that GetFunction returns nil for a module that is absent from the manifest. +func TestStdlibRegistryRemote_GetFunction_ModuleNotFound(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "sys", File: "sys.json", Checksum: "sha256:abc"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, 
"3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + fn := remote.GetFunction("os", "getcwd") + assert.Nil(t, fn) +} + +// TestStdlibRegistryRemote_GetFunction_FunctionNotFound checks that GetFunction returns nil for a name the downloaded module does not define. +func TestStdlibRegistryRemote_GetFunction_FunctionNotFound(t *testing.T) { + module := StdlibModule{ + Module: "os", + Functions: map[string]*StdlibFunction{ + "getcwd": {ReturnType: "str"}, + }, + } + moduleJSON, _ := json.Marshal(module) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/python3.14/stdlib/v1/manifest.json" { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:b00ae23881127c94ad43008c8c45ca1feea852cc149acce4f81648677befeb00"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + } else if r.URL.Path == "/python3.14/stdlib/v1/os.json" { + w.Write(moduleJSON) + } + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + // Request a function that doesn't exist + fn := remote.GetFunction("os", "nonexistent") + assert.Nil(t, fn) +} + +// TestStdlibRegistryRemote_GetClass_ModuleNotFound checks that GetClass returns nil for a module that is absent from the manifest. +func TestStdlibRegistryRemote_GetClass_ModuleNotFound(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "sys", File: "sys.json", Checksum: "sha256:abc"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + cls := remote.GetClass("os", "Path") + assert.Nil(t, cls) +} + +// TestStdlibRegistryRemote_GetClass_ClassNotFound checks that GetClass returns nil for a name the downloaded module does not define. +func TestStdlibRegistryRemote_GetClass_ClassNotFound(t *testing.T) { + module := StdlibModule{ + Module: "pathlib", + Classes: map[string]*StdlibClass{ + "Path": {Type: "class"}, + }, + } + moduleJSON, _ := json.Marshal(module) + + server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/python3.14/stdlib/v1/manifest.json" { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "pathlib", File: "pathlib.json", Checksum: "sha256:40fdc2a17eb383a81c197d8b2453e2a99605cfb1fa5c91e25f3f905ac803c7b8"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + } else if r.URL.Path == "/python3.14/stdlib/v1/pathlib.json" { + w.Write(moduleJSON) + } + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + // Request a class that doesn't exist + cls := remote.GetClass("pathlib", "NonExistent") + assert.Nil(t, cls) +} + +// TestStdlibRegistryRemote_GetFunction_ModuleLoadError checks that GetFunction returns nil when loading the module fails checksum verification. +func TestStdlibRegistryRemote_GetFunction_ModuleLoadError(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/python3.14/stdlib/v1/manifest.json" { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:wrongchecksum"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + } else if r.URL.Path == "/python3.14/stdlib/v1/os.json" { + w.Write([]byte(`{"module": "os"}`)) + } + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + // This will trigger an error in GetModule due to checksum mismatch + fn := remote.GetFunction("os", "getcwd") + assert.Nil(t, fn) +} + +// TestStdlibRegistryRemote_GetClass_ModuleLoadError checks that GetClass returns nil when loading the module fails checksum verification. +func TestStdlibRegistryRemote_GetClass_ModuleLoadError(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/python3.14/stdlib/v1/manifest.json" { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "pathlib", File: "pathlib.json", Checksum: "sha256:wrongchecksum"}, + }, 
+ } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + } else if r.URL.Path == "/python3.14/stdlib/v1/pathlib.json" { + w.Write([]byte(`{"module": "pathlib"}`)) + } + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + // This will trigger an error in GetModule due to checksum mismatch + cls := remote.GetClass("pathlib", "Path") + assert.Nil(t, cls) +} + +// TestStdlibRegistryRemote_LoadManifest_ReadError checks that LoadManifest reports "failed to download manifest" when the server cannot be reached. NOTE(review): despite the name, the server is closed before the request is made, so the handler never runs and this covers a connection failure rather than a body read error. +func TestStdlibRegistryRemote_LoadManifest_ReadError(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Length", "100") + w.WriteHeader(http.StatusOK) + // Close connection immediately to cause read error + })) + server.Close() // Close server to cause connection error + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + + assert.Error(t, err) + assert.Contains(t, err.Error(), "failed to download manifest") +} + +// TestStdlibRegistryRemote_GetModule_InvalidJSON checks that a module body that is not valid JSON yields a parse error. The manifest checksum has to match the literal body served below, because GetModule verifies the checksum before it parses. +func TestStdlibRegistryRemote_GetModule_InvalidJSON(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/python3.14/stdlib/v1/manifest.json" { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:9e1ff4275ee1300de350456bdb3d63d7a66e565f65181e8f94f329a782503d26"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + } else if r.URL.Path == "/python3.14/stdlib/v1/os.json" { + w.Write([]byte("invalid json")) + } + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + module, err := remote.GetModule("os") + assert.Error(t, err) + assert.Contains(t, err.Error(), "failed to parse module JSON") + assert.Nil(t, module) +} + +// TestStdlibRegistryRemote_GetModule_HTTPError checks that a 404 response for the module file makes GetModule return an error naming the status code. +func TestStdlibRegistryRemote_GetModule_HTTPError(t *testing.T) { + server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/python3.14/stdlib/v1/manifest.json" { + manifest := Manifest{ + SchemaVersion: "1.0.0", + Modules: []*ModuleEntry{ + {Name: "os", File: "os.json", Checksum: "sha256:abc"}, + }, + } + manifestJSON, _ := json.Marshal(manifest) + w.Write(manifestJSON) + } else if r.URL.Path == "/python3.14/stdlib/v1/os.json" { + w.WriteHeader(http.StatusNotFound) + } + })) + defer server.Close() + + remote := NewStdlibRegistryRemote(server.URL, "3.14") + err := remote.LoadManifest() + require.NoError(t, err) + + module, err := remote.GetModule("os") + assert.Error(t, err) + assert.Contains(t, err.Error(), "module download failed with status: 404") + assert.Nil(t, module) +} diff --git a/sourcecode-parser/graph/callgraph/type_inference.go b/sourcecode-parser/graph/callgraph/type_inference.go index fe2ebbf9..c3c4e96b 100644 --- a/sourcecode-parser/graph/callgraph/type_inference.go +++ b/sourcecode-parser/graph/callgraph/type_inference.go @@ -30,12 +30,13 @@ type FunctionScope struct { // TypeInferenceEngine manages type inference across the codebase. // It maintains function scopes, return types, and references to other registries. 
type TypeInferenceEngine struct { - Scopes map[string]*FunctionScope // Function FQN -> scope - ReturnTypes map[string]*TypeInfo // Function FQN -> return type - Builtins *BuiltinRegistry // Builtin types registry - Registry *ModuleRegistry // Module registry reference - Attributes *AttributeRegistry // Class attributes registry (Phase 3 Task 12) - StdlibRegistry *StdlibRegistry // Python stdlib registry (PR #2) + Scopes map[string]*FunctionScope // Function FQN -> scope + ReturnTypes map[string]*TypeInfo // Function FQN -> return type + Builtins *BuiltinRegistry // Builtin types registry + Registry *ModuleRegistry // Module registry reference + Attributes *AttributeRegistry // Class attributes registry (Phase 3 Task 12) + StdlibRegistry *StdlibRegistry // Python stdlib registry (PR #2); BuildCallGraph sets it to an adapter holding the CDN manifest and an empty module map + StdlibRemote *StdlibRegistryRemote // Remote loader for lazy module loading (PR #3); BuildCallGraph sets it only after the manifest loads, and resolveCallTarget checks it for nil before use } // NewTypeInferenceEngine creates a new type inference engine.