From f0d8f8221689913e51e182664d89d21f0053b13f Mon Sep 17 00:00:00 2001 From: dylan Date: Tue, 23 Sep 2025 10:02:21 -0700 Subject: [PATCH 1/3] feat: Streaming files for lint --- marimo/_lint/__init__.py | 8 +- marimo/_lint/linter.py | 200 +++++++++++++++++++++++---------------- marimo/_utils/files.py | 67 ++++++++++++- 3 files changed, 186 insertions(+), 89 deletions(-) diff --git a/marimo/_lint/__init__.py b/marimo/_lint/__init__.py index 17560820751..1df953d7114 100644 --- a/marimo/_lint/__init__.py +++ b/marimo/_lint/__init__.py @@ -7,7 +7,7 @@ from marimo._lint.linter import FileStatus, Linter from marimo._lint.rule_engine import EarlyStoppingConfig, RuleEngine from marimo._lint.rules.base import LintRule -from marimo._utils.files import expand_file_patterns +from marimo._utils.files import async_expand_file_patterns if TYPE_CHECKING: from collections.abc import Callable @@ -43,8 +43,8 @@ def run_check( Returns: Linter with per-file status and diagnostics """ - # Expand patterns to actual files - files_to_check = expand_file_patterns(file_patterns) + # Get async generator for files + files_to_check = async_expand_file_patterns(file_patterns) linter = Linter( pipe=pipe, @@ -100,7 +100,7 @@ def message_pipe(msg: str) -> None: rules=filtered_rules, ) - files_to_check = expand_file_patterns(file_patterns) + files_to_check = async_expand_file_patterns(file_patterns) linter.run_streaming(files_to_check) return linter, "\n".join(messages) diff --git a/marimo/_lint/linter.py b/marimo/_lint/linter.py index 3ce31e4342e..db33b8bf5af 100644 --- a/marimo/_lint/linter.py +++ b/marimo/_lint/linter.py @@ -3,9 +3,10 @@ import asyncio import re +from collections.abc import AsyncIterator, Iterator from dataclasses import dataclass, field from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Union from marimo._ast.load import get_notebook_status from marimo._ast.parse import MarimoFileError @@ -17,7 +18,7 @@ from marimo._schemas.serialization import NotebookSerialization if TYPE_CHECKING: - from collections.abc import AsyncIterator, Callable + from collections.abc import AsyncIterator, Callable, Iterator from marimo._lint.rules.base import LintRule @@ -40,6 +41,20 @@ def _contents_differ_excluding_generated_with( return orig_cleaned != gen_cleaned +async def _to_async_iterator( + files_to_check: Union[AsyncIterator[Path], Iterator[Path]], +) -> AsyncIterator[Path]: + """Convert a regular iterator to an async iterator if needed.""" + if hasattr(files_to_check, "__aiter__"): + # Already an async iterator + async for file_path in files_to_check: + yield file_path + else: + # Convert regular iterator to async + for file_path in files_to_check: + yield file_path + + @dataclass class FileStatus: """Processing status and results for a single file.""" @@ -91,105 +106,114 @@ def __init__( self.fixed_count: int = 0 self.issues_count: int = 0 - async def _run_stream( - self, files_to_check: list[Path] - ) -> AsyncIterator[FileStatus]: - """Asynchronously check files and yield results as they complete.""" + async def _process_single_file(self, file: Path) -> FileStatus: + """Process a single file and return its status.""" + file_path = str(file) + file_status = FileStatus(file=file_path) + # Check if file exists first + if not file.exists(): + self.errored = True + file_status.failed = True + file_status.message = f"File not found: {file_path}" + file_status.details = [ + f"FileNotFoundError: No such file or directory: '{file_path}'" + ] + return file_status - # Create tasks for 
all files - async def process_file(file: Path) -> FileStatus: - file_path = str(file) - file_status = FileStatus(file=file_path) + # Check if file is a supported notebook format + if not file_path.endswith((".py", ".md", ".qmd")): + file_status.skipped = True + file_status.message = ( + f"Skipped: {file_path} (not a notebook file)" + ) + return file_status - # Check if file is a supported notebook format - if not file_path.endswith((".py", ".md", ".qmd")): + try: + with capture_output() as (stdout, stderr, logs): + load_result = get_notebook_status(file_path) + except SyntaxError as e: + # Handle syntax errors in notebooks + self.errored = True + file_status.failed = True + file_status.message = f"Failed to parse: {file_path}" + file_status.details = [f"SyntaxError: {str(e)}"] + return file_status + except MarimoFileError as e: + # Handle syntax errors in notebooks + if self.ignore_scripts: + # Skip this file silently when ignore_scripts is enabled file_status.skipped = True file_status.message = ( - f"Skipped: {file_path} (not a notebook file)" + f"Skipped: {file_path} (not a marimo notebook)" ) return file_status - - try: - with capture_output() as (stdout, stderr, logs): - load_result = get_notebook_status(file_path) - except SyntaxError as e: - # Handle syntax errors in notebooks + else: self.errored = True file_status.failed = True - file_status.message = f"Failed to parse: {file_path}" - file_status.details = [f"SyntaxError: {str(e)}"] + file_status.message = ( + f"Not recognizable as a marimo notebook: {file_path}" + ) + file_status.details = [f"MarimoFileError: {str(e)}"] return file_status - except MarimoFileError as e: - # Handle syntax errors in notebooks - if self.ignore_scripts: - # Skip this file silently when ignore_scripts is enabled - file_status.skipped = True - file_status.message = ( - f"Skipped: {file_path} (not a marimo notebook)" - ) - return file_status - else: - self.errored = True - file_status.failed = True - file_status.message = ( - f"Not recognizable as a marimo notebook: {file_path}" - ) - file_status.details = [f"MarimoFileError: {str(e)}"] - return file_status - file_status.notebook = load_result.notebook - file_status.contents = load_result.contents + file_status.notebook = load_result.notebook + file_status.contents = load_result.contents - if load_result.status == "empty": + if load_result.status == "empty": + file_status.skipped = True + file_status.message = f"Skipped: {file_path} (empty file)" + elif load_result.status == "invalid": + if self.ignore_scripts: + # Skip this file silently when ignore_scripts is enabled file_status.skipped = True - file_status.message = f"Skipped: {file_path} (empty file)" - elif load_result.status == "invalid": - if self.ignore_scripts: - # Skip this file silently when ignore_scripts is enabled - file_status.skipped = True - file_status.message = ( - f"Skipped: {file_path} (not a marimo notebook)" - ) - return file_status - else: - file_status.failed = True - file_status.message = ( - f"Failed to parse: {file_path} (not a valid notebook)" - ) - elif load_result.notebook is not None: - try: - # Check notebook with all rules including parsing - file_status.diagnostics = ( - await self.rule_engine.check_notebook( - load_result.notebook, - # Add parsing rule if there's captured output - stdout=stdout.getvalue().strip(), - stderr=stderr.getvalue().strip(), - logs=logs, - ) - ) - except Exception as e: - # Handle other parsing errors - self.errored = True - file_status.failed = True - file_status.message = f"Failed to process 
{file_path}" - file_status.details = [str(e)] + file_status.message = ( + f"Skipped: {file_path} (not a marimo notebook)" + ) + return file_status else: - # Status is valid but no notebook - shouldn't happen but handle gracefully - file_status.skipped = True + file_status.failed = True file_status.message = ( - f"Skipped: {file_path} (no notebook content)" + f"Failed to parse: {file_path} (not a valid notebook)" ) + elif load_result.notebook is not None: + try: + # Check notebook with all rules including parsing + file_status.diagnostics = ( + await self.rule_engine.check_notebook( + load_result.notebook, + # Add parsing rule if there's captured output + stdout=stdout.getvalue().strip(), + stderr=stderr.getvalue().strip(), + logs=logs, + ) + ) + except Exception as e: + # Handle other parsing errors + self.errored = True + file_status.failed = True + file_status.message = f"Failed to process {file_path}" + file_status.details = [str(e)] + else: + # Status is valid but no notebook - shouldn't happen but handle gracefully + file_status.skipped = True + file_status.message = ( + f"Skipped: {file_path} (no notebook content)" + ) - # Ensure diagnostics list is initialized for cases where no processing happened - if not hasattr(file_status, "diagnostics"): - file_status.diagnostics = [] + # Ensure diagnostics list is initialized for cases where no processing happened + if not hasattr(file_status, "diagnostics"): + file_status.diagnostics = [] - return file_status + return file_status + + async def _run_stream( + self, files_to_check: list[Path] + ) -> AsyncIterator[FileStatus]: + """Asynchronously check files and yield results as they complete.""" # Create tasks for all files tasks = [ - asyncio.create_task(process_file(file_path)) + asyncio.create_task(self._process_single_file(file_path)) for file_path in files_to_check ] @@ -254,15 +278,22 @@ def _generate_file_contents(file_status: FileStatus) -> str: file_status.notebook, file_status.file ) - def run_streaming(self, files_to_check: list[Path]) -> None: + def run_streaming( + self, files_to_check: Union[AsyncIterator[Path], Iterator[Path]] + ) -> None: """Run linting checks with real-time streaming output.""" asyncio.run(self._run_streaming_async(files_to_check)) - async def _run_streaming_async(self, files_to_check: list[Path]) -> None: + async def _run_streaming_async( + self, files_to_check: Union[AsyncIterator[Path], Iterator[Path]] + ) -> None: """Internal async implementation of run_streaming.""" # Process files as they complete fixed_count = 0 - async for file_status in self._run_stream(files_to_check): + + # Convert to async iterator and process + async for file_path in _to_async_iterator(files_to_check): + file_status = await self._process_single_file(file_path) self.files.append(file_status) # Stream output via pipe if available @@ -278,6 +309,7 @@ async def _run_streaming_async(self, files_to_check: list[Path]) -> None: fixed_count += 1 if self.pipe: self.pipe(f"Updated: {file_status.file}") + self.fixed_count = fixed_count async def fix(self, file_status: FileStatus) -> bool: diff --git a/marimo/_utils/files.py b/marimo/_utils/files.py index 2ad5a1394fc..55e205c0ba0 100644 --- a/marimo/_utils/files.py +++ b/marimo/_utils/files.py @@ -2,7 +2,7 @@ import fnmatch import os import re -from collections.abc import Generator +from collections.abc import AsyncGenerator, Generator from pathlib import Path from typing import Union @@ -27,6 +27,17 @@ def get_files(folder: str) -> Generator[Path, None, None]: yield from get_files(item.path) 
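+# NOTE: os.scandir() below is synchronous, so this async generator does not
+# make directory I/O non-blocking by itself; it lets callers consume paths
+# incrementally with `async for` while traversal is still in progress.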
+async def async_get_files(folder: str) -> AsyncGenerator[Path, None]:
+    """Asynchronously recursively get all files from a folder."""
+    with os.scandir(folder) as scan:
+        for item in scan:
+            if item.is_file():
+                yield Path(item.path)
+            elif item.is_dir() and not item.name.startswith("."):
+                async for file_path in async_get_files(item.path):
+                    yield file_path
+
+
 def expand_file_patterns(file_patterns: tuple[str, ...]) -> list[Path]:
     """Expand file patterns to actual file paths.
 
@@ -70,3 +81,57 @@ def expand_file_patterns(file_patterns: tuple[str, ...]) -> list[Path]:
 
     # Remove duplicates and sort
     return sorted(set(files_to_check))
+
+
+async def async_expand_file_patterns(
+    file_patterns: tuple[str, ...],
+) -> AsyncGenerator[Path, None]:
+    """Asynchronously expand file patterns to file paths, yielding as discovered.
+
+    Args:
+        file_patterns: Tuple of file patterns (files, directories, or glob-like patterns)
+
+    Yields:
+        Path objects for matching files (including non-existent explicit files)
+    """
+    seen = set()
+
+    for pattern in file_patterns:
+        if os.path.isfile(pattern):
+            path = Path(pattern)
+            if path not in seen:
+                seen.add(path)
+                yield path
+        elif os.path.isdir(pattern):
+            async for file_path in async_get_files(pattern):
+                if file_path not in seen:
+                    seen.add(file_path)
+                    yield file_path
+        else:
+            # Handle glob patterns by walking from root and filtering
+            if "**" in pattern or "*" in pattern or "?" in pattern:
+                # Extract root directory to walk from
+                root = "."
+                parts = pattern.split("/")
+                for i, part in enumerate(parts):
+                    if "*" in part or "?" in part:
+                        root = "/".join(parts[:i]) if i > 0 else "."
+                        break
+                    elif os.path.isdir("/".join(parts[: i + 1])):
+                        root = "/".join(parts[: i + 1])
+
+                # Get all files from root and filter by pattern
+                if os.path.isdir(root):
+                    async for file_path in async_get_files(root):
+                        if (
+                            fnmatch.fnmatch(str(file_path), pattern)
+                            and file_path not in seen
+                        ):
+                            seen.add(file_path)
+                            yield file_path
+            else:
+                # Not a glob pattern but file doesn't exist - yield it anyway for error handling
+                path = Path(pattern)
+                if path not in seen:
+                    seen.add(path)
+                    yield path

From 5781f3ff0f9ac951314de55106c00c88ede25670 Mon Sep 17 00:00:00 2001
From: dylan
Date: Tue, 23 Sep 2025 10:20:08 -0700
Subject: [PATCH 2/3] tests: nonexistent file

---
 tests/_lint/test_run_check.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tests/_lint/test_run_check.py b/tests/_lint/test_run_check.py
index 81d36552cd8..4c1049d5077 100644
--- a/tests/_lint/test_run_check.py
+++ b/tests/_lint/test_run_check.py
@@ -368,4 +368,20 @@ def test_error_handling_in_run_check(self):
         assert result.files[0].failed is True
         # Note: errored might not be True as we changed error handling
         assert "Failed to parse" in result.files[0].message
-        """Test error handling in run_check."""
+
+    def test_run_check_with_nonexistent_file_pattern(self):
+        """Test run_check with a specific nonexistent file."""
+        result = run_check(("nonexistent_file.py",))
+
+        assert isinstance(result, Linter)
+        assert len(result.files) == 1  # Should create a failed file status
+        assert result.files[0].failed is True
+        assert "File not found" in result.files[0].message
+
+    def test_run_check_with_nonexistent_directory_pattern(self):
+        """Test run_check with nonexistent directory patterns."""
+        result = run_check(("nonexistent_dir/**/*.py",))
+
+        assert isinstance(result, Linter)
+        assert len(result.files) == 0
+        assert result.errored is False

From ca708f66aa4e0b1fc6a7fb58e9d61436e5d317d4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 23 Sep 2025 17:25:17 +0000
Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 marimo/_lint/linter.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/marimo/_lint/linter.py b/marimo/_lint/linter.py
index db33b8bf5af..c6688344a5c 100644
--- a/marimo/_lint/linter.py
+++ b/marimo/_lint/linter.py
@@ -123,9 +123,7 @@ async def _process_single_file(self, file: Path) -> FileStatus:
         # Check if file is a supported notebook format
         if not file_path.endswith((".py", ".md", ".qmd")):
             file_status.skipped = True
-            file_status.message = (
-                f"Skipped: {file_path} (not a notebook file)"
-            )
+            file_status.message = f"Skipped: {file_path} (not a notebook file)"
             return file_status
 
         try:
@@ -196,9 +194,7 @@ async def _process_single_file(self, file: Path) -> FileStatus:
         else:
             # Status is valid but no notebook - shouldn't happen but handle gracefully
             file_status.skipped = True
-            file_status.message = (
-                f"Skipped: {file_path} (no notebook content)"
-            )
+            file_status.message = f"Skipped: {file_path} (no notebook content)"
 
         # Ensure diagnostics list is initialized for cases where no processing happened
         if not hasattr(file_status, "diagnostics"):
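
---

Usage sketch (illustrative; not part of the patch series): a minimal driver
for the new streaming path. It uses only names that appear in these patches
(run_check() and the FileStatus fields); the glob pattern is a placeholder.

    from marimo._lint import run_check

    # Patterns may be explicit files, directories, or glob patterns; after
    # PATCH 1/3 they are expanded lazily, so linting of early matches can
    # begin before directory traversal finishes.
    result = run_check(("notebooks/**/*.py",))

    for status in result.files:
        if status.skipped:
            continue
        if status.failed:
            # e.g. "File not found: ..." or "Failed to parse: ..."
            print(status.message)
        for diagnostic in status.diagnostics:
            print(diagnostic)

    if result.errored:
        raise SystemExit(1)

Note that run_check() remains a blocking entry point: it hands the async
generator from async_expand_file_patterns() to the Linter, which drives it
inside asyncio.run() via run_streaming().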