Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions marimo/_lint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from marimo._lint.linter import FileStatus, Linter
from marimo._lint.rule_engine import EarlyStoppingConfig, RuleEngine
from marimo._lint.rules.base import LintRule
from marimo._utils.files import expand_file_patterns
from marimo._utils.files import async_expand_file_patterns

if TYPE_CHECKING:
from collections.abc import Callable
Expand Down Expand Up @@ -43,8 +43,8 @@ def run_check(
Returns:
Linter with per-file status and diagnostics
"""
# Expand patterns to actual files
files_to_check = expand_file_patterns(file_patterns)
# Get async generator for files
files_to_check = async_expand_file_patterns(file_patterns)

linter = Linter(
pipe=pipe,
Expand Down Expand Up @@ -100,7 +100,7 @@ def message_pipe(msg: str) -> None:
rules=filtered_rules,
)

files_to_check = expand_file_patterns(file_patterns)
files_to_check = async_expand_file_patterns(file_patterns)
linter.run_streaming(files_to_check)

return linter, "\n".join(messages)
Expand Down
196 changes: 112 additions & 84 deletions marimo/_lint/linter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@

import asyncio
import re
from collections.abc import AsyncIterator, Iterator
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Union

from marimo._ast.load import get_notebook_status
from marimo._ast.parse import MarimoFileError
Expand All @@ -17,7 +18,7 @@
from marimo._schemas.serialization import NotebookSerialization

if TYPE_CHECKING:
from collections.abc import AsyncIterator, Callable
from collections.abc import AsyncIterator, Callable, Iterator

from marimo._lint.rules.base import LintRule

Expand All @@ -40,6 +41,20 @@ def _contents_differ_excluding_generated_with(
return orig_cleaned != gen_cleaned


async def _to_async_iterator(
    files_to_check: Union[AsyncIterator[Path], Iterator[Path]],
) -> AsyncIterator[Path]:
    """Adapt *files_to_check* so it can always be consumed with ``async for``.

    Async iterators are re-yielded unchanged; plain synchronous iterators
    are wrapped item by item into an async generator.
    """
    # Duck-type on __aiter__ so any object implementing the async-iteration
    # protocol is accepted, not just registered AsyncIterator subclasses.
    if not hasattr(files_to_check, "__aiter__"):
        # Synchronous iterable: forward each item through this async generator.
        for path in files_to_check:
            yield path
    else:
        async for path in files_to_check:
            yield path


@dataclass
class FileStatus:
"""Processing status and results for a single file."""
Expand Down Expand Up @@ -91,105 +106,110 @@ def __init__(
self.fixed_count: int = 0
self.issues_count: int = 0

async def _run_stream(
self, files_to_check: list[Path]
) -> AsyncIterator[FileStatus]:
"""Asynchronously check files and yield results as they complete."""
async def _process_single_file(self, file: Path) -> FileStatus:
"""Process a single file and return its status."""
file_path = str(file)
file_status = FileStatus(file=file_path)
# Check if file exists first
if not file.exists():
self.errored = True
file_status.failed = True
file_status.message = f"File not found: {file_path}"
file_status.details = [
f"FileNotFoundError: No such file or directory: '{file_path}'"
]
return file_status

# Create tasks for all files
async def process_file(file: Path) -> FileStatus:
file_path = str(file)
file_status = FileStatus(file=file_path)
# Check if file is a supported notebook format
if not file_path.endswith((".py", ".md", ".qmd")):
file_status.skipped = True
file_status.message = f"Skipped: {file_path} (not a notebook file)"
return file_status

# Check if file is a supported notebook format
if not file_path.endswith((".py", ".md", ".qmd")):
try:
with capture_output() as (stdout, stderr, logs):
load_result = get_notebook_status(file_path)
except SyntaxError as e:
# Handle syntax errors in notebooks
self.errored = True
file_status.failed = True
file_status.message = f"Failed to parse: {file_path}"
file_status.details = [f"SyntaxError: {str(e)}"]
return file_status
except MarimoFileError as e:
# Handle syntax errors in notebooks
if self.ignore_scripts:
# Skip this file silently when ignore_scripts is enabled
file_status.skipped = True
file_status.message = (
f"Skipped: {file_path} (not a notebook file)"
f"Skipped: {file_path} (not a marimo notebook)"
)
return file_status

try:
with capture_output() as (stdout, stderr, logs):
load_result = get_notebook_status(file_path)
except SyntaxError as e:
# Handle syntax errors in notebooks
else:
self.errored = True
file_status.failed = True
file_status.message = f"Failed to parse: {file_path}"
file_status.details = [f"SyntaxError: {str(e)}"]
file_status.message = (
f"Not recognizable as a marimo notebook: {file_path}"
)
file_status.details = [f"MarimoFileError: {str(e)}"]
return file_status
except MarimoFileError as e:
# Handle syntax errors in notebooks
if self.ignore_scripts:
# Skip this file silently when ignore_scripts is enabled
file_status.skipped = True
file_status.message = (
f"Skipped: {file_path} (not a marimo notebook)"
)
return file_status
else:
self.errored = True
file_status.failed = True
file_status.message = (
f"Not recognizable as a marimo notebook: {file_path}"
)
file_status.details = [f"MarimoFileError: {str(e)}"]
return file_status

file_status.notebook = load_result.notebook
file_status.contents = load_result.contents
file_status.notebook = load_result.notebook
file_status.contents = load_result.contents

if load_result.status == "empty":
if load_result.status == "empty":
file_status.skipped = True
file_status.message = f"Skipped: {file_path} (empty file)"
elif load_result.status == "invalid":
if self.ignore_scripts:
# Skip this file silently when ignore_scripts is enabled
file_status.skipped = True
file_status.message = f"Skipped: {file_path} (empty file)"
elif load_result.status == "invalid":
if self.ignore_scripts:
# Skip this file silently when ignore_scripts is enabled
file_status.skipped = True
file_status.message = (
f"Skipped: {file_path} (not a marimo notebook)"
)
return file_status
else:
file_status.failed = True
file_status.message = (
f"Failed to parse: {file_path} (not a valid notebook)"
)
elif load_result.notebook is not None:
try:
# Check notebook with all rules including parsing
file_status.diagnostics = (
await self.rule_engine.check_notebook(
load_result.notebook,
# Add parsing rule if there's captured output
stdout=stdout.getvalue().strip(),
stderr=stderr.getvalue().strip(),
logs=logs,
)
)
except Exception as e:
# Handle other parsing errors
self.errored = True
file_status.failed = True
file_status.message = f"Failed to process {file_path}"
file_status.details = [str(e)]
file_status.message = (
f"Skipped: {file_path} (not a marimo notebook)"
)
return file_status
else:
# Status is valid but no notebook - shouldn't happen but handle gracefully
file_status.skipped = True
file_status.failed = True
file_status.message = (
f"Skipped: {file_path} (no notebook content)"
f"Failed to parse: {file_path} (not a valid notebook)"
)
elif load_result.notebook is not None:
try:
# Check notebook with all rules including parsing
file_status.diagnostics = (
await self.rule_engine.check_notebook(
load_result.notebook,
# Add parsing rule if there's captured output
stdout=stdout.getvalue().strip(),
stderr=stderr.getvalue().strip(),
logs=logs,
)
)
except Exception as e:
# Handle other parsing errors
self.errored = True
file_status.failed = True
file_status.message = f"Failed to process {file_path}"
file_status.details = [str(e)]
else:
# Status is valid but no notebook - shouldn't happen but handle gracefully
file_status.skipped = True
file_status.message = f"Skipped: {file_path} (no notebook content)"

# Ensure diagnostics list is initialized for cases where no processing happened
if not hasattr(file_status, "diagnostics"):
file_status.diagnostics = []
# Ensure diagnostics list is initialized for cases where no processing happened
if not hasattr(file_status, "diagnostics"):
file_status.diagnostics = []

return file_status
return file_status

async def _run_stream(
self, files_to_check: list[Path]
) -> AsyncIterator[FileStatus]:
"""Asynchronously check files and yield results as they complete."""

# Create tasks for all files
tasks = [
asyncio.create_task(process_file(file_path))
asyncio.create_task(self._process_single_file(file_path))
for file_path in files_to_check
]

Expand Down Expand Up @@ -254,15 +274,22 @@ def _generate_file_contents(file_status: FileStatus) -> str:
file_status.notebook, file_status.file
)

def run_streaming(self, files_to_check: list[Path]) -> None:
def run_streaming(
self, files_to_check: Union[AsyncIterator[Path], Iterator[Path]]
) -> None:
"""Run linting checks with real-time streaming output."""
asyncio.run(self._run_streaming_async(files_to_check))

async def _run_streaming_async(self, files_to_check: list[Path]) -> None:
async def _run_streaming_async(
self, files_to_check: Union[AsyncIterator[Path], Iterator[Path]]
) -> None:
"""Internal async implementation of run_streaming."""
# Process files as they complete
fixed_count = 0
async for file_status in self._run_stream(files_to_check):

# Convert to async iterator and process
async for file_path in _to_async_iterator(files_to_check):
file_status = await self._process_single_file(file_path)
self.files.append(file_status)

# Stream output via pipe if available
Expand All @@ -278,6 +305,7 @@ async def _run_streaming_async(self, files_to_check: list[Path]) -> None:
fixed_count += 1
if self.pipe:
self.pipe(f"Updated: {file_status.file}")

self.fixed_count = fixed_count

async def fix(self, file_status: FileStatus) -> bool:
Expand Down
67 changes: 66 additions & 1 deletion marimo/_utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import fnmatch
import os
import re
from collections.abc import Generator
from collections.abc import AsyncGenerator, Generator
from pathlib import Path
from typing import Union

Expand All @@ -27,6 +27,17 @@ def get_files(folder: str) -> Generator[Path, None, None]:
yield from get_files(item.path)


async def async_get_files(folder: str) -> AsyncGenerator[Path, None]:
    """Yield every file under *folder*, recursing into non-hidden subdirectories.

    NOTE(review): the directory scan itself is synchronous (``os.scandir``);
    the async form exists so callers can consume results with ``async for``.
    """
    with os.scandir(folder) as entries:
        for entry in entries:
            if entry.is_file():
                yield Path(entry.path)
                continue
            # Recurse into subdirectories, skipping dot-directories (e.g. .git).
            if entry.is_dir() and not entry.name.startswith("."):
                async for nested in async_get_files(entry.path):
                    yield nested


def expand_file_patterns(file_patterns: tuple[str, ...]) -> list[Path]:
"""Expand file patterns to actual file paths.

Expand Down Expand Up @@ -70,3 +81,57 @@ def expand_file_patterns(file_patterns: tuple[str, ...]) -> list[Path]:

# Remove duplicates and sort
return sorted(set(files_to_check))


async def async_expand_file_patterns(
    file_patterns: tuple[str, ...],
) -> AsyncGenerator[Path, None]:
    """Asynchronously expand file patterns to file paths, yielding as discovered.

    Args:
        file_patterns: Tuple of file patterns (files, directories, or glob-like patterns)

    Yields:
        Path objects for matching files (including non-existent explicit files)
    """
    # Paths already emitted, so duplicates across patterns are dropped.
    yielded: set[Path] = set()

    for pattern in file_patterns:
        if os.path.isfile(pattern):
            # Explicit existing file.
            candidate = Path(pattern)
            if candidate not in yielded:
                yielded.add(candidate)
                yield candidate
            continue

        if os.path.isdir(pattern):
            # Whole directory: stream every file beneath it.
            async for candidate in async_get_files(pattern):
                if candidate not in yielded:
                    yielded.add(candidate)
                    yield candidate
            continue

        if not any(ch in pattern for ch in "*?"):
            # Non-glob pattern for a missing file: yield it anyway so the
            # caller can surface a "file not found" style error.
            candidate = Path(pattern)
            if candidate not in yielded:
                yielded.add(candidate)
                yield candidate
            continue

        # Glob pattern: pick a directory to walk — the prefix before the
        # first wildcard component, preferring the deepest existing prefix.
        root = "."
        parts = pattern.split("/")
        for idx, part in enumerate(parts):
            if "*" in part or "?" in part:
                root = "/".join(parts[:idx]) if idx > 0 else "."
                break
            elif os.path.isdir("/".join(parts[: idx + 1])):
                root = "/".join(parts[: idx + 1])

        # Walk the root and keep only paths matching the full pattern.
        if os.path.isdir(root):
            async for candidate in async_get_files(root):
                if candidate not in yielded and fnmatch.fnmatch(
                    str(candidate), pattern
                ):
                    yielded.add(candidate)
                    yield candidate
Loading
Loading