Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions marimo/_output/md.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@
import pymdownx.emoji # type: ignore

from marimo._output.hypertext import Html
from marimo._output.md_extensions.breakless_lists import (
BreaklessListsExtension,
)
from marimo._output.md_extensions.external_links import ExternalLinksExtension
from marimo._output.md_extensions.flexible_indent import (
FlexibleIndentExtension,
)
from marimo._output.md_extensions.iconify import IconifyExtension
from marimo._output.rich_help import mddoc
from marimo._utils.url import is_url
Expand Down Expand Up @@ -196,6 +202,10 @@ def _get_extensions() -> list[Union[str, markdown.Extension]]:
"footnotes",
# Sane lists, to include <ol start="n">
"sane_lists",
# Flexible indentation - supports 2 or 4 space indentation
FlexibleIndentExtension(),
# Breakless lists - more compact list formatting
BreaklessListsExtension(),
# Links
ExternalLinksExtension(),
# Iconify
Expand Down
1 change: 0 additions & 1 deletion marimo/_output/md_extensions/__init__.py

This file was deleted.

109 changes: 109 additions & 0 deletions marimo/_output/md_extensions/breakless_lists.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Copyright 2024 Marimo. All rights reserved.

import re
from xml.etree.ElementTree import Element

from markdown import ( # type: ignore
Extension,
Markdown,
preprocessors,
treeprocessors,
)


class BreaklessListsPreprocessor(preprocessors.Preprocessor):  # type: ignore[misc]
    """
    Enables CommonMark-style list interruption of paragraphs.

    In CommonMark, lists can interrupt paragraphs without requiring a blank line.
    Python-Markdown requires blank lines, so this preprocessor adds them
    automatically in front of every line that looks like a list item and
    directly follows a non-blank line.

    The check is purely line based: the preceding non-blank line may itself be
    a list item, so blank lines are also inserted between consecutive items.
    """

    # Pattern to match lines that start list items (ordered or unordered)
    LIST_START_PATTERN = re.compile(r"^(\s*)([*+-]|\d+\.)(\s+)", re.MULTILINE)

    def run(self, lines: list[str]) -> list[str]:
        """Insert a blank line before each list item that follows content.

        Args:
            lines: The document, one entry per source line.

        Returns:
            A new list containing every input line in order, with an empty
            string inserted before each line that matches
            ``LIST_START_PATTERN`` and whose predecessor is not blank.
            An empty input is returned unchanged.
        """
        if not lines:
            return lines

        result_lines: list[str] = []
        # Whether the previously emitted source line had non-whitespace content.
        # Starts False so nothing is ever inserted before the first line.
        previous_has_content = False

        # NOTE(review): no special handling of fenced code blocks here, so a
        # code line such as "- x" is treated like a list item — confirm
        # whether fences are expected to be handled before this runs.
        for line in lines:
            if previous_has_content and self.LIST_START_PATTERN.match(line):
                # Separate the list item from the content directly above it
                result_lines.append("")
            result_lines.append(line)
            previous_has_content = bool(line.strip())

        return result_lines


class BreaklessListsTreeProcessor(treeprocessors.Treeprocessor):  # type: ignore[misc]
    """
    Unwraps the lone paragraph of a list item so lists render compactly.

    A <li> whose single direct child is an attribute-free <p> has that
    paragraph's text and children hoisted into the <li> itself, and the
    <p> element is dropped.
    """

    def run(self, root: Element) -> None:
        """Unwrap qualifying paragraphs of every <li> under ``root`` in place."""
        for item in root.iter(tag="li"):
            for paragraph in item.findall(".//p"):
                # Paragraphs carrying attributes are left wrapped
                if paragraph.attrib:
                    continue
                # Only unwrap when the paragraph is the sole direct child
                if len(item) != 1 or item[0] is not paragraph:
                    continue

                # Hoist the paragraph's text into the list item
                item.text = paragraph.text
                # NOTE(review): this replaces the <li>'s own tail with the
                # paragraph's tail — confirm this is the intended behavior.
                item.tail = paragraph.tail
                # Re-parent the paragraph's children, then drop the wrapper
                item.extend(list(paragraph))
                item.remove(paragraph)


class BreaklessListsExtension(Extension):  # type: ignore[misc]
    """
    Extension to enable CommonMark-style list interruption of paragraphs.

    Lets a list follow a paragraph without a separating blank line, and
    makes lists compact by removing paragraph tags within list items.
    """

    def extendMarkdown(self, md: Markdown) -> None:
        """Register the breakless-lists preprocessor and tree processor on ``md``."""
        # Preprocessor that inserts the blank lines; registered at priority 30
        preprocessor = BreaklessListsPreprocessor(md)
        md.preprocessors.register(preprocessor, "breakless_lists_preproc", 30)

        # Tree processor that unwraps <p> inside <li>; registered at priority 10
        # NOTE(review): intended to run after lists are parsed — confirm
        # against the priorities of the built-in tree processors.
        tree_processor = BreaklessListsTreeProcessor(md)
        md.treeprocessors.register(tree_processor, "breakless_lists_tree", 10)
135 changes: 135 additions & 0 deletions marimo/_output/md_extensions/flexible_indent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Copyright 2024 Marimo. All rights reserved.

import re

from markdown import Extension, Markdown, preprocessors # type: ignore


class FlexibleIndentPreprocessor(preprocessors.Preprocessor):  # type: ignore[misc]
    """
    Preprocessor to standardize list indentation to specific levels.

    List items may be nested using either 2-space or 4-space steps (a tab
    counts as four spaces). The step size is detected once per document and
    every list-item line is rewritten so that each nesting level is indented
    by exactly four spaces. Lines that are not list items are left untouched.
    """

    # Pattern to match lines that start list items (ordered or unordered)
    # Captures: (indentation, list_marker, trailing_space, content)
    LIST_PATTERN = re.compile(r"^(\s*)([*+-]|\d+\.)(\s+)(.*)$", re.MULTILINE)
    INDENT_LEVELS = [2, 4]
    BASE_INDENT_SIZE = 4
    # Built by repetition so the value is unambiguously four spaces wide
    FOUR_SPACES = " " * 4

    def _indent_width(self, indent_str: str) -> int:
        """Return the width of ``indent_str`` with each tab counted as four spaces."""
        return len(indent_str.replace("\t", self.FOUR_SPACES))

    def _detect_base_indent(self, lines: list[str]) -> int:
        """
        Detect the base indentation level used in the document.

        Only indented list items are considered. Returns 2 when the
        smallest indentation found is at most two columns, otherwise
        ``BASE_INDENT_SIZE`` (also used when no indented list item exists).
        """
        indents = [
            self._indent_width(match.group(1))
            for match in map(self.LIST_PATTERN.match, lines)
            if match and match.group(1)  # Skip non-indented items
        ]

        if not indents:
            return self.BASE_INDENT_SIZE

        # The smallest non-zero indent is taken as one nesting step
        return 2 if min(indents) <= 2 else self.BASE_INDENT_SIZE

    def _normalize_indentation(self, indent_str: str, base_level: int) -> str:
        """
        Normalize indentation to consistent 4-space increments.

        This ensures that both 2-space and 4-space indentation patterns
        result in the same normalized output.

        Args:
            indent_str: The original indentation string
            base_level: The detected base indentation level (2 or 4)

        Returns:
            Four spaces per nesting level, or an empty string when
            ``indent_str`` is empty.
        """
        depth = self._get_list_depth(indent_str, base_level)
        return self.FOUR_SPACES * depth

    def _get_list_depth(self, indent_str: str, base_level: int = 2) -> int:
        """
        Calculate the nesting depth of a list item.

        Args:
            indent_str: The leading whitespace of the list-item line
            base_level: Number of columns that make up one nesting step

        Returns:
            0 for no indentation, otherwise the indentation width divided by
            ``base_level``, rounded, and never less than 1.
        """
        indent_count = self._indent_width(indent_str)

        if indent_count == 0:
            return 0

        # Any indentation at all counts as at least one level. round() is the
        # builtin, which rounds exact halves to the nearest even integer.
        return max(1, round(indent_count / base_level))

    def run(self, lines: list[str]) -> list[str]:
        """
        Process the lines and normalize list indentation.

        Args:
            lines: The document, one entry per source line.

        Returns:
            A new list in which every list-item line carries normalized
            indentation; an empty input is returned unchanged.
        """
        if not lines:
            return lines

        # Detect the base indentation level used in this document
        base_level = self._detect_base_indent(lines)

        result_lines: list[str] = []
        for line in lines:
            match = self.LIST_PATTERN.match(line)
            if match is None:
                result_lines.append(line)
                continue

            indent, marker, space, content = match.groups()
            normalized_indent = self._normalize_indentation(indent, base_level)
            # Only the leading indentation changes; marker, spacing and
            # content are reproduced exactly as captured.
            result_lines.append(f"{normalized_indent}{marker}{space}{content}")

        return result_lines


class FlexibleIndentExtension(Extension):  # type: ignore[misc]
    """
    Extension that installs the list-indentation normalizing preprocessor.
    """

    def extendMarkdown(self, md: Markdown) -> None:
        """Register ``FlexibleIndentPreprocessor`` on the markdown instance."""
        normalizer = FlexibleIndentPreprocessor(md)
        # Priority 35 is above the 30 used for "breakless_lists_preproc";
        # Python-Markdown runs higher priorities first, so indentation is
        # normalized before that preprocessor sees the lines.
        md.preprocessors.register(normalizer, "flexible_indent", 35)
Loading
Loading