diff --git a/marimo/_output/md.py b/marimo/_output/md.py
index 62b6d6266ba..64f250f9df1 100644
--- a/marimo/_output/md.py
+++ b/marimo/_output/md.py
@@ -14,7 +14,13 @@
import pymdownx.emoji # type: ignore
from marimo._output.hypertext import Html
+from marimo._output.md_extensions.breakless_lists import (
+ BreaklessListsExtension,
+)
from marimo._output.md_extensions.external_links import ExternalLinksExtension
+from marimo._output.md_extensions.flexible_indent import (
+ FlexibleIndentExtension,
+)
from marimo._output.md_extensions.iconify import IconifyExtension
from marimo._output.rich_help import mddoc
from marimo._utils.url import is_url
@@ -196,6 +202,10 @@ def _get_extensions() -> list[Union[str, markdown.Extension]]:
"footnotes",
# Sane lists, to include
"sane_lists",
+ # Flexible indentation - supports 2 or 4 space indentation
+ FlexibleIndentExtension(),
+ # Breakless lists - more compact list formatting
+ BreaklessListsExtension(),
# Links
ExternalLinksExtension(),
# Iconify
diff --git a/marimo/_output/md_extensions/__init__.py b/marimo/_output/md_extensions/__init__.py
deleted file mode 100644
index be52407b506..00000000000
--- a/marimo/_output/md_extensions/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright 2024 Marimo. All rights reserved.
diff --git a/marimo/_output/md_extensions/breakless_lists.py b/marimo/_output/md_extensions/breakless_lists.py
new file mode 100644
index 00000000000..78a981506ed
--- /dev/null
+++ b/marimo/_output/md_extensions/breakless_lists.py
@@ -0,0 +1,109 @@
+# Copyright 2024 Marimo. All rights reserved.
+
+import re
+from xml.etree.ElementTree import Element
+
+from markdown import ( # type: ignore
+ Extension,
+ Markdown,
+ preprocessors,
+ treeprocessors,
+)
+
+
+class BreaklessListsPreprocessor(preprocessors.Preprocessor):  # type: ignore[misc]
+    """
+    Enables CommonMark-style list interruption of paragraphs.
+
+    In CommonMark, lists can interrupt paragraphs without requiring a blank
+    line. Python-Markdown requires blank lines, so this preprocessor inserts
+    one whenever a non-blank line is directly followed by a line that starts
+    a list item. This also applies when the non-blank line is itself a list
+    item.
+
+    Lines inside fenced code blocks (three or more backticks or tildes) are
+    passed through unchanged, because their content is literal text and must
+    not gain extra blank lines.
+    """
+
+    # Pattern to match lines that start list items (ordered or unordered)
+    LIST_START_PATTERN = re.compile(r"^(\s*)([*+-]|\d+\.)(\s+)", re.MULTILINE)
+    # Pattern to match a code fence: optional indent, then 3+ backticks/tildes
+    FENCE_PATTERN = re.compile(r"^\s*(`{3,}|~{3,})")
+
+    def __init__(self, md: Markdown) -> None:
+        super().__init__(md)
+
+    def _fence_marker(self, line: str) -> str:
+        """Return the fence run (e.g. "```") that starts ``line``, else ""."""
+        found = self.FENCE_PATTERN.match(line)
+        if not found:
+            return ""
+        marker: str = found.group(1)
+        # A backtick fence may not have backticks later on the same line;
+        # that shape is inline code, not a fence.
+        if marker[0] == "`" and "`" in line.strip()[len(marker) :]:
+            return ""
+        return marker
+
+    def run(self, lines: list[str]) -> list[str]:
+        """Insert blank lines before list items that follow non-blank lines.
+
+        Args:
+            lines: The document split into lines.
+
+        Returns:
+            A new list of lines with blank lines inserted, or ``lines``
+            itself when it is empty.
+        """
+        if not lines:
+            return lines
+
+        result_lines: list[str] = []
+        # Marker of the fenced block currently being read; "" when outside.
+        open_fence = ""
+        last_index = len(lines) - 1
+
+        for index, current_line in enumerate(lines):
+            result_lines.append(current_line)
+
+            marker = self._fence_marker(current_line)
+            if open_fence:
+                # A closing fence uses the same character, is at least as
+                # long as the opening one, and has nothing else on the line.
+                closes_fence = (
+                    marker[:1] == open_fence[0]
+                    and len(marker) >= len(open_fence)
+                    and current_line.strip() == marker
+                )
+                if not closes_fence:
+                    # Literal code: never insert anything after it.
+                    continue
+                open_fence = ""
+            elif marker:
+                open_fence = marker
+                # The next line is code, not a list item.
+                continue
+
+            # A non-blank line directly followed by a list item needs a
+            # blank separator for Python-Markdown to start the list.
+            if (
+                index < last_index
+                and current_line.strip()
+                and self.LIST_START_PATTERN.match(lines[index + 1])
+            ):
+                result_lines.append("")
+
+        return result_lines
+
+
+class BreaklessListsTreeProcessor(treeprocessors.Treeprocessor):  # type: ignore[misc]
+    """
+    Removes paragraph tags from list items to create compact lists.
+
+    This makes lists more compact by removing <p> tags within <li> elements.
+    A list item is only unwrapped when its sole child is a <p> without
+    attributes and the item has no text of its own before that paragraph.
+    """
+
+    def run(self, root: Element) -> None:
+        """Unwrap single attribute-less paragraphs inside list items.
+
+        Args:
+            root: Root of the element tree; it is modified in place.
+        """
+        # Snapshot the items first: the loop rewrites their children, and
+        # mutating a tree while lazily iterating over it is fragile.
+        for item in list(root.iter(tag="li")):
+            if len(item) != 1:
+                continue
+            paragraph = item[0]
+            if paragraph.tag != "p" or paragraph.attrib:
+                continue
+            # Text that precedes the paragraph would be lost by unwrapping.
+            if item.text and item.text.strip():
+                continue
+
+            # Hoist the paragraph's text and children into the list item.
+            # The item's own tail is deliberately left untouched: it belongs
+            # to the parent list, not to the paragraph.
+            item.text = paragraph.text
+            item.remove(paragraph)
+            item.extend(list(paragraph))
+
+            # Keep any real text that followed the paragraph inside the item;
+            # whitespace-only tails are dropped.
+            trailing = paragraph.tail
+            if trailing and trailing.strip():
+                if len(item):
+                    last_child = item[-1]
+                    last_child.tail = (last_child.tail or "") + trailing
+                else:
+                    item.text = (item.text or "") + trailing
+
+
+class BreaklessListsExtension(Extension):  # type: ignore[misc]
+    """
+    Markdown extension bundling the two breakless-list processors.
+
+    It registers a preprocessor that lets a list follow a paragraph without
+    a blank line in between, and a tree processor that strips paragraph
+    wrappers from list items so the rendered lists stay compact.
+    """
+
+    def extendMarkdown(self, md: Markdown) -> None:
+        """Register both processors on the given ``Markdown`` instance."""
+        # Source-level pass: adds the blank lines that allow a list to
+        # interrupt a paragraph. Registered under priority 30.
+        list_interrupter = BreaklessListsPreprocessor(md)
+        md.preprocessors.register(
+            list_interrupter, "breakless_lists_preproc", 30
+        )
+
+        # Tree-level pass: unwraps paragraphs inside list items.
+        # Registered under priority 10.
+        list_compactor = BreaklessListsTreeProcessor(md)
+        md.treeprocessors.register(list_compactor, "breakless_lists_tree", 10)
diff --git a/marimo/_output/md_extensions/flexible_indent.py b/marimo/_output/md_extensions/flexible_indent.py
new file mode 100644
index 00000000000..efc674f3824
--- /dev/null
+++ b/marimo/_output/md_extensions/flexible_indent.py
@@ -0,0 +1,135 @@
+# Copyright 2024 Marimo. All rights reserved.
+
+import re
+
+from markdown import Extension, Markdown, preprocessors # type: ignore
+
+
+class FlexibleIndentPreprocessor(preprocessors.Preprocessor):  # type: ignore[misc]
+    """
+    Preprocessor that standardizes the indentation of list-item lines.
+
+    The smallest indentation found on any list item decides whether the
+    document is treated as using 2-space or 4-space nesting. Every list item
+    is then re-indented to four spaces per nesting level. Lines that do not
+    start a list item, and lines inside fenced code blocks, are left as-is.
+    """
+
+    # Pattern to match lines that start list items (ordered or unordered)
+    # Captures: (indentation, list_marker, trailing_space, content)
+    LIST_PATTERN = re.compile(r"^(\s*)([*+-]|\d+\.)(\s+)(.*)$", re.MULTILINE)
+    # Pattern to match a code fence: optional indent, then 3+ backticks/tildes
+    FENCE_PATTERN = re.compile(r"^\s*(`{3,}|~{3,})")
+    # NOTE: INDENT_LEVELS is not referenced anywhere in this class.
+    INDENT_LEVELS = [2, 4]
+    BASE_INDENT_SIZE = 4
+    # Built by repetition so the width cannot be lost to whitespace edits.
+    FOUR_SPACES = " " * 4
+
+    def __init__(self, md: Markdown) -> None:
+        super().__init__(md)
+
+    def _fenced_mask(self, lines: list[str]) -> list[bool]:
+        """Return one flag per line: True when it belongs to a fenced block.
+
+        Both the fence lines themselves and the lines between them are
+        flagged. An unclosed fence extends to the end of ``lines``.
+        """
+        mask: list[bool] = []
+        open_fence = ""
+        for line in lines:
+            found = self.FENCE_PATTERN.match(line)
+            marker = found.group(1) if found else ""
+            # A backtick fence may not have backticks later on the same
+            # line; that shape is inline code, not a fence.
+            if marker[:1] == "`" and "`" in line.strip()[len(marker) :]:
+                marker = ""
+
+            if open_fence:
+                mask.append(True)
+                # A closing fence uses the same character, is at least as
+                # long as the opening one, and has nothing else on the line.
+                if (
+                    marker[:1] == open_fence[0]
+                    and len(marker) >= len(open_fence)
+                    and line.strip() == marker
+                ):
+                    open_fence = ""
+            elif marker:
+                mask.append(True)
+                open_fence = marker
+            else:
+                mask.append(False)
+        return mask
+
+    def _detect_base_indent(self, lines: list[str]) -> int:
+        """
+        Detect the base indentation level used in the document.
+
+        Only indented list items outside fenced code blocks are considered.
+
+        Returns 2 when the smallest such indent is at most two columns,
+        otherwise 4 (also when there are no indented list items).
+        """
+        indents: list[int] = []
+        for line, in_code in zip(lines, self._fenced_mask(lines)):
+            if in_code:
+                continue
+            match = self.LIST_PATTERN.match(line)
+            if match and match.group(1):  # Skip non-indented items
+                indents.append(
+                    len(match.group(1).replace("\t", self.FOUR_SPACES))
+                )
+
+        if not indents:
+            return self.BASE_INDENT_SIZE
+
+        # The smallest non-zero indent is taken as the base level
+        return 2 if min(indents) <= 2 else self.BASE_INDENT_SIZE
+
+    def _normalize_indentation(self, indent_str: str, base_level: int) -> str:
+        """
+        Normalize indentation to consistent 4-space increments.
+
+        This ensures that both 2-space and 4-space indentation patterns
+        result in the same normalized output.
+
+        Args:
+            indent_str: The original indentation string
+            base_level: The detected base indentation level (2 or 4)
+
+        Returns:
+            Normalized indentation string using four spaces per nesting
+            level; the empty string for an unindented item
+        """
+        depth = self._get_list_depth(indent_str, base_level)
+        return self.FOUR_SPACES * depth
+
+    def _get_list_depth(self, indent_str: str, base_level: int = 2) -> int:
+        """Calculate the nesting depth of a list item.
+
+        Tabs count as four spaces. An unindented item has depth 0; any
+        indented item has depth of at least 1.
+        """
+        indent_count = len(indent_str.replace("\t", self.FOUR_SPACES))
+        if indent_count == 0:
+            return 0
+
+        # Snap the width to the nearest multiple of the base level
+        return max(1, round(indent_count / base_level))
+
+    def run(self, lines: list[str]) -> list[str]:
+        """Process the lines and normalize list indentation.
+
+        Args:
+            lines: The document split into lines.
+
+        Returns:
+            A new list of lines with list items re-indented, or ``lines``
+            itself when it is empty.
+        """
+        if not lines:
+            return lines
+
+        # Detect the base indentation level used in this document
+        base_level = self._detect_base_indent(lines)
+
+        result_lines: list[str] = []
+        for line, in_code in zip(lines, self._fenced_mask(lines)):
+            # Code inside fences is literal text and keeps its indentation
+            match = None if in_code else self.LIST_PATTERN.match(line)
+            if match is None:
+                result_lines.append(line)
+                continue
+
+            indent, marker, space, content = match.groups()
+            normalized_indent = self._normalize_indentation(indent, base_level)
+            # Rebuild the line; only the leading indentation changes
+            result_lines.append(f"{normalized_indent}{marker}{space}{content}")
+
+        return result_lines
+
+
+class FlexibleIndentExtension(Extension):  # type: ignore[misc]
+    """
+    Markdown extension that installs the list-indentation normalizer.
+    """
+
+    def extendMarkdown(self, md: Markdown) -> None:
+        """Register ``FlexibleIndentPreprocessor`` on ``md``."""
+        # Registered under priority 35, which is above the 30 used by the
+        # breakless-lists preprocessor.
+        indent_normalizer = FlexibleIndentPreprocessor(md)
+        md.preprocessors.register(indent_normalizer, "flexible_indent", 35)
diff --git a/marimo/_smoke_tests/markdown/sane_lists.py b/marimo/_smoke_tests/markdown/sane_lists.py
index a6c72071cf9..0147fbcaca9 100644
--- a/marimo/_smoke_tests/markdown/sane_lists.py
+++ b/marimo/_smoke_tests/markdown/sane_lists.py
@@ -1,17 +1,23 @@
import marimo
-__generated_with = "0.15.5"
-app = marimo.App(width="medium")
+__generated_with = "0.16.2"
+app = marimo.App(width="columns")
+
+
+@app.cell(column=0, hide_code=True)
+def _(mo):
+ mo.md(r"""## Random numbering""")
+ return
@app.cell
def _(mo):
mo.md(
"""
- 2. hey
- 2. hey
- 2. hey
- """
+ 2. hey
+ 2. hey
+ 2. hey
+ """
)
return
@@ -20,10 +26,74 @@ def _(mo):
def _(mo):
mo.md(
"""
- 1. hey
- 1. hey
- 1. hey
- """
+ 1. hey
+ 1. hey
+ 1. hey
+ """
+ )
+ return
+
+
+@app.cell
+def _(mo):
+ mo.md(
+ r"""
+ 1. hey
+ 2. hey
+ 2. hey
+ """
+ )
+ return
+
+
+@app.cell(column=1, hide_code=True)
+def _(mo):
+ mo.md(r"""## List without breaks""")
+ return
+
+
+@app.cell
+def _(mo):
+ mo.md(
+ r"""
+ Lists with new line break (GitHub supports this)
+ - hey
+ - hey
+ - hey
+ """
+ )
+ return
+
+
+@app.cell
+def _(mo):
+ mo.md(
+ r"""
+ Lists with new line break (GitHub supports this)
+ 1. one
+ 2. two
+ - two two
+ """
+ )
+ return
+
+
+@app.cell(column=2, hide_code=True)
+def _(mo):
+ mo.md(r"""## List with 2 or 4 indentation""")
+ return
+
+
+@app.cell
+def _(mo):
+ mo.md(
+ r"""
+ Lists with 2-space indents (GitHub supports this)
+
+ - hey
+ - hey
+ - hey
+ """
)
return
@@ -32,10 +102,37 @@ def _(mo):
def _(mo):
mo.md(
r"""
+ Lists with 4-space indents (GitHub supports this)
+
+ 1. hey
1. hey
2. hey
- 2. hey
- """
+ """
+ )
+ return
+
+
+@app.cell
+def _(mo):
+ mo.md(
+ r"""
+ 1. Item 1
+ 1. Nested ordered item
+ 1. Deep nested ordered
+ 2. Item 2
+ """
+ )
+ return
+
+
+@app.cell
+def _(mo):
+ mo.md(
+ r"""
+ - List
+ - indent with 3 spaces
+ - indent with 6 spaces
+ """
)
return
diff --git a/pyproject.toml b/pyproject.toml
index 84f8cb86d3e..638a9907c2e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -221,6 +221,7 @@ extra-dependencies = [
"pytest-codecov~=0.6.1",
"pytest-rerunfailures~=15.1",
"pytest-asyncio~=0.26.0",
+ "inline-snapshot~=0.29.0",
]
[[tool.hatch.envs.test.matrix]]
@@ -251,6 +252,7 @@ extra-dependencies = [
"pytest-codecov~=0.6.1",
"pytest-rerunfailures~=15.1",
"pytest-asyncio~=0.26.0",
+ "inline-snapshot~=0.29.0",
"python-lsp-server>=1.10.0",
# For testing mo.ui.chart, table, ...
"vl-convert-python",
diff --git a/tests/_output/test_md.py b/tests/_output/test_md.py
index a667271f3e3..9b38a3470e2 100644
--- a/tests/_output/test_md.py
+++ b/tests/_output/test_md.py
@@ -3,6 +3,9 @@
from typing import TYPE_CHECKING
from unittest.mock import MagicMock, patch
+import pytest
+from inline_snapshot import snapshot
+
from marimo._output.md import _md, latex
if TYPE_CHECKING:
@@ -81,7 +84,7 @@ def test_md_footnotes() -> None:
footnote_input = (
"Here is a footnote reference[^1].\n\n[^1]: Here is the footnote."
)
- expected_output = 'Here is a footnote reference.\n' # noqa: E501
+ expected_output = 'Here is a footnote reference.\n' # noqa: E501
assert _md(footnote_input, apply_markdown_class=False).text == (
expected_output
)
@@ -115,6 +118,185 @@ def test_md_sane_lists() -> None:
assert _md(input_text, apply_markdown_class=False).text == expected_output
+def test_md_breakless_lists() -> None:
+ # Test that breakless lists removes <p> tags from within <li> elements
+ # for more compact list formatting
+ input_text = """- Item 1
+
+- Item 2
+
+- Item 3"""
+
+ result = _md(input_text, apply_markdown_class=False).text
+
+ assert result == snapshot(
+ """\
+
+- Item 1
+- Item 2
+- Item 3
+
\
+"""
+ )
+
+ # Ensure no <p> tags are present in list items
+ assert "
" not in result
+ assert "
" not in result
+
+
+@pytest.mark.parametrize("spaces", [2, 4])
+def test_md_flexible_indent_spaces(spaces: int) -> None:
+ indent = " " * spaces
+ input_text = f"""- Item 1
+{indent}- Nested item
+{indent}{indent}- Deep nested
+- Item 2"""
+
+ result = _md(input_text, apply_markdown_class=False).text
+
+ # Should create properly nested lists with 2-space or 4-space indentation
+ assert result == snapshot(
+ """\
+\
+"""
+ )
+
+
+def test_md_flexible_indent_mixed_normalization() -> None:
+ # Test that inconsistent indentation gets normalized
+ input_text = """- Item 1
+ - Nested item (3 spaces, should normalize to 2 or 4)
+ - Deep nested (6 spaces)
+- Item 2"""
+
+ result = _md(input_text, apply_markdown_class=False).text
+
+ # Should normalize inconsistent spacing and still create valid lists
+ assert result == snapshot(
+ """\
+
+-
+Item 1
+
+-
+Nested item (3 spaces, should normalize to 2 or 4)
+
+- Deep nested (6 spaces)
+
+
+
+
+- Item 2
+
\
+"""
+ )
+
+
+@pytest.mark.parametrize("spaces", [2, 4])
+def test_md_flexible_indent_ordered_lists(spaces: int) -> None:
+ indent = " " * spaces
+ input_text = f"""1. Item 1
+{indent}1. Nested ordered item
+{indent}{indent}1. Deep nested ordered
+2. Item 2"""
+
+ result = _md(input_text, apply_markdown_class=False).text
+
+ # Should create properly nested ordered lists
+ assert result == snapshot(
+ """\
+
+-
+Item 1
+
+-
+Nested ordered item
+
+- Deep nested ordered
+
+
+
+
+- Item 2
+
\
+"""
+ )
+
+
+def test_md_breakless_lists_with_paragraphs() -> None:
+ # Test CommonMark-style list interruption
+ input_text_no_blank = """This is GitHub-flavored markdown:
+- Item 1
+- Item 2
+- Item 3"""
+ result = _md(input_text_no_blank, apply_markdown_class=False).text
+ assert result == snapshot(
+ """\
+This is GitHub-flavored markdown:
+
+- Item 1
+- Item 2
+- Item 3
+
\
+"""
+ )
+
+ # Test 2: With blank line (should work the same)
+ input_text_with_blank = """This is GitHub-flavored markdown:
+
+- Item 1
+- Item 2
+- Item 3"""
+ next_result = _md(input_text_with_blank, apply_markdown_class=False).text
+ # Same as previous result
+ assert result == next_result
+
+
+def test_md_commonmark_examples() -> None:
+ # Test the exact CommonMark spec example (Example 283)
+ commonmark_input = """Foo
+- bar
+- baz"""
+ result = _md(commonmark_input, apply_markdown_class=False).text
+ assert result == snapshot(
+ """\
+Foo
+\
+"""
+ )
+
+ # Test with ordered lists too
+ ordered_input = """Text here
+1. First item
+2. Second item"""
+ result = _md(ordered_input, apply_markdown_class=False).text
+ assert result == snapshot(
+ """\
+Text here
+
+- First item
+- Second item
+
\
+"""
+ )
+
+
def test_md_pycon_detection() -> None:
# Test basic pycon detection with >>> prompts
pycon_input = """```