diff --git a/marimo/_output/md.py b/marimo/_output/md.py index 62b6d6266ba..64f250f9df1 100644 --- a/marimo/_output/md.py +++ b/marimo/_output/md.py @@ -14,7 +14,13 @@ import pymdownx.emoji # type: ignore from marimo._output.hypertext import Html +from marimo._output.md_extensions.breakless_lists import ( + BreaklessListsExtension, +) from marimo._output.md_extensions.external_links import ExternalLinksExtension +from marimo._output.md_extensions.flexible_indent import ( + FlexibleIndentExtension, +) from marimo._output.md_extensions.iconify import IconifyExtension from marimo._output.rich_help import mddoc from marimo._utils.url import is_url @@ -196,6 +202,10 @@ def _get_extensions() -> list[Union[str, markdown.Extension]]: "footnotes", # Sane lists, to include
    "sane_lists", + # Flexible indentation - supports 2 or 4 space indentation + FlexibleIndentExtension(), + # Breakless lists - more compact list formatting + BreaklessListsExtension(), # Links ExternalLinksExtension(), # Iconify diff --git a/marimo/_output/md_extensions/__init__.py b/marimo/_output/md_extensions/__init__.py deleted file mode 100644 index be52407b506..00000000000 --- a/marimo/_output/md_extensions/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright 2024 Marimo. All rights reserved. diff --git a/marimo/_output/md_extensions/breakless_lists.py b/marimo/_output/md_extensions/breakless_lists.py new file mode 100644 index 00000000000..78a981506ed --- /dev/null +++ b/marimo/_output/md_extensions/breakless_lists.py @@ -0,0 +1,109 @@ +# Copyright 2024 Marimo. All rights reserved. + +import re +from xml.etree.ElementTree import Element + +from markdown import ( # type: ignore + Extension, + Markdown, + preprocessors, + treeprocessors, +) + + +class BreaklessListsPreprocessor(preprocessors.Preprocessor): # type: ignore[misc] + """ + Enables CommonMark-style list interruption of paragraphs. + + In CommonMark, lists can interrupt paragraphs without requiring a blank line. + Python-Markdown requires blank lines, so this preprocessor adds them automatically + when it detects a list immediately following a paragraph. 
+ """ + + # Pattern to match lines that start list items (ordered or unordered) + LIST_START_PATTERN = re.compile(r"^(\s*)([*+-]|\d+\.)(\s+)", re.MULTILINE) + + def __init__(self, md: Markdown) -> None: + super().__init__(md) + + def run(self, lines: list[str]) -> list[str]: + """Process the lines and insert blank lines before lists that follow paragraphs.""" + if not lines: + return lines + + result_lines: list[str] = [] + i = 0 + + while i < len(lines): + current_line = lines[i] + result_lines.append(current_line) + + # Check if we need to look ahead for a list + if i + 1 < len(lines): + next_line = lines[i + 1] + + # If current line is not empty and next line starts a list + if ( + current_line.strip() # Current line has content + and self.LIST_START_PATTERN.match(next_line) + ): # Next line starts a list + # Check if there's already a blank line + if current_line.strip(): + # Insert blank line to enable list interruption + result_lines.append("") + + i += 1 + + return result_lines + + +class BreaklessListsTreeProcessor(treeprocessors.Treeprocessor): # type: ignore[misc] + """ + Removes paragraph tags from list items to create compact lists. + + This makes lists more compact by removing

    <p> tags within <li>

  1. elements. + """ + + def run(self, root: Element) -> None: + def is_only_child(parent: Element, child: Element) -> bool: + return len(parent) == 1 and parent[0] is child + + for element in root.iter(tag="li"): + for p in element.findall(".//p"): + # If paragraph has no attributes and is the only child + if not p.attrib and is_only_child(element, p): + # Swap the paragraph with the list item + element.text = p.text + element.tail = p.tail + # Copy over the children + for child in p: + element.append(child) + # Remove the paragraph tag + element.remove(p) + + +class BreaklessListsExtension(Extension): # type: ignore[misc] + """ + Extension to enable CommonMark-style list interruption of paragraphs. + + This allows lists to follow paragraphs without requiring blank lines, + matching CommonMark specification behavior. Also makes lists compact + by removing paragraph tags within list items. + """ + + def extendMarkdown(self, md: Markdown) -> None: + # Register preprocessor to enable list interruption + md.preprocessors.register( + BreaklessListsPreprocessor(md), + "breakless_lists_preproc", + # Run early in preprocessing, before other processors + 30, + ) + + # Register tree processor to make lists compact + md.treeprocessors.register( + BreaklessListsTreeProcessor(md), + "breakless_lists_tree", + # Run after lists are parsed but before paragraph cleanup + 10, + ) diff --git a/marimo/_output/md_extensions/flexible_indent.py b/marimo/_output/md_extensions/flexible_indent.py new file mode 100644 index 00000000000..efc674f3824 --- /dev/null +++ b/marimo/_output/md_extensions/flexible_indent.py @@ -0,0 +1,135 @@ +# Copyright 2024 Marimo. All rights reserved. + +import re + +from markdown import Extension, Markdown, preprocessors # type: ignore + + +class FlexibleIndentPreprocessor(preprocessors.Preprocessor): # type: ignore[misc] + """ + Preprocessor to standardize list indentation to specific levels. + Normalizes inconsistent indentation to match the allowed levels. 
+ """ + + # Pattern to match lines that start list items (ordered or unordered) + # Captures: (indentation, list_marker, trailing_space, content) + LIST_PATTERN = re.compile(r"^(\s*)([*+-]|\d+\.)(\s+)(.*)$", re.MULTILINE) + INDENT_LEVELS = [2, 4] + BASE_INDENT_SIZE = 4 + FOUR_SPACES = " " + + def __init__(self, md: Markdown) -> None: + super().__init__(md) + + def _detect_base_indent(self, lines: list[str]) -> int: + """ + Detect the base indentation level used in the document. + + Returns 2 for 2-space indentation or 4 for 4-space indentation. + """ + indents: list[int] = [] + for line in lines: + match = self.LIST_PATTERN.match(line) + if match: + indent_str = match.group(1) + if indent_str: # Skip non-indented items + indent_count = len( + indent_str.replace("\t", self.FOUR_SPACES) + ) + indents.append(indent_count) + + if not indents: + return self.BASE_INDENT_SIZE + + # Find the smallest non-zero indent - this is likely our base level + min_indent = min(indents) + + # Choose the closest allowed indent level + if min_indent <= 2: + return 2 + else: + return self.BASE_INDENT_SIZE + + def _normalize_indentation(self, indent_str: str, base_level: int) -> str: + """ + Normalize indentation to consistent 2-space increments. + + This ensures that both 2-space and 4-space indentation patterns + result in the same normalized output. 
+ + Args: + indent_str: The original indentation string + base_level: The detected base indentation level (2 or 4) + + Returns: + Normalized indentation string using 2-space increments + """ + # Convert tabs to spaces (assuming 1 tab = 4 spaces) + normalized = indent_str.replace("\t", self.FOUR_SPACES) + indent_count = len(normalized) + + if indent_count == 0: + return "" + + # Calculate the intended nesting level based on the base level + nesting_level = max(1, round(indent_count / base_level)) + + # Always output using 4-space increments since that is what the markdown spec requires + return " " * (4 * nesting_level) + + def _get_list_depth(self, indent_str: str, base_level: int = 2) -> int: + """Calculate the nesting depth of a list item.""" + normalized = indent_str.replace("\t", self.FOUR_SPACES) + indent_count = len(normalized) + + if indent_count == 0: + return 0 + + # Calculate depth based on the base level + return max(1, round(indent_count / base_level)) + + def run(self, lines: list[str]) -> list[str]: + """Process the lines and normalize list indentation.""" + if not lines: + return lines + + # Detect the base indentation level used in this document + base_level = self._detect_base_indent(lines) + + result_lines: list[str] = [] + + for line in lines: + match = self.LIST_PATTERN.match(line) + if match: + indent, marker, space, content = match.groups() + + # Normalize the indentation based on detected base level + normalized_indent = self._normalize_indentation( + indent, base_level + ) + + # Reconstruct the line with normalized indentation + normalized_line = ( + f"{normalized_indent}{marker}{space}{content}" + ) + result_lines.append(normalized_line) + else: + result_lines.append(line) + + return result_lines + + +class FlexibleIndentExtension(Extension): # type: ignore[misc] + """ + Extension to provide flexible list indentation support. 
+ """ + + def extendMarkdown(self, md: Markdown) -> None: + """Add the preprocessor to the markdown instance.""" + # Register preprocessor to normalize indentation + md.preprocessors.register( + FlexibleIndentPreprocessor(md), + "flexible_indent", + # Run early, before breakless_lists and other list processing + 35, + ) diff --git a/marimo/_smoke_tests/markdown/sane_lists.py b/marimo/_smoke_tests/markdown/sane_lists.py index a6c72071cf9..0147fbcaca9 100644 --- a/marimo/_smoke_tests/markdown/sane_lists.py +++ b/marimo/_smoke_tests/markdown/sane_lists.py @@ -1,17 +1,23 @@ import marimo -__generated_with = "0.15.5" -app = marimo.App(width="medium") +__generated_with = "0.16.2" +app = marimo.App(width="columns") + + +@app.cell(column=0, hide_code=True) +def _(mo): + mo.md(r"""## Random numbering""") + return @app.cell def _(mo): mo.md( """ - 2. hey - 2. hey - 2. hey - """ + 2. hey + 2. hey + 2. hey + """ ) return @@ -20,10 +26,74 @@ def _(mo): def _(mo): mo.md( """ - 1. hey - 1. hey - 1. hey - """ + 1. hey + 1. hey + 1. hey + """ + ) + return + + +@app.cell +def _(mo): + mo.md( + r""" + 1. hey + 2. hey + 2. hey + """ + ) + return + + +@app.cell(column=1, hide_code=True) +def _(mo): + mo.md(r"""## List without breaks""") + return + + +@app.cell +def _(mo): + mo.md( + r""" + Lists with new line break (GitHub supports this) + - hey + - hey + - hey + """ + ) + return + + +@app.cell +def _(mo): + mo.md( + r""" + Lists with new line break (GitHub supports this) + 1. one + 2. two + - two two + """ + ) + return + + +@app.cell(column=2, hide_code=True) +def _(mo): + mo.md(r"""## List with 2 or 4 indentation""") + return + + +@app.cell +def _(mo): + mo.md( + r""" + Lists with 2-space indents (GitHub supports this) + + - hey + - hey + - hey + """ ) return @@ -32,10 +102,37 @@ def _(mo): def _(mo): mo.md( r""" + Lists with 4-space indents (GitHub supports this) + + 1. hey 1. hey 2. hey - 2. hey - """ + """ + ) + return + + +@app.cell +def _(mo): + mo.md( + r""" + 1. Item 1 + 1. 
Nested ordered item + 1. Deep nested ordered + 2. Item 2 + """ + ) + return + + +@app.cell +def _(mo): + mo.md( + r""" + - List + - indent with 3 spaces + - indent with 6 spaces + """ ) return diff --git a/pyproject.toml b/pyproject.toml index 84f8cb86d3e..638a9907c2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -221,6 +221,7 @@ extra-dependencies = [ "pytest-codecov~=0.6.1", "pytest-rerunfailures~=15.1", "pytest-asyncio~=0.26.0", + "inline-snapshot~=0.29.0", ] [[tool.hatch.envs.test.matrix]] @@ -251,6 +252,7 @@ extra-dependencies = [ "pytest-codecov~=0.6.1", "pytest-rerunfailures~=15.1", "pytest-asyncio~=0.26.0", + "inline-snapshot~=0.29.0", "python-lsp-server>=1.10.0", # For testing mo.ui.chart, table, ... "vl-convert-python", diff --git a/tests/_output/test_md.py b/tests/_output/test_md.py index a667271f3e3..9b38a3470e2 100644 --- a/tests/_output/test_md.py +++ b/tests/_output/test_md.py @@ -3,6 +3,9 @@ from typing import TYPE_CHECKING from unittest.mock import MagicMock, patch +import pytest +from inline_snapshot import snapshot + from marimo._output.md import _md, latex if TYPE_CHECKING: @@ -81,7 +84,7 @@ def test_md_footnotes() -> None: footnote_input = ( "Here is a footnote reference[^1].\n\n[^1]: Here is the footnote." ) - expected_output = 'Here is a footnote reference1.\n
    \n
    \n
      \n
    1. \nHere is the footnote. \n
    2. \n
    \n
    ' # noqa: E501 + expected_output = 'Here is a footnote reference1.\n
    \n
    \n
      \n
    1. Here is the footnote. 
    2. \n
    \n
    ' # noqa: E501 assert _md(footnote_input, apply_markdown_class=False).text == ( expected_output ) @@ -115,6 +118,185 @@ def test_md_sane_lists() -> None: assert _md(input_text, apply_markdown_class=False).text == expected_output +def test_md_breakless_lists() -> None: + # Test that breakless lists removes

    <p> tags from within <li>

  2. elements + # for more compact list formatting + input_text = """- Item 1 + +- Item 2 + +- Item 3""" + + result = _md(input_text, apply_markdown_class=False).text + + assert result == snapshot( + """\ +\ +""" + ) + + # Ensure no

    tags are present in list items + assert "

  3. " not in result + assert "

  4. " not in result + + +@pytest.mark.parametrize("spaces", [2, 4]) +def test_md_flexible_indent_spaces(spaces: int) -> None: + indent = " " * spaces + input_text = f"""- Item 1 +{indent}- Nested item +{indent}{indent}- Deep nested +- Item 2""" + + result = _md(input_text, apply_markdown_class=False).text + + # Should create properly nested lists with 2-space or 4-space indentation + assert result == snapshot( + """\ +\ +""" + ) + + +def test_md_flexible_indent_mixed_normalization() -> None: + # Test that inconsistent indentation gets normalized + input_text = """- Item 1 + - Nested item (3 spaces, should normalize to 2 or 4) + - Deep nested (6 spaces) +- Item 2""" + + result = _md(input_text, apply_markdown_class=False).text + + # Should normalize inconsistent spacing and still create valid lists + assert result == snapshot( + """\ +\ +""" + ) + + +@pytest.mark.parametrize("spaces", [2, 4]) +def test_md_flexible_indent_ordered_lists(spaces: int) -> None: + indent = " " * spaces + input_text = f"""1. Item 1 +{indent}1. Nested ordered item +{indent}{indent}1. Deep nested ordered +2. Item 2""" + + result = _md(input_text, apply_markdown_class=False).text + + # Should create properly nested ordered lists + assert result == snapshot( + """\ +
      +
    1. +Item 1 +
        +
      1. +Nested ordered item +
          +
        1. Deep nested ordered
        2. +
        +
      2. +
      +
    2. +
    3. Item 2
    4. +
    \ +""" + ) + + +def test_md_breakless_lists_with_paragraphs() -> None: + # Test CommonMark-style list interruption + input_text_no_blank = """This is GitHub-flavored markdown: +- Item 1 +- Item 2 +- Item 3""" + result = _md(input_text_no_blank, apply_markdown_class=False).text + assert result == snapshot( + """\ +This is GitHub-flavored markdown: +\ +""" + ) + + # Test 2: With blank line (should work the same) + input_text_with_blank = """This is GitHub-flavored markdown: + +- Item 1 +- Item 2 +- Item 3""" + next_result = _md(input_text_with_blank, apply_markdown_class=False).text + # Same as previous result + assert result == next_result + + +def test_md_commonmark_examples() -> None: + # Test the exact CommonMark spec example (Example 283) + commonmark_input = """Foo +- bar +- baz""" + result = _md(commonmark_input, apply_markdown_class=False).text + assert result == snapshot( + """\ +Foo +\ +""" + ) + + # Test with ordered lists too + ordered_input = """Text here +1. First item +2. Second item""" + result = _md(ordered_input, apply_markdown_class=False).text + assert result == snapshot( + """\ +Text here +
      +
    1. First item
    2. +
    3. Second item
    4. +
    \ +""" + ) + + def test_md_pycon_detection() -> None: # Test basic pycon detection with >>> prompts pycon_input = """```