marimo-team
diff --git a/‎marimo/_output/md.py‎
Lines changed: 10 additions & 0 deletions b/‎marimo/_output/md.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎marimo/_output/md_extensions/__init__.py‎
Lines changed: 0 additions & 1 deletion b/‎marimo/_output/md_extensions/__init__.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎marimo/_output/md_extensions/breakless_lists.py‎
Lines changed: 109 additions & 0 deletions b/‎marimo/_output/md_extensions/breakless_lists.py‎
Lines changed: 109 additions & 0 deletions
diff --git a/‎marimo/_output/md_extensions/flexible_indent.py‎
Lines changed: 135 additions & 0 deletions b/‎marimo/_output/md_extensions/flexible_indent.py‎
Lines changed: 135 additions & 0 deletions
@@ -14,7 +14,13 @@
 import pymdownx.emoji  # type: ignore
 
 from marimo._output.hypertext import Html
+from marimo._output.md_extensions.breakless_lists import (
+    BreaklessListsExtension,
+)
 from marimo._output.md_extensions.external_links import ExternalLinksExtension
+from marimo._output.md_extensions.flexible_indent import (
+    FlexibleIndentExtension,
+)
 from marimo._output.md_extensions.iconify import IconifyExtension
 from marimo._output.rich_help import mddoc
 from marimo._utils.url import is_url
@@ -196,6 +202,10 @@ def _get_extensions() -> list[Union[str, markdown.Extension]]:
         "footnotes",
         # Sane lists, to include <ol start="n">
         "sane_lists",
+        # Flexible indentation - supports 2 or 4 space indentation
+        FlexibleIndentExtension(),
+        # Breakless lists - more compact list formatting
+        BreaklessListsExtension(),
         # Links
         ExternalLinksExtension(),
         # Iconify
 
@@ -0,0 +1,109 @@
+# Copyright 2024 Marimo. All rights reserved.
+
+import re
+from xml.etree.ElementTree import Element
+
+from markdown import (  # type: ignore
+    Extension,
+    Markdown,
+    preprocessors,
+    treeprocessors,
+)
+
+
+class BreaklessListsPreprocessor(preprocessors.Preprocessor):  # type: ignore[misc]
+    """
+    Enables CommonMark-style list interruption of paragraphs.
+
+    In CommonMark, lists can interrupt paragraphs without requiring a blank line.
+    Python-Markdown requires blank lines, so this preprocessor adds one whenever
+    a non-blank line that is not itself a list item is immediately followed by
+    a line that starts a list item.
+    """
+
+    # Matches the start of a list item (ordered or unordered).
+    # Captures: (indentation, list_marker, trailing_space)
+    LIST_START_PATTERN = re.compile(r"^(\s*)([*+-]|\d+\.)(\s+)", re.MULTILINE)
+
+    def run(self, lines: list[str]) -> list[str]:
+        """
+        Insert a blank line before each list that directly follows other text.
+
+        Args:
+            lines: The document, one string per source line.
+
+        Returns:
+            A new list of lines with the blank lines added. An empty input is
+            returned unchanged.
+        """
+        if not lines:
+            return lines
+
+        starts_list_item = self.LIST_START_PATTERN.match
+        result_lines: list[str] = []
+
+        # NOTE(review): lines inside fenced code blocks are treated like any
+        # other line here - confirm whether fences need to be skipped.
+        for current_line, next_line in zip(lines, lines[1:]):
+            result_lines.append(current_line)
+            if (
+                current_line.strip()
+                # Consecutive list items already belong to the same list; a
+                # blank line is only needed where the list begins.
+                and not starts_list_item(current_line)
+                and starts_list_item(next_line)
+            ):
+                result_lines.append("")
+
+        # The final line has no successor, so it is only copied over.
+        result_lines.append(lines[-1])
+        return result_lines
+
+
+class BreaklessListsTreeProcessor(treeprocessors.Treeprocessor):  # type: ignore[misc]
+    """
+    Removes paragraph tags from list items to create compact lists.
+
+    A <li> whose only child is an attribute-less <p> has that <p> unwrapped:
+    the paragraph's text and children become the list item's own content.
+    List items with any other shape are left untouched.
+    """
+
+    def run(self, root: Element) -> None:
+        """
+        Unwrap sole <p> children of <li> elements, modifying the tree in place.
+
+        Args:
+            root: Root element of the parsed document tree.
+        """
+        for item in root.iter(tag="li"):
+            # Only a direct, sole child can be unwrapped without reordering
+            # content, so deeper descendants need not be searched.
+            if len(item) != 1:
+                continue
+            paragraph = item[0]
+            if paragraph.tag != "p" or paragraph.attrib:
+                continue
+
+            # Hoist the paragraph's content into the list item.
+            item.text = paragraph.text
+            children = list(paragraph)
+            item.remove(paragraph)
+            item.extend(children)
+
+            # The paragraph's tail is text that followed </p> *inside* the
+            # <li>; keep it inside the item rather than overwriting the
+            # item's own tail, which is content that follows </li>.
+            # Whitespace-only tails carry no content and are dropped.
+            trailing = paragraph.tail
+            if trailing and trailing.strip():
+                if children:
+                    last_child = children[-1]
+                    last_child.tail = (last_child.tail or "") + trailing
+                else:
+                    item.text = (item.text or "") + trailing
+
+
+class BreaklessListsExtension(Extension):  # type: ignore[misc]
+    """
+    Markdown extension bundling the two breakless-list processors.
+
+    The preprocessor lets a list follow a paragraph without a separating
+    blank line; the tree processor then makes list items compact by
+    removing paragraph tags within them.
+    """
+
+    def extendMarkdown(self, md: Markdown) -> None:
+        """Register both processors on the given Markdown instance."""
+        # Line-level pass that enables list interruption (priority 30).
+        list_interrupter = BreaklessListsPreprocessor(md)
+        md.preprocessors.register(
+            list_interrupter, "breakless_lists_preproc", 30
+        )
+
+        # Tree-level pass that makes lists compact (priority 10).
+        list_compactor = BreaklessListsTreeProcessor(md)
+        md.treeprocessors.register(
+            list_compactor, "breakless_lists_tree", 10
+        )
@@ -0,0 +1,135 @@
+# Copyright 2024 Marimo. All rights reserved.
+
+import re
+
+from markdown import Extension, Markdown, preprocessors  # type: ignore
+
+
+class FlexibleIndentPreprocessor(preprocessors.Preprocessor):  # type: ignore[misc]
+    """
+    Preprocessor to standardize list indentation.
+
+    Detects whether the document indents its list items in steps of 2 or 4
+    spaces, converts every list item's indentation to a nesting depth, and
+    rewrites the indentation as 4 spaces per depth level.
+    """
+
+    # Pattern to match lines that start list items (ordered or unordered)
+    # Captures: (indentation, list_marker, trailing_space, content)
+    LIST_PATTERN = re.compile(r"^(\s*)([*+-]|\d+\.)(\s+)(.*)$", re.MULTILINE)
+    # Indentation step sizes that input documents may use.
+    INDENT_LEVELS = [2, 4]
+    # Step size assumed when the document has no indented list items.
+    BASE_INDENT_SIZE = 4
+    # Expansion of one tab, and one output indentation step.
+    FOUR_SPACES = "    "
+
+    def _indent_width(self, indent_str: str) -> int:
+        """Return the width of an indentation string, counting a tab as 4."""
+        return len(indent_str.replace("\t", self.FOUR_SPACES))
+
+    def _detect_base_indent(self, lines: list[str]) -> int:
+        """
+        Detect the base indentation level used in the document.
+
+        Args:
+            lines: The document, one string per source line.
+
+        Returns:
+            2 for 2-space indentation or 4 for 4-space indentation. Documents
+            without any indented list item yield BASE_INDENT_SIZE.
+        """
+        widths = [
+            self._indent_width(match.group(1))
+            for match in map(self.LIST_PATTERN.match, lines)
+            # Non-indented items say nothing about the step size.
+            if match and match.group(1)
+        ]
+        if not widths:
+            return self.BASE_INDENT_SIZE
+
+        # The smallest indent in use is taken as one nesting step.
+        narrow_step = min(self.INDENT_LEVELS)
+        if min(widths) <= narrow_step:
+            return narrow_step
+        return self.BASE_INDENT_SIZE
+
+    def _normalize_indentation(self, indent_str: str, base_level: int) -> str:
+        """
+        Normalize indentation to consistent 4-space increments.
+
+        This ensures that both 2-space and 4-space indentation patterns
+        result in the same normalized output.
+
+        Args:
+            indent_str: The original indentation string
+            base_level: The detected base indentation level (2 or 4)
+
+        Returns:
+            Normalized indentation string using 4 spaces per nesting level,
+            or an empty string for a non-indented item.
+        """
+        depth = self._get_list_depth(indent_str, base_level)
+        # Always output 4-space steps, whatever step size the input used.
+        return self.FOUR_SPACES * depth
+
+    def _get_list_depth(self, indent_str: str, base_level: int = 2) -> int:
+        """
+        Calculate the nesting depth of a list item.
+
+        Args:
+            indent_str: The item's leading whitespace
+            base_level: Number of spaces that make up one nesting step
+
+        Returns:
+            0 for a non-indented item, otherwise a depth of at least 1.
+        """
+        indent_count = self._indent_width(indent_str)
+        if indent_count == 0:
+            return 0
+
+        # round() sends exact halves to the nearest even integer, so with a
+        # base of 2 both a 3-space and a 5-space indent give depth 2.
+        return max(1, round(indent_count / base_level))
+
+    def run(self, lines: list[str]) -> list[str]:
+        """
+        Process the lines and normalize list indentation.
+
+        Args:
+            lines: The document, one string per source line.
+
+        Returns:
+            A new list in which every list-item line has normalized
+            indentation and every other line is unchanged. An empty input is
+            returned as is.
+        """
+        if not lines:
+            return lines
+
+        # One base level is detected for the whole document.
+        # NOTE(review): list-like lines inside fenced code blocks are
+        # rewritten too - confirm whether fences need to be skipped.
+        base_level = self._detect_base_indent(lines)
+
+        result_lines: list[str] = []
+        for line in lines:
+            match = self.LIST_PATTERN.match(line)
+            if match is None:
+                result_lines.append(line)
+                continue
+
+            indent, marker, space, content = match.groups()
+            normalized_indent = self._normalize_indentation(indent, base_level)
+            # Marker, spacing and content are kept; only the indent changes.
+            result_lines.append(f"{normalized_indent}{marker}{space}{content}")
+
+        return result_lines
+
+
+class FlexibleIndentExtension(Extension):  # type: ignore[misc]
+    """
+    Markdown extension that installs the list-indentation normalizer.
+    """
+
+    def extendMarkdown(self, md: Markdown) -> None:
+        """Register FlexibleIndentPreprocessor on the markdown instance."""
+        indent_normalizer = FlexibleIndentPreprocessor(md)
+        # Priority 35 is meant to place this ahead of breakless_lists and
+        # other list processing.
+        md.preprocessors.register(indent_normalizer, "flexible_indent", 35)