def const_string(args: list[ast.stmt]) -> str:
    """Return the string literal passed as the sole positional argument.

    Args:
        args: Positional-argument nodes of a call (e.g. ``call.args``).

    Returns:
        The literal's string value.

    Raises:
        ValueError: If there is not exactly one argument, or the argument
            is not a plain string literal (or an f-string made of exactly
            one literal part).
    """
    (inner,) = args
    # An f-string without placeholders (e.g. rf"""...""") parses as a
    # JoinedStr wrapping exactly one Constant; unwrap it.
    if isinstance(inner, ast.JoinedStr):
        (inner,) = inner.values
    # Checking for "has a .value attribute" is not enough: Attribute,
    # Subscript, Starred and FormattedValue nodes all carry `.value`, and
    # formatting it would produce the repr of an AST node, not markdown.
    if not (isinstance(inner, ast.Constant) and isinstance(inner.value, str)):
        raise ValueError("expected a single string literal argument")
    return inner.value


def const_or_id(args: ast.stmt) -> str:
    """Return ``args.value`` formatted as a string, else ``args.id``.

    Used on keyword-argument value nodes; presumably these are Constant or
    Name nodes -- other node types are not specially handled here.

    Raises:
        AttributeError: If the node has neither ``value`` nor ``id``.
    """
    if hasattr(args, "value"):
        return f"{args.value}"  # type: ignore[attr-defined]
    return f"{args.id}"  # type: ignore[attr-defined]


def _extract_markdown(tree: ast.Module) -> Optional[str]:
    """Return the markdown text if ``tree`` is exactly ``mo.md(<literal>)``.

    The module must consist of a single bare expression statement that
    calls ``mo.md`` with one positional string literal and no keyword
    arguments. Anything else (assignments, several statements, dynamic
    arguments, another receiver) returns None so the caller treats the
    cell as ordinary code.

    Args:
        tree: Parsed module for one cell.

    Returns:
        The dedented, stripped markdown, or None if the cell is not a
        pure markdown cell.
    """
    try:
        (body,) = tree.body
    except (AttributeError, ValueError):
        return None

    # Only a bare expression counts: `x = mo.md("...")` defines a name and
    # has to be executed, so it must not be classified as markdown.
    if not isinstance(body, ast.Expr):
        return None
    call = body.value
    if not isinstance(call, ast.Call):
        return None

    # Explicit checks rather than `assert`, which is stripped under -O.
    func = call.func
    if not (
        isinstance(func, ast.Attribute)
        and func.attr == "md"
        and isinstance(func.value, ast.Name)
        and func.value.id == "mo"
    ):
        return None

    # mo.md() with no arguments, or with keywords we do not understand,
    # is left to run as code.
    if not call.args or call.keywords:
        return None

    try:
        md_lines = const_string(call.args).split("\n")
    except (AttributeError, ValueError):
        # Not a literal we can extract; handle the cell as a code block.
        return None

    # Dedent behavior is a little different than in marimo js, so handle
    # the first line separately from the rest.
    md_lines = [line.rstrip() for line in md_lines]
    md = (
        textwrap.dedent(md_lines[0])
        + "\n"
        + textwrap.dedent("\n".join(md_lines[1:]))
    )
    return md.strip()


def extract_markdown(code: str) -> Optional[str]:
    """Extract markdown from cell source that is a single ``mo.md`` call.

    Args:
        code: Raw source code of a cell.

    Returns:
        The markdown text, or None if the code is not a single
        ``mo.md(<string literal>)`` expression or cannot be parsed.
    """
    code = code.strip()
    # Cheap pre-check before parsing: exactly one line may start a
    # mo.md( call.
    md_calls = sum(
        1 for line in code.split("\n") if line.startswith("mo.md(")
    )
    if md_calls != 1:
        return None

    try:
        return _extract_markdown(ast.parse(code))
    except (SyntaxError, ValueError):
        # ValueError: older Pythons raise it for source with null bytes.
        return None
def _handle_markdown_cells_on_instantiate(
    self, execution_requests: dict[CellId_t, ExecutionRequest]
) -> None:
    """Pre-render markdown-only cells during kernel-ready initialization.

    For every requested cell whose compiled form carries markdown
    (``cell.markdown is not None``), this method:
    1. Renders the markdown to HTML
    2. Broadcasts the rendered output
    3. Broadcasts the cell as not stale
    4. Deletes the cell's entry from ``execution_requests``

    Nothing is rendered unless at least one non-markdown cell in
    ``execution_requests`` defines ``mo``; in that case the mapping is
    left untouched and the caller handles every cell as usual. Cells that
    fail to compile, and markdown cells that fail to render, are also left
    in the mapping.

    Args:
        execution_requests: Requests keyed by cell id. Mutated in place:
            entries for pre-rendered markdown cells are removed.
    """
    markdown_cells: dict[CellId_t, str] = {}
    exports_mo = False
    for cid, er in execution_requests.items():
        # Reuse the cell if it is already in the graph (avoids recompiling).
        cell = self.graph.cells.get(cid)
        error = None

        if cell is None:
            # TODO: Don't bother compiling whole cell.
            # However, since we still need to extract defs
            # for mo / marimo, this is OK for now.
            cell, error = self._try_compiling_cell(cid, er.code, [])

        # Cells that do not compile stay in the requests untouched.
        if cell is None or error is not None:
            continue

        if cell.markdown is not None:
            # Only collected here; rendering happens once we know that
            # some cell defines `mo`.
            markdown_cells[cid] = cell.markdown
        else:
            # Regular cell: record whether it defines `mo`.
            exports_mo |= "mo" in cell.defs

    # No requested cell defines `mo`: fall back to default handling.
    if not exports_mo:
        return

    from marimo._output.md import md

    for cell_id, content in markdown_cells.items():
        try:
            mimetype, html_content = md(content)._mime_()
        except Exception:
            # Pre-rendering is only a fast path. A rendering failure must
            # not abort instantiation, so keep the request and let the
            # cell go through normal execution instead.
            LOGGER.warning(
                "Failed to pre-render markdown for cell %s; "
                "leaving it for normal execution.",
                cell_id,
            )
            continue

        # Broadcast the markdown output
        CellOp.broadcast_output(
            channel=CellChannel.OUTPUT,
            mimetype=mimetype,
            data=html_content,
            cell_id=cell_id,
            status="idle",
        )

        # The output is already available, so the cell is not stale and
        # no longer needs an execution request.
        CellOp.broadcast_stale(cell_id=cell_id, stale=False)
        del execution_requests[cell_id]
hasattr(args, "value"): - return f"{args.value}" # type: ignore[attr-defined] - return f"{args.id}" # type: ignore[attr-defined] - - def get_markdown_from_cell( cell: Union[CellImpl, Cell], code: str ) -> Optional[str]: @@ -51,34 +38,7 @@ def get_markdown_from_cell( if not (cell.refs == {"mo"} and not cell.defs): return None - markdown_lines = [ - line for line in code.strip().split("\n") if line.startswith("mo.md(") - ] - if len(markdown_lines) > 1: - return None - - code = code.strip() - # Attribute Error handled by the outer try/except block. - # Wish there was a more compact to ignore ignore[attr-defined] for all. - try: - (body,) = ast.parse(code).body - if body.value.func.attr == "md": # type: ignore[attr-defined] - value = body.value # type: ignore[attr-defined] - else: - return None - assert value.func.value.id == "mo" - md_lines = _const_string(value.args).split("\n") - except (AssertionError, AttributeError, ValueError, SyntaxError): - # No reason to explicitly catch exceptions if we can't parse out - # markdown. Just handle it as a code block. - return None - - # Dedent behavior is a little different that in marimo js, so handle - # accordingly. 
- md_lines = [line.rstrip() for line in md_lines] - md = dedent(md_lines[0]) + "\n" + dedent("\n".join(md_lines[1:])) - md = md.strip() - return md + return extract_markdown(code) def get_sql_options_from_cell(code: str) -> Optional[dict[str, str]]: @@ -96,7 +56,7 @@ def get_sql_options_from_cell(code: str) -> Optional[dict[str, str]]: return None if value.keywords: for keyword in value.keywords: # type: ignore[attr-defined] - options[keyword.arg] = _const_or_id(keyword.value) # type: ignore[attr-defined] + options[keyword.arg] = const_or_id(keyword.value) # type: ignore[attr-defined] output = options.pop("output", "True").lower() if output == "false": options["hide_output"] = "True" diff --git a/tests/_runtime/test_runtime.py b/tests/_runtime/test_runtime.py index 5940db5b068..e381905cca0 100644 --- a/tests/_runtime/test_runtime.py +++ b/tests/_runtime/test_runtime.py @@ -3128,6 +3128,256 @@ def foo() -> A: assert k.globals["A"] == k.globals["anno"]["return"] +class TestMarkdownHandling: + """Test markdown cell handling during kernel instantiation.""" + + async def test_markdown_cells_rendered_on_instantiate( + self, mocked_kernel: MockedKernel + ) -> None: + """Test that markdown cells are rendered and marked as completed on instantiate.""" + k = mocked_kernel.k + stream = mocked_kernel.stream + + # Create execution requests with markdown and regular cells + markdown_cell_code = ( + 'mo.md("# Hello World\\n\\nThis is **markdown**.")' + ) + regular_cell_code = "x = 1" + + execution_requests = [ + ExecutionRequest(cell_id="md_cell", code=markdown_cell_code), + ExecutionRequest(cell_id="regular_cell", code=regular_cell_code), + ] + + # Create a creation request with auto_run=False to trigger the markdown handling + creation_request = CreationRequest( + execution_requests=execution_requests, + auto_run=False, + set_ui_element_value_request=SetUIElementValueRequest( + object_ids=[], + values=[], + ), + ) + + # Clear stream before instantiate + stream.messages.clear() 
+ + # Add a cell that exports 'mo' to enable markdown processing + # This simulates the scenario where marimo has been imported + execution_requests.append( + ExecutionRequest(cell_id="mo_import", code="import marimo as mo") + ) + creation_request = CreationRequest( + execution_requests=execution_requests, + auto_run=False, + set_ui_element_value_request=SetUIElementValueRequest( + object_ids=[], + values=[], + ), + ) + + # Instantiate the kernel + await k.instantiate(creation_request) + + # Check that markdown cell was removed from uninstantiated requests + assert "md_cell" not in k._uninstantiated_execution_requests + # Regular cell should still be there + assert "regular_cell" in k._uninstantiated_execution_requests + + # Check that the markdown cell output was broadcast + cell_ops = [deserialize_kernel_message(msg) for msg in stream.messages] + cell_ops = [op for op in cell_ops if isinstance(op, CellOp)] + + # Find operations for the markdown cell + md_cell_ops = [op for op in cell_ops if op.cell_id == "md_cell"] + + # Should have at least one output operation and one stale operation + assert len(md_cell_ops) >= 2 + + # Check that there's an output operation with HTML content + output_ops = [op for op in md_cell_ops if op.output is not None] + assert len(output_ops) == 1 + + output_op = output_ops[0] + assert output_op.output.channel == CellChannel.OUTPUT + assert output_op.output.mimetype == "text/html" + assert "Hello World" in output_op.output.data + assert " None: + """Test that non-markdown cells are not affected by markdown handling.""" + k = mocked_kernel.k + stream = mocked_kernel.stream + + # Create execution requests with only regular cells + execution_requests = [ + ExecutionRequest(cell_id="cell1", code="x = 1"), + ExecutionRequest(cell_id="cell2", code="y = 2"), + ] + + creation_request = CreationRequest( + execution_requests=execution_requests, + auto_run=False, + set_ui_element_value_request=SetUIElementValueRequest( + object_ids=[], + values=[], 
+ ), + ) + + stream.messages.clear() + await k.instantiate(creation_request) + + # All cells should remain in uninstantiated requests + assert "cell1" in k._uninstantiated_execution_requests + assert "cell2" in k._uninstantiated_execution_requests + + # Check that all cells were marked as stale + cell_ops = [deserialize_kernel_message(msg) for msg in stream.messages] + cell_ops = [op for op in cell_ops if isinstance(op, CellOp)] + + for cell_id in ["cell1", "cell2"]: + cell_ops_for_id = [op for op in cell_ops if op.cell_id == cell_id] + stale_ops = [ + op for op in cell_ops_for_id if op.stale_inputs is not None + ] + assert len(stale_ops) == 1 + assert stale_ops[0].stale_inputs is True + + async def test_malformed_markdown_cells_marked_stale( + self, mocked_kernel: MockedKernel + ) -> None: + """Test that cells with syntax errors are marked as stale, not processed.""" + k = mocked_kernel.k + stream = mocked_kernel.stream + + # Create execution requests with malformed markdown cell + execution_requests = [ + ExecutionRequest( + cell_id="bad_cell", code="mo.md(" + ), # Syntax error + ExecutionRequest(cell_id="good_md", code='mo.md("# Good")'), + ] + + creation_request = CreationRequest( + execution_requests=execution_requests, + auto_run=False, + set_ui_element_value_request=SetUIElementValueRequest( + object_ids=[], + values=[], + ), + ) + + stream.messages.clear() + + # Add a cell that exports 'mo' to enable markdown processing + # This simulates the scenario where marimo has been imported + execution_requests.append( + ExecutionRequest(cell_id="mo_import", code="import marimo as mo") + ) + creation_request = CreationRequest( + execution_requests=execution_requests, + auto_run=False, + set_ui_element_value_request=SetUIElementValueRequest( + object_ids=[], + values=[], + ), + ) + + await k.instantiate(creation_request) + + # Bad cell should remain in uninstantiated requests + assert "bad_cell" in k._uninstantiated_execution_requests + # Good markdown cell should be 
removed + assert "good_md" not in k._uninstantiated_execution_requests + + # Check operations + cell_ops = [deserialize_kernel_message(msg) for msg in stream.messages] + cell_ops = [op for op in cell_ops if isinstance(op, CellOp)] + + # Bad cell should be marked as stale + bad_cell_ops = [op for op in cell_ops if op.cell_id == "bad_cell"] + bad_stale_ops = [ + op for op in bad_cell_ops if op.stale_inputs is not None + ] + assert len(bad_stale_ops) == 1 + assert bad_stale_ops[0].stale_inputs is True + + # Good cell should have output and be marked as not stale + good_cell_ops = [op for op in cell_ops if op.cell_id == "good_md"] + good_output_ops = [op for op in good_cell_ops if op.output is not None] + assert len(good_output_ops) == 1 + assert "Good" in good_output_ops[0].output.data + + async def test_no_mo_available_all_cells_stale( + self, mocked_kernel: MockedKernel + ) -> None: + """Test that when 'mo' is not available, all cells are marked as stale.""" + k = mocked_kernel.k + stream = mocked_kernel.stream + + # Create execution requests with markdown cells + execution_requests = [ + ExecutionRequest(cell_id="md_cell1", code='mo.md("# Hello")'), + ExecutionRequest(cell_id="md_cell2", code='mo.md("## World")'), + ExecutionRequest(cell_id="regular_cell", code="x = 1"), + ] + + creation_request = CreationRequest( + execution_requests=execution_requests, + auto_run=False, + set_ui_element_value_request=SetUIElementValueRequest( + object_ids=[], + values=[], + ), + ) + + stream.messages.clear() + + # Ensure 'mo' is not in graph definitions by starting with empty graph + assert "mo" not in k.graph.definitions + + await k.instantiate(creation_request) + + # All cells should remain in uninstantiated requests since mo is not available + assert "md_cell1" in k._uninstantiated_execution_requests + assert "md_cell2" in k._uninstantiated_execution_requests + assert "regular_cell" in k._uninstantiated_execution_requests + + # Check that all cells were marked as stale + 
cell_ops = [deserialize_kernel_message(msg) for msg in stream.messages] + cell_ops = [op for op in cell_ops if isinstance(op, CellOp)] + + for cell_id in ["md_cell1", "md_cell2", "regular_cell"]: + cell_ops_for_id = [op for op in cell_ops if op.cell_id == cell_id] + stale_ops = [ + op for op in cell_ops_for_id if op.stale_inputs is not None + ] + assert len(stale_ops) == 1 + assert stale_ops[0].stale_inputs is True + + # No cells should have output operations + output_ops = [op for op in cell_ops if op.output is not None] + assert len(output_ops) == 0 + + def _parse_error_output(cell_op: CellOp) -> list[Error]: error_output = cell_op.output assert error_output is not None