fix: rename all unnamed index columns to empty strings for pd.MultiIndex (#5093)

Light2Dark · web-flow · commit 7feba9767bf6 · 2025-05-27T07:26:24.000-05:00
diff --git a/marimo/_plugins/ui/_impl/tables/pandas_table.py b/marimo/_plugins/ui/_impl/tables/pandas_table.py
@@ -110,19 +110,28 @@ def to_json_str(
                     isinstance(result.index, pd.Index)
                     and not isinstance(result.index, pd.RangeIndex)
                 ):
-                    unnamed_indexes = result.index.names[0] is None
+                    index_names = result.index.names
+                    unnamed_indexes = any(
+                        idx is None for idx in result.index.names
+                    )
+
                     index_levels = result.index.nlevels
                     result = result.reset_index()
 
                     if unnamed_indexes:
-                        # We could rename, but it doesn't work cleanly for multi-col indexes
-                        result.columns = pd.Index(
-                            [""] + list(result.columns[1:])
-                        )
+                        # After reset_index, each index level becomes a leading column.
+                        # Rename the columns of unnamed levels to empty strings,
+                        # adding one space per rename so every column name stays unique.
+                        # TODO: On the frontend this still displays the original index, not the renamed one
+                        empty_name = ""
+                        for i, idx_name in enumerate(index_names):
+                            if idx_name is None:
+                                result.columns.values[i] = empty_name
+                                empty_name += " "
 
                         if index_levels > 1:
                             LOGGER.warning(
-                                "Indexes with more than one level are not supported properly, call reset_index() to flatten"
+                                "Indexes with more than one level are not well supported, call reset_index() or use mo.plain(df)"
                             )
 
                 return sanitize_json_bigint(
diff --git a/marimo/_smoke_tests/tables/pandas_multi_idx.py b/marimo/_smoke_tests/tables/pandas_multi_idx.py
@@ -1,6 +1,6 @@
 import marimo
 
-__generated_with = "0.12.4"
+__generated_with = "0.13.12"
 app = marimo.App(width="medium")
 
 
@@ -12,46 +12,77 @@ def _():
 
 
 @app.cell
-def _(pd):
+def _(mo, pd):
     arrays = [
         ["bar", "bar"],
         ["one", "two"],
     ]
     tuples = list(zip(*arrays))
     index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
     named_indexes = pd.Series([1, 2], index=index)
-    named_indexes
-    return arrays, index, named_indexes, tuples
+
+    mo.vstack([mo.md("## Named indexes (works)"), named_indexes])
+    return
 
 
 @app.cell
-def _(pd):
+def _(mo, pd):
     unnamed_indexes = pd.concat(
         {
             "a": pd.DataFrame({"foo": [1]}, index=["hello"]),
             "b": pd.DataFrame({"baz": [2.0]}, index=["world"]),
         }
     )
-    unnamed_indexes
-    # unnamed_indexes.reset_index() # this works
-    return (unnamed_indexes,)
+
+    mo.md(f"""
+    ## Unnamed indexes does not work correctly
+
+    {mo.vstack([mo.plain(unnamed_indexes), unnamed_indexes])}
+
+    ### Using reset_index works but changes structure
+    {mo.ui.table(unnamed_indexes.reset_index())}
+    """)
+    return
 
 
 @app.cell
 def _(mo, pd):
+    _multi_idx = pd.MultiIndex.from_tuples([("weight", "kg"), ("height", "m")])
+    _df = pd.DataFrame(
+        [[1.0, 2.0], [3.0, 4.0]], index=["cat", "dog"], columns=_multi_idx
+    )
+    _multi_col_stack = _df.stack(future_stack=True)
+    mo.vstack([mo.plain(_multi_col_stack), _multi_col_stack])
+
+    mo.vstack(
+        [
+            mo.md("## Row multi-idx with stack (not working correctly)"),
+            mo.plain(_multi_col_stack),
+            _multi_col_stack,
+        ]
+    )
+    return
+
+
+@app.cell
+def _(pd):
     cols = pd.MultiIndex.from_arrays(
         [["basic_amt"] * 2, ["NSW", "QLD"]], names=[None, "Faculty"]
     )
     idx = pd.Index(["All", "Full"])
     column_multi_idx = pd.DataFrame([(1, 1), (0, 1)], index=idx, columns=cols)
-
-    mo.ui.table(column_multi_idx)
-    return cols, column_multi_idx, idx
+    return (column_multi_idx,)
 
 
 @app.cell
-def _(column_multi_idx):
-    print("Raw data:\n", column_multi_idx)
+def _(column_multi_idx, mo):
+    mo.vstack(
+        [
+            mo.md("## Column multi index (we flatten)"),
+            mo.plain(column_multi_idx),
+            column_multi_idx,
+        ]
+    )
     return
 
 
diff --git a/tests/_plugins/ui/_impl/tables/test_pandas_table.py b/tests/_plugins/ui/_impl/tables/test_pandas_table.py
@@ -285,6 +285,42 @@ def test_to_json_multi_index_unnamed(self) -> None:
             {"level_0": "z", "level_1": 3, "a": 3, "b": 6},
         ]
 
+    def test_to_json_multi_index_unnamed_2(self) -> None:
+        # Create a DataFrame with a MultiIndex where second level is unnamed
+        df = pd.DataFrame(
+            {
+                "A": [1, 2, 3, 4],
+                "B": [5, 6, 7, 8],
+            },
+            index=pd.MultiIndex.from_tuples(
+                [("x", 1), ("x", 2), ("y", 1), ("y", 2)],
+                names=["level1", None],  # Second level is unnamed
+            ),
+        )
+
+        json_data = self.factory_create_json_from_df(df)
+        # Second level converted to empty string
+        assert json_data == [
+            {"level1": "x", "": 1, "A": 1, "B": 5},
+            {"level1": "x", "": 2, "A": 2, "B": 6},
+            {"level1": "y", "": 1, "A": 3, "B": 7},
+            {"level1": "y", "": 2, "A": 4, "B": 8},
+        ]
+
+    def test_to_json_multi_index_unnamed_3(self) -> None:
+        cols = pd.MultiIndex.from_tuples([("weight", "kg"), ("height", "m")])
+        df = pd.DataFrame(
+            [[1.0, 2.0], [3.0, 4.0]], index=["cat", "dog"], columns=cols
+        )
+        df = df.stack(future_stack=True)
+        json_data = self.factory_create_json_from_df(df)
+        assert json_data == [
+            {"": "cat", " ": "kg", "weight": 1.0, "height": None},
+            {"": "cat", " ": "m", "weight": None, "height": 2.0},
+            {"": "dog", " ": "kg", "weight": 3.0, "height": None},
+            {"": "dog", " ": "m", "weight": None, "height": 4.0},
+        ]
+
     def test_to_json_multi_col_index(self) -> None:
         cols = pd.MultiIndex.from_arrays(
             [["basic_amt"] * 2, ["NSW", "QLD"]], names=[None, "Faculty"]