Skip to content

Commit 7feba97

Browse files
authored
fix: rename all unnamed columns to empty string for pd.MultiIndex (#5093)
1 parent 04a8224 commit 7feba97

File tree

3 files changed

+95
-19
lines changed

3 files changed

+95
-19
lines changed

marimo/_plugins/ui/_impl/tables/pandas_table.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -110,19 +110,28 @@ def to_json_str(
110110
isinstance(result.index, pd.Index)
111111
and not isinstance(result.index, pd.RangeIndex)
112112
):
113-
unnamed_indexes = result.index.names[0] is None
113+
index_names = result.index.names
114+
unnamed_indexes = any(
115+
idx is None for idx in result.index.names
116+
)
117+
114118
index_levels = result.index.nlevels
115119
result = result.reset_index()
116120

117121
if unnamed_indexes:
118-
# We could rename, but it doesn't work cleanly for multi-col indexes
119-
result.columns = pd.Index(
120-
[""] + list(result.columns[1:])
121-
)
122+
# After reset_index, the index is converted to a column
123+
# We need to rename the new columns to empty strings
124+
# And it must be unique for each column
125+
# TODO: On the frontend this still displays the original index, not the renamed one
126+
empty_name = ""
127+
for i, idx_name in enumerate(index_names):
128+
if idx_name is None:
129+
result.columns.values[i] = empty_name
130+
empty_name += " "
122131

123132
if index_levels > 1:
124133
LOGGER.warning(
125-
"Indexes with more than one level are not supported properly, call reset_index() to flatten"
134+
"Indexes with more than one level are not well supported, call reset_index() or use mo.plain(df)"
126135
)
127136

128137
return sanitize_json_bigint(

marimo/_smoke_tests/tables/pandas_multi_idx.py

Lines changed: 44 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import marimo
22

3-
__generated_with = "0.12.4"
3+
__generated_with = "0.13.12"
44
app = marimo.App(width="medium")
55

66

@@ -12,46 +12,77 @@ def _():
1212

1313

1414
@app.cell
15-
def _(pd):
15+
def _(mo, pd):
1616
arrays = [
1717
["bar", "bar"],
1818
["one", "two"],
1919
]
2020
tuples = list(zip(*arrays))
2121
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
2222
named_indexes = pd.Series([1, 2], index=index)
23-
named_indexes
24-
return arrays, index, named_indexes, tuples
23+
24+
mo.vstack([mo.md("## Named indexes (works)"), named_indexes])
25+
return
2526

2627

2728
@app.cell
28-
def _(pd):
29+
def _(mo, pd):
2930
unnamed_indexes = pd.concat(
3031
{
3132
"a": pd.DataFrame({"foo": [1]}, index=["hello"]),
3233
"b": pd.DataFrame({"baz": [2.0]}, index=["world"]),
3334
}
3435
)
35-
unnamed_indexes
36-
# unnamed_indexes.reset_index() # this works
37-
return (unnamed_indexes,)
36+
37+
mo.md(f"""
38+
## Unnamed indexes does not work correctly
39+
40+
{mo.vstack([mo.plain(unnamed_indexes), unnamed_indexes])}
41+
42+
### Using reset_index works but changes structure
43+
{mo.ui.table(unnamed_indexes.reset_index())}
44+
""")
45+
return
3846

3947

4048
@app.cell
4149
def _(mo, pd):
50+
_multi_idx = pd.MultiIndex.from_tuples([("weight", "kg"), ("height", "m")])
51+
_df = pd.DataFrame(
52+
[[1.0, 2.0], [3.0, 4.0]], index=["cat", "dog"], columns=_multi_idx
53+
)
54+
_multi_col_stack = _df.stack(future_stack=True)
55+
mo.vstack([mo.plain(_multi_col_stack), _multi_col_stack])
56+
57+
mo.vstack(
58+
[
59+
mo.md("## Row multi-idx with stack (not working correctly)"),
60+
mo.plain(_multi_col_stack),
61+
_multi_col_stack,
62+
]
63+
)
64+
return
65+
66+
67+
@app.cell
68+
def _(pd):
4269
cols = pd.MultiIndex.from_arrays(
4370
[["basic_amt"] * 2, ["NSW", "QLD"]], names=[None, "Faculty"]
4471
)
4572
idx = pd.Index(["All", "Full"])
4673
column_multi_idx = pd.DataFrame([(1, 1), (0, 1)], index=idx, columns=cols)
47-
48-
mo.ui.table(column_multi_idx)
49-
return cols, column_multi_idx, idx
74+
return (column_multi_idx,)
5075

5176

5277
@app.cell
53-
def _(column_multi_idx):
54-
print("Raw data:\n", column_multi_idx)
78+
def _(column_multi_idx, mo):
79+
mo.vstack(
80+
[
81+
mo.md("## Column multi index (we flatten)"),
82+
mo.plain(column_multi_idx),
83+
column_multi_idx,
84+
]
85+
)
5586
return
5687

5788

tests/_plugins/ui/_impl/tables/test_pandas_table.py

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -285,6 +285,42 @@ def test_to_json_multi_index_unnamed(self) -> None:
285285
{"level_0": "z", "level_1": 3, "a": 3, "b": 6},
286286
]
287287

288+
def test_to_json_multi_index_unnamed_2(self) -> None:
289+
# Create a DataFrame with a MultiIndex where second level is unnamed
290+
df = pd.DataFrame(
291+
{
292+
"A": [1, 2, 3, 4],
293+
"B": [5, 6, 7, 8],
294+
},
295+
index=pd.MultiIndex.from_tuples(
296+
[("x", 1), ("x", 2), ("y", 1), ("y", 2)],
297+
names=["level1", None], # Second level is unnamed
298+
),
299+
)
300+
301+
json_data = self.factory_create_json_from_df(df)
302+
# Second level converted to empty string
303+
assert json_data == [
304+
{"level1": "x", "": 1, "A": 1, "B": 5},
305+
{"level1": "x", "": 2, "A": 2, "B": 6},
306+
{"level1": "y", "": 1, "A": 3, "B": 7},
307+
{"level1": "y", "": 2, "A": 4, "B": 8},
308+
]
309+
310+
def test_to_json_multi_index_unnamed_3(self) -> None:
311+
cols = pd.MultiIndex.from_tuples([("weight", "kg"), ("height", "m")])
312+
df = pd.DataFrame(
313+
[[1.0, 2.0], [3.0, 4.0]], index=["cat", "dog"], columns=cols
314+
)
315+
df = df.stack(future_stack=True)
316+
json_data = self.factory_create_json_from_df(df)
317+
assert json_data == [
318+
{"": "cat", " ": "kg", "weight": 1.0, "height": None},
319+
{"": "cat", " ": "m", "weight": None, "height": 2.0},
320+
{"": "dog", " ": "kg", "weight": 3.0, "height": None},
321+
{"": "dog", " ": "m", "weight": None, "height": 4.0},
322+
]
323+
288324
def test_to_json_multi_col_index(self) -> None:
289325
cols = pd.MultiIndex.from_arrays(
290326
[["basic_amt"] * 2, ["NSW", "QLD"]], names=[None, "Faculty"]

0 commit comments

Comments
 (0)