Skip to content

Commit ff0c9fe

Browse files
authored
fix: cast elements in list of enums to string for polars (#5572)
## 📝 Summary

<!-- Provide a concise summary of what this pull request is addressing. If this PR fixes any issues, list them here by number (e.g., Fixes #123). -->

Fixes #5562.

## 🔍 Description of Changes

<!-- Detail the specific changes made in this pull request. Explain the problem addressed and how it was resolved. If applicable, provide before and after comparisons, screenshots, or any relevant details to help reviewers understand the changes easily. -->

Polars panics when writing JSON for a list column whose inner dtype is `Enum` or `Categorical` (https://github.com/pola-rs/polars/issues/23459). The string-conversion fallback in `to_json_str` now casts such columns to `List(String)` before retrying, and logs the names of all columns it converted.

## 📋 Checklist

- [x] I have read the [contributor guidelines](https://github.com/marimo-team/marimo/blob/main/CONTRIBUTING.md).
- [ ] For large changes, or changes that affect the public API: this change was discussed or approved through an issue, on [Discord](https://marimo.io/discord?ref=pr), or the community [discussions](https://github.com/marimo-team/marimo/discussions) (Please provide a link if applicable).
- [x] I have added tests for the changes made.
- [x] I have run the code and verified that it works as expected.

## 📜 Reviewers

<!--
Tag potential reviewers from the community or maintainers who might be interested in reviewing this pull request.

Your PR will be reviewed more quickly if you can figure out the right person to tag with @

@mscolnick (General, AI)
@dmadisetti (Runtime, Caching, Fileformat, AST)
@manzt (Widgets, Dependency Management, LSP)
@Light2Dark (Tables, Plots, Layouts, SQL)
@akshayka (Public API, Dependencies, UX/Styling, Backend, Docs, Integrations)
-->
1 parent ecd6cc6 commit ff0c9fe

File tree

7 files changed

+81
-105
lines changed

7 files changed

+81
-105
lines changed

marimo/_plugins/ui/_impl/tables/polars_table.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,26 +123,45 @@ def to_json_str(
123123
LOGGER.info(
124124
"Failed to write json. Trying to convert columns to strings."
125125
)
126+
converted_columns = []
126127
for column in result.get_columns():
127128
dtype = column.dtype
128129
if isinstance(dtype, pl.Object):
129130
result = self._cast_object_to_string(
130131
result, column
131132
)
133+
converted_columns.append(column.name)
132134
elif str(dtype) == "Int128":
133135
# Use string comparison because pl.Int128 doesn't exist on older versions
134-
# As of writing this, Int128 is not supported by polars
136+
# As of writing this, polars cannot serialize Int128 to JSON
135137
LOGGER.warning(
136138
"Column %s is of type Int128, which is not supported. Converting to string.",
137139
column.name,
138140
)
139141
result = result.with_columns(
140142
column.cast(pl.String)
141143
)
144+
converted_columns.append(column.name)
142145
elif isinstance(dtype, pl.Duration):
143146
result = self._convert_time_to_string(
144147
result, column
145148
)
149+
converted_columns.append(column.name)
150+
# https://github.com/pola-rs/polars/issues/23459
151+
elif isinstance(dtype, pl.List) and isinstance(
152+
dtype.inner, (pl.Enum, pl.Categorical)
153+
):
154+
# Convert each element in the list to a string
155+
result = result.with_columns(
156+
pl.col(column.name).cast(pl.List(pl.String))
157+
)
158+
converted_columns.append(column.name)
159+
160+
if converted_columns:
161+
LOGGER.info(
162+
"Converted columns %s to string.",
163+
", ".join(f"'{col}'" for col in converted_columns),
164+
)
146165

147166
return sanitize_json_bigint(result.write_json())
148167

marimo/_smoke_tests/tables/complex_types.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import marimo
22

3-
__generated_with = "0.13.15"
3+
__generated_with = "0.14.10"
44
app = marimo.App(width="medium")
55

66

@@ -45,6 +45,11 @@ def _():
4545
[[[1, 2]], [[3, 4]], [[5, 6]]],
4646
dtype=pl.List(pl.List(pl.Int64)),
4747
),
48+
"lists_with_enum": pl.Series(
49+
"lists_with_enum",
50+
[["A", "B"], ["A", "B"], ["A", "B"]],
51+
dtype=pl.List(pl.Enum(categories=["A", "B"])),
52+
),
4853
"arrays": pl.Series(
4954
"arrays",
5055
[[1, 2], [3, 4], [5, 6]],
@@ -160,11 +165,11 @@ def _(mo):
160165

161166

162167
@app.cell
163-
def _(pl):
164-
wow_data = pl.scan_parquet(
165-
"https://github.com/koaning/wow-avatar-datasets/raw/refs/heads/main/wow-full.parquet"
166-
)
167-
wow_data
168+
def _():
169+
# wow_data = pl.scan_parquet(
170+
# "https://github.com/koaning/wow-avatar-datasets/raw/refs/heads/main/wow-full.parquet"
171+
# )
172+
# wow_data
168173
# wow_data.collect()
169174
return
170175

marimo/_utils/requests.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# Copyright 2025 Marimo. All rights reserved.
12
import json
23
import urllib.error
34
import urllib.parse
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
strings,bool,int,large_int,float,datetime,date,struct,list,array,nulls,category,set,imaginary,time,duration,mixed_list
2-
a,true,1,18446744073709551616,1.0,2021-01-01T00:00:00.000000,2021-01-01,"{""a"":1,""b"":2}","1,2",1,,cat,"{1, 2}",(1+2j),12:30:00.000000000,1d,"1,two"
3-
b,false,2,36893488147419103233,2.0,2021-01-02T00:00:00.000000,2021-01-02,"{""a"":3,""b"":4}","3,4",2,data,dog,"{3, 4}",(3+4j),13:45:00.000000000,315µs,"3.0,0.0"
4-
c,true,3,73786976294838206466,3.0,2021-01-03T00:00:00.000000,2021-01-03,"{""a"":5,""b"":6}","5,6",3,,mouse,"{5, 6}",(5+6j),14:15:00.000000000,2h 30m,2021-01-01 00:00:00.000000
1+
strings,bool,int,large_int,float,datetime,date,struct,list,array,nulls,category,set,imaginary,time,duration,mixed_list,enum_list
2+
a,true,1,18446744073709551616,1.0,2021-01-01T00:00:00.000000,2021-01-01,"{""a"":1,""b"":2}","1,2",1,,cat,"{1, 2}",(1+2j),12:30:00.000000000,1d,"1,two","A,B,C"
3+
b,false,2,36893488147419103233,2.0,2021-01-02T00:00:00.000000,2021-01-02,"{""a"":3,""b"":4}","3,4",2,data,dog,"{3, 4}",(3+4j),13:45:00.000000000,315µs,"3.0,0.0","A,B,C"
4+
c,true,3,73786976294838206466,3.0,2021-01-03T00:00:00.000000,2021-01-03,"{""a"":5,""b"":6}","5,6",3,,mouse,"{5, 6}",(5+6j),14:15:00.000000000,2h 30m,2021-01-01 00:00:00.000000,"A,B,C"
Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1 @@
1-
[
2-
["strings", ["string", "str"]],
3-
["bool", ["boolean", "bool"]],
4-
["int", ["integer", "i64"]],
5-
["large_int", ["integer", "i128"]],
6-
["float", ["number", "f64"]],
7-
["datetime", ["datetime", "datetime[\u03bcs]"]],
8-
["date", ["date", "date"]],
9-
["struct", ["unknown", "struct[2]"]],
10-
["list", ["unknown", "list[i64]"]],
11-
["nested_lists", ["unknown", "list[list[i64]]"]],
12-
["nested_arrays", ["unknown", "array[i64, (1, 2)]"]],
13-
["array", ["unknown", "array[i64, 1]"]],
14-
["nulls", ["string", "str"]],
15-
["category", ["string", "cat"]],
16-
["set", ["unknown", "object"]],
17-
["imaginary", ["unknown", "object"]],
18-
["time", ["time", "Time"]],
19-
["duration", ["number", "duration[\u03bcs]"]],
20-
["mixed_list", ["unknown", "list[str]"]],
21-
["structs_with_list", ["unknown", "struct[2]"]],
22-
["list_with_structs", ["unknown", "list[struct[2]]"]]
23-
]
1+
[["strings", ["string", "str"]], ["bool", ["boolean", "bool"]], ["int", ["integer", "i64"]], ["large_int", ["integer", "i128"]], ["float", ["number", "f64"]], ["datetime", ["datetime", "datetime[\u03bcs]"]], ["date", ["date", "date"]], ["struct", ["unknown", "struct[2]"]], ["list", ["unknown", "list[i64]"]], ["nested_lists", ["unknown", "list[list[i64]]"]], ["nested_arrays", ["unknown", "array[i64, (1, 2)]"]], ["array", ["unknown", "array[i64, 1]"]], ["nulls", ["string", "str"]], ["category", ["string", "cat"]], ["set", ["unknown", "object"]], ["imaginary", ["unknown", "object"]], ["time", ["time", "Time"]], ["duration", ["number", "duration[\u03bcs]"]], ["mixed_list", ["unknown", "list[str]"]], ["structs_with_list", ["unknown", "struct[2]"]], ["list_with_structs", ["unknown", "list[struct[2]]"]], ["enum_list", ["unknown", "list[enum]"]]]
Lines changed: 1 addition & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1 @@
1-
[
2-
{
3-
"strings": "a",
4-
"bool": true,
5-
"int": 1,
6-
"large_int": "18446744073709551616",
7-
"float": 1.0,
8-
"datetime": "2021-01-01 00:00:00",
9-
"date": "2021-01-01",
10-
"struct": { "a": 1, "b": 2 },
11-
"list": [1, 2],
12-
"nested_lists": [[1, 2]],
13-
"nested_arrays": [[1, 2]],
14-
"array": [1],
15-
"nulls": null,
16-
"category": "cat",
17-
"set": "{1, 2}",
18-
"imaginary": "(1+2j)",
19-
"time": "12:30:00",
20-
"duration": "1d",
21-
"mixed_list": ["1", "two"],
22-
"structs_with_list": { "a": [1, 2], "b": 2 },
23-
"list_with_structs": [{ "a": 1, "c": null }, { "a": null, "c": 3 }]
24-
},
25-
{
26-
"strings": "b",
27-
"bool": false,
28-
"int": 2,
29-
"large_int": "36893488147419103233",
30-
"float": 2.0,
31-
"datetime": "2021-01-02 00:00:00",
32-
"date": "2021-01-02",
33-
"struct": { "a": 3, "b": 4 },
34-
"list": [3, 4],
35-
"nested_lists": [[3, 4]],
36-
"nested_arrays": [[3, 4]],
37-
"array": [2],
38-
"nulls": "data",
39-
"category": "dog",
40-
"set": "{3, 4}",
41-
"imaginary": "(3+4j)",
42-
"time": "13:45:00",
43-
"duration": "315µs",
44-
"mixed_list": ["3.0", "0.0"],
45-
"structs_with_list": { "a": [3, 4], "b": 4 },
46-
"list_with_structs": [{ "a": null, "c": null }]
47-
},
48-
{
49-
"strings": "c",
50-
"bool": true,
51-
"int": 3,
52-
"large_int": "73786976294838206466",
53-
"float": 3.0,
54-
"datetime": "2021-01-03 00:00:00",
55-
"date": "2021-01-03",
56-
"struct": { "a": 5, "b": 6 },
57-
"list": [5, 6],
58-
"nested_lists": [[5, 6]],
59-
"nested_arrays": [[5, 6]],
60-
"array": [3],
61-
"nulls": null,
62-
"category": "mouse",
63-
"set": "{5, 6}",
64-
"imaginary": "(5+6j)",
65-
"time": "14:15:00",
66-
"duration": "2h 30m",
67-
"mixed_list": [null, "2021-01-01 00:00:00.000000"],
68-
"structs_with_list": { "a": null, "b": null },
69-
"list_with_structs": []
70-
}
71-
]
1+
[{"strings":"a","bool":true,"int":1,"large_int":"18446744073709551616","float":1.0,"datetime":"2021-01-01 00:00:00","date":"2021-01-01","struct":{"a":1,"b":2},"list":[1,2],"nested_lists":[[1,2]],"nested_arrays":[[1,2]],"array":[1],"nulls":null,"category":"cat","set":"{1, 2}","imaginary":"(1+2j)","time":"12:30:00","duration":"1d","mixed_list":["1","two"],"structs_with_list":{"a":[1,2],"b":2},"list_with_structs":[{"a":1,"c":null},{"a":null,"c":3}],"enum_list":["A","B","C"]},{"strings":"b","bool":false,"int":2,"large_int":"36893488147419103233","float":2.0,"datetime":"2021-01-02 00:00:00","date":"2021-01-02","struct":{"a":3,"b":4},"list":[3,4],"nested_lists":[[3,4]],"nested_arrays":[[3,4]],"array":[2],"nulls":"data","category":"dog","set":"{3, 4}","imaginary":"(3+4j)","time":"13:45:00","duration":"315\u00b5s","mixed_list":["3.0","0.0"],"structs_with_list":{"a":[3,4],"b":4},"list_with_structs":[{"a":null,"c":null}],"enum_list":["A","B","C"]},{"strings":"c","bool":true,"int":3,"large_int":"73786976294838206466","float":3.0,"datetime":"2021-01-03 00:00:00","date":"2021-01-03","struct":{"a":5,"b":6},"list":[5,6],"nested_lists":[[5,6]],"nested_arrays":[[5,6]],"array":[3],"nulls":null,"category":"mouse","set":"{5, 6}","imaginary":"(5+6j)","time":"14:15:00","duration":"2h 30m","mixed_list":[null,"2021-01-01 00:00:00.000000"],"structs_with_list":{"a":null,"b":null},"list_with_structs":[],"enum_list":["A","B","C"]}]

tests/_plugins/ui/_impl/tables/test_polars_table.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ def get_complex_data(self) -> TableManager[Any]:
109109
[],
110110
],
111111
),
112+
"enum_list": pl.Series(
113+
[["A", "B", "C"], ["A", "B", "C"], ["A", "B", "C"]],
114+
dtype=pl.List(pl.Enum(categories=["A", "B", "C"])),
115+
),
112116
},
113117
strict=False,
114118
)
@@ -943,3 +947,42 @@ def test_to_json_bigint(self) -> None:
943947
# Large integers should be converted to strings
944948
assert json_data[1]["A"] == "9007199254740992"
945949
assert json_data[1]["B"] == "-9007199254740992"
950+
951+
def test_to_json_enum_list(self) -> None:
952+
import polars as pl
953+
954+
data = {"A": [["A", "B", "C"], ["A", "B", "C"], ["A", "B", "C"]]}
955+
956+
data_enum = pl.DataFrame(
957+
data, schema={"A": pl.List(pl.Enum(categories=["A", "B", "C"]))}
958+
)
959+
manager = self.factory.create()(data_enum)
960+
json_data = json.loads(manager.to_json())
961+
assert json_data[0]["A"] == ["A", "B", "C"]
962+
assert json_data[1]["A"] == ["A", "B", "C"]
963+
assert json_data[2]["A"] == ["A", "B", "C"]
964+
965+
data_categorical = pl.DataFrame(
966+
data, schema={"A": pl.List(pl.Categorical())}
967+
)
968+
manager = self.factory.create()(data_categorical)
969+
json_data = json.loads(manager.to_json())
970+
assert json_data[0]["A"] == ["A", "B", "C"]
971+
assert json_data[1]["A"] == ["A", "B", "C"]
972+
assert json_data[2]["A"] == ["A", "B", "C"]
973+
974+
def test_to_json_enum_list_not_supported(self) -> None:
975+
# Once polars can write List(Enum) / List(Categorical) columns to JSON, the cast to string can be removed
976+
import polars as pl
977+
978+
data = {"A": [["A", "B", "C"], ["A", "B", "C"], ["A", "B", "C"]]}
979+
980+
data_enum = pl.DataFrame(
981+
data, schema={"A": pl.List(pl.Enum(categories=["A", "B", "C"]))}
982+
)
983+
with pytest.raises(pl.exceptions.PanicException):
984+
data_enum.write_json()
985+
986+
data_list = pl.DataFrame(data, schema={"A": pl.List(pl.Categorical())})
987+
with pytest.raises(pl.exceptions.PanicException):
988+
data_list.write_json()

0 commit comments

Comments
 (0)