Skip to content

Commit ebc0ac2

Browse files
authored
improvement: migrate mo.ui.dataframe to narwhals; support duckdb, ibis, pandas, polars (#6772)
This migrates the `mo.ui.dataframe` logic to be backed by narwhals, which both 1) reduces the maintenance burden and 2) supports more dataframe types. We now support **duckdb, ibis, pandas, and polars**. This also fixes duckdb filtering when using `mo.ui.table()`.
1 parent 03cb04d commit ebc0ac2

File tree

13 files changed

+1099
-1590
lines changed

13 files changed

+1099
-1590
lines changed

marimo/_plugins/ui/_impl/dataframes/dataframe.py

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import sys
66
from dataclasses import dataclass
77
from typing import (
8+
TYPE_CHECKING,
89
Any,
910
Callable,
1011
Final,
@@ -13,6 +14,8 @@
1314
Union,
1415
)
1516

17+
import narwhals.stable.v2 as nw
18+
1619
from marimo._output.rich_help import mddoc
1720
from marimo._plugins.ui._core.ui_element import UIElement
1821
from marimo._plugins.ui._impl.dataframes.transforms.apply import (
@@ -44,8 +47,12 @@
4447
)
4548
from marimo._runtime.functions import EmptyArgs, Function
4649
from marimo._utils.memoize import memoize_last_value
50+
from marimo._utils.narwhals_utils import is_narwhals_lazyframe
4751
from marimo._utils.parse_dataclass import parse_raw
4852

53+
if TYPE_CHECKING:
54+
from narwhals.typing import IntoLazyFrame
55+
4956

5057
@dataclass
5158
class GetDataFrameResponse:
@@ -86,7 +93,7 @@ def __init__(self, error: str):
8693
class dataframe(UIElement[dict[str, Any], DataFrameType]):
8794
"""Run transformations on a DataFrame or series.
8895
89-
Currently only Pandas or Polars DataFrames are supported.
96+
Currently supports Pandas, Polars, Ibis, Pyarrow, and DuckDB.
9097
9198
Examples:
9299
```python
@@ -138,14 +145,17 @@ def __init__(
138145
except Exception:
139146
pass
140147

148+
# Make the dataframe lazy and keep track of whether it was lazy originally
149+
nw_df: nw.LazyFrame[Any] = nw.from_native(df, pass_through=False)
150+
self._was_lazy = is_narwhals_lazyframe(nw_df)
151+
nw_df = nw_df.lazy()
152+
141153
self._limit = limit
142154
self._dataframe_name = dataframe_name
143155
self._data = df
144156
self._handler = handler
145157
self._manager = self._get_cached_table_manager(df, self._limit)
146-
self._transform_container = TransformsContainer[DataFrameType](
147-
df, handler
148-
)
158+
self._transform_container = TransformsContainer(nw_df, handler)
149159
self._error: Optional[str] = None
150160
self._last_transforms = Transformations([])
151161
self._page_size = page_size or 5 # Default to 5 rows (.head())
@@ -210,12 +220,14 @@ def _get_dataframe(self, _args: EmptyArgs) -> GetDataFrameResponse:
210220
row_headers=manager.get_row_headers(),
211221
field_types=manager.get_field_types(),
212222
python_code=self._handler.as_python_code(
223+
self._transform_container._snapshot_df,
213224
self._dataframe_name,
214-
# manager.get_column_names(),
215225
self._manager.get_column_names(),
216226
self._last_transforms.transforms,
217227
),
218-
sql_code=self._handler.as_sql_code(manager.data),
228+
sql_code=self._handler.as_sql_code(
229+
self._transform_container._snapshot_df
230+
),
219231
)
220232

221233
def _get_column_values(
@@ -245,19 +257,22 @@ def _get_column_values(
245257
def _convert_value(self, value: dict[str, Any]) -> DataFrameType:
246258
if value is None:
247259
self._error = None
248-
return self._data
260+
return _maybe_collect(self._data, self._was_lazy)
249261

250262
try:
251263
transformations = parse_raw(value, Transformations)
252264
result = self._transform_container.apply(transformations)
253265
self._error = None
254266
self._last_transforms = transformations
255-
return result
267+
return _maybe_collect(result, self._was_lazy)
256268
except Exception as e:
257269
error = f"Error applying dataframe transform: {str(e)}\n\n"
258270
sys.stderr.write(error)
259271
self._error = error
260-
return self._data
272+
return _maybe_collect(
273+
nw.from_native(self._data, pass_through=False).lazy(),
274+
self._was_lazy,
275+
)
261276

262277
def _search(self, args: SearchTableArgs) -> SearchTableResponse:
263278
offset = args.page_number * args.page_size
@@ -304,7 +319,7 @@ def _apply_filters_query_sort(
304319
self,
305320
query: Optional[str],
306321
sort: Optional[list[SortArgs]],
307-
) -> TableManager[Any]:
322+
) -> TableManager[DataFrameType]:
308323
result = self._get_cached_table_manager(self._value, self._limit)
309324

310325
if query:
@@ -320,9 +335,17 @@ def _apply_filters_query_sort(
320335

321336
@memoize_last_value
322337
def _get_cached_table_manager(
323-
self, value: Any, limit: Optional[int]
324-
) -> TableManager[Any]:
338+
self, value: DataFrameType, limit: Optional[int]
339+
) -> TableManager[DataFrameType]:
325340
tm = get_table_manager(value)
326341
if limit is not None:
327342
tm = tm.take(limit, 0)
328343
return tm
344+
345+
346+
def _maybe_collect(
347+
df: nw.LazyFrame[IntoLazyFrame], was_lazy: bool
348+
) -> DataFrameType:
349+
if was_lazy:
350+
return df.collect().to_native() # type: ignore[no-any-return]
351+
return df.to_native()

marimo/_plugins/ui/_impl/dataframes/transforms/apply.py

Lines changed: 50 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,29 @@
11
# Copyright 2024 Marimo. All rights reserved.
22
from __future__ import annotations
33

4-
from typing import Any, Generic, TypeVar
4+
from typing import TYPE_CHECKING, TypeVar
55

6-
from narwhals.dependencies import is_narwhals_dataframe
7-
8-
from marimo._dependencies.dependencies import DependencyManager
96
from marimo._plugins.ui._impl.dataframes.transforms.handlers import (
10-
IbisTransformHandler,
11-
PandasTransformHandler,
12-
PolarsTransformHandler,
7+
NarwhalsTransformHandler,
138
)
149
from marimo._plugins.ui._impl.dataframes.transforms.types import (
10+
DataFrameType,
1511
Transform,
1612
Transformations,
1713
TransformHandler,
1814
TransformType,
1915
)
2016
from marimo._utils.assert_never import assert_never
17+
from marimo._utils.narwhals_utils import can_narwhalify, is_narwhals_lazyframe
2118

2219
T = TypeVar("T")
2320

2421

22+
if TYPE_CHECKING:
23+
import narwhals.stable.v2 as nw
24+
from narwhals.typing import IntoLazyFrame
25+
26+
2527
def _handle(df: T, handler: TransformHandler[T], transform: Transform) -> T:
2628
if transform.type is TransformType.COLUMN_CONVERSION:
2729
return handler.handle_column_conversion(df, transform)
@@ -50,6 +52,33 @@ def _handle(df: T, handler: TransformHandler[T], transform: Transform) -> T:
5052
assert_never(transform.type)
5153

5254

55+
def apply_transforms_to_df(
56+
df: DataFrameType, transform: Transform
57+
) -> DataFrameType:
58+
"""Apply a transform to a dataframe using NarwhalsTransformHandler."""
59+
if not can_narwhalify(df):
60+
raise ValueError(
61+
f"Unsupported dataframe type. Must be Pandas, Polars, Ibis, Pyarrow, or DuckDB. Got: {type(df)}"
62+
)
63+
64+
import narwhals.stable.v2 as nw
65+
66+
nw_df = nw.from_native(df)
67+
was_lazy = is_narwhals_lazyframe(nw_df)
68+
nw_df = nw_df.lazy()
69+
70+
result_nw = _apply_transforms(
71+
nw_df,
72+
NarwhalsTransformHandler(),
73+
Transformations(transforms=[transform]),
74+
)
75+
76+
if was_lazy:
77+
return result_nw.to_native()
78+
79+
return result_nw.collect().to_native() # type: ignore[no-any-return]
80+
81+
5382
def _apply_transforms(
5483
df: T, handler: TransformHandler[T], transforms: Transformations
5584
) -> T:
@@ -61,54 +90,39 @@ def _apply_transforms(
6190

6291

6392
def get_handler_for_dataframe(
64-
df: Any,
65-
) -> TransformHandler[Any]:
93+
df: DataFrameType,
94+
) -> NarwhalsTransformHandler:
6695
"""
6796
Gets the handler for the given dataframe.
6897
6998
raises ValueError if the dataframe type is not supported.
7099
"""
71-
if DependencyManager.pandas.imported():
72-
import pandas as pd
73-
74-
if isinstance(df, pd.DataFrame):
75-
return PandasTransformHandler()
76-
if DependencyManager.polars.imported():
77-
import polars as pl
78-
79-
if isinstance(df, pl.DataFrame):
80-
return PolarsTransformHandler()
100+
if not can_narwhalify(df):
101+
raise ValueError(
102+
f"Unsupported dataframe type. Must be Pandas, Polars, Ibis, Pyarrow, or DuckDB. Got: {type(df)}"
103+
)
81104

82-
if DependencyManager.ibis.imported():
83-
import ibis # type: ignore
84-
85-
if isinstance(df, ibis.Table):
86-
return IbisTransformHandler()
87-
88-
if DependencyManager.narwhals.imported():
89-
if is_narwhals_dataframe(df):
90-
return get_handler_for_dataframe(df.to_native())
91-
92-
raise ValueError(
93-
"Unsupported dataframe type. Must be Pandas or Polars."
94-
f" Got: {type(df)}"
95-
)
105+
return NarwhalsTransformHandler()
96106

97107

98-
class TransformsContainer(Generic[T]):
108+
class TransformsContainer:
99109
"""
100110
Keeps internal state of the last transformation applied to the dataframe.
101111
So that we can incrementally apply transformations.
102112
"""
103113

104-
def __init__(self, df: T, handler: TransformHandler[T]) -> None:
114+
def __init__(
115+
self,
116+
df: nw.LazyFrame[IntoLazyFrame],
117+
handler: NarwhalsTransformHandler,
118+
) -> None:
105119
self._original_df = df
106120
# The dataframe for the given transform.
107121
self._snapshot_df = df
108122
self._handler = handler
109123
self._transforms: list[Transform] = []
110124

111-
def apply(self, transform: Transformations) -> T:
125+
def apply(self, transform: Transformations) -> nw.LazyFrame[IntoLazyFrame]:
112126
"""
113127
Applies the given transformations to the dataframe.
114128
"""

0 commit comments

Comments
 (0)