Skip to content

Commit 18ae631

Browse files
committed
fix: better error handling in type-mismatch for mo.ui.altair_chart
1 parent 93d8d9a commit 18ae631

File tree

2 files changed

+131
-43
lines changed

2 files changed

+131
-43
lines changed

marimo/_plugins/ui/_impl/altair_chart.py

Lines changed: 90 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -155,28 +155,54 @@ def _filter_dataframe(
155155

156156
dtype = schema[field]
157157
resolved_values = _resolve_values(values, dtype)
158-
if is_point_selection:
159-
df = df.filter(nw.col(field).is_in(resolved_values))
160-
elif len(resolved_values) == 1:
161-
df = df.filter(nw.col(field) == resolved_values[0])
162-
# Range selection
163-
elif len(resolved_values) == 2 and _is_numeric_or_date(
164-
resolved_values[0]
158+
159+
# Validate that resolved values have compatible types
160+
# If coercion failed, the values will still be strings when they should be dates/numbers
161+
if nw.Date == dtype or (
162+
nw.Datetime == dtype and isinstance(dtype, nw.Datetime)
165163
):
166-
left_value, right_value = resolved_values
167-
df = df.filter(
168-
(nw.col(field) >= left_value)
169-
& (nw.col(field) <= right_value)
170-
)
171-
# Multi-selection via range
172-
# This can happen when you use an interval selection
173-
# on categorical data
174-
elif len(resolved_values) > 1:
175-
df = df.filter(nw.col(field).is_in(resolved_values))
176-
else:
177-
raise ValueError(
178-
f"Invalid selection: {field}={resolved_values}"
164+
# Check if any values are still strings (indicating failed coercion)
165+
if any(isinstance(v, str) for v in resolved_values):
166+
LOGGER.error(
167+
f"Type mismatch for field '{field}': Column has {dtype} type, "
168+
f"but values {resolved_values} could not be properly coerced. "
169+
"Skipping this filter condition."
170+
)
171+
continue
172+
173+
try:
174+
if is_point_selection:
175+
df = df.filter(nw.col(field).is_in(resolved_values))
176+
elif len(resolved_values) == 1:
177+
df = df.filter(nw.col(field) == resolved_values[0])
178+
# Range selection
179+
elif len(resolved_values) == 2 and _is_numeric_or_date(
180+
resolved_values[0]
181+
):
182+
left_value, right_value = resolved_values
183+
df = df.filter(
184+
(nw.col(field) >= left_value)
185+
& (nw.col(field) <= right_value)
186+
)
187+
# Multi-selection via range
188+
# This can happen when you use an interval selection
189+
# on categorical data
190+
elif len(resolved_values) > 1:
191+
df = df.filter(nw.col(field).is_in(resolved_values))
192+
else:
193+
raise ValueError(
194+
f"Invalid selection: {field}={resolved_values}"
195+
)
196+
except (TypeError, ValueError, Exception) as e:
197+
# Handle type comparison errors and other database errors gracefully
198+
# (e.g., DuckDB BinderException, Polars errors, etc.)
199+
LOGGER.error(
200+
f"Error during filter comparison for field '{field}': {e}. "
201+
f"Attempted to compare {dtype} column with values {resolved_values}. "
202+
"Skipping this filter condition."
179203
)
204+
# Continue without this filter - don't break the entire operation
205+
continue
180206

181207
if not is_lazy and is_narwhals_lazyframe(df):
182208
# Undo the lazy
@@ -189,30 +215,51 @@ def _resolve_values(values: Any, dtype: Any) -> list[Any]:
189215
def _coerce_value(value: Any, dtype: Any) -> Any:
190216
import zoneinfo
191217

192-
if nw.Date == dtype:
193-
if isinstance(value, str):
194-
return datetime.date.fromisoformat(value)
195-
# Value is milliseconds since epoch
196-
# so we convert to seconds since epoch
197-
return datetime.date.fromtimestamp(value / 1000)
198-
if nw.Datetime == dtype and isinstance(dtype, nw.Datetime):
199-
if isinstance(value, str):
200-
res = datetime.datetime.fromisoformat(value)
201-
# If dtype has no timezone, but value has timezone, remove timezone without shifting
202-
if dtype.time_zone is None and res.tzinfo is not None:
203-
return res.replace(tzinfo=None)
204-
return res
205-
206-
# Value is milliseconds since epoch
207-
# so we convert to seconds since epoch
208-
return datetime.datetime.fromtimestamp(
209-
value / 1000,
210-
tz=(
211-
zoneinfo.ZoneInfo(dtype.time_zone)
212-
if dtype.time_zone
213-
else None
214-
),
218+
try:
219+
if nw.Date == dtype:
220+
if isinstance(value, str):
221+
return datetime.date.fromisoformat(value)
222+
# Value is milliseconds since epoch
223+
# so we convert to seconds since epoch
224+
if isinstance(value, (int, float)):
225+
return datetime.date.fromtimestamp(value / 1000)
226+
# If value is already a date or datetime, return as-is
227+
if isinstance(value, datetime.date):
228+
return value
229+
# Otherwise, try to convert to string then parse
230+
return datetime.date.fromisoformat(str(value))
231+
if nw.Datetime == dtype and isinstance(dtype, nw.Datetime):
232+
if isinstance(value, str):
233+
res = datetime.datetime.fromisoformat(value)
234+
# If dtype has no timezone, but value has timezone, remove timezone without shifting
235+
if dtype.time_zone is None and res.tzinfo is not None:
236+
return res.replace(tzinfo=None)
237+
return res
238+
239+
# Value is milliseconds since epoch
240+
# so we convert to seconds since epoch
241+
if isinstance(value, (int, float)):
242+
return datetime.datetime.fromtimestamp(
243+
value / 1000,
244+
tz=(
245+
zoneinfo.ZoneInfo(dtype.time_zone)
246+
if dtype.time_zone
247+
else None
248+
),
249+
)
250+
# If value is already a datetime, return as-is
251+
if isinstance(value, datetime.datetime):
252+
return value
253+
# Otherwise, try to convert to string then parse
254+
return datetime.datetime.fromisoformat(str(value))
255+
except (ValueError, TypeError, OSError) as e:
256+
# Log the error but return the original value
257+
# to avoid breaking the filter entirely
258+
LOGGER.warning(
259+
f"Failed to coerce value {value!r} to {dtype}: {e}. "
260+
"Using original value."
215261
)
262+
return value
216263
return value
217264

218265
if isinstance(values, list):

tests/_plugins/ui/_impl/test_altair_chart.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,47 @@ def test_filter_dataframe_with_dates(
241241
assert str(first) == "value1"
242242
assert str(second) == "value2"
243243

244+
@staticmethod
245+
@pytest.mark.parametrize(
246+
"df",
247+
create_dataframes(
248+
{
249+
"field": ["value1", "value2", "value3"],
250+
"date_column": [
251+
datetime.date(2020, 1, 1),
252+
datetime.date(2020, 1, 8),
253+
datetime.date(2020, 1, 10),
254+
],
255+
},
256+
),
257+
)
258+
def test_filter_dataframe_with_dates_graceful_error(
259+
df: ChartDataType,
260+
) -> None:
261+
"""Test that invalid date comparisons are handled gracefully."""
262+
# Try with invalid date strings that can't be parsed
263+
interval_selection: ChartSelection = {
264+
"signal_channel": {"date_column": ["invalid_date", "also_invalid"]}
265+
}
266+
# Should not raise an error, but skip the filter condition
267+
# and return the original dataframe
268+
filtered_df = _filter_dataframe(df, interval_selection)
269+
# Since the filter failed gracefully, we should get the full dataframe
270+
assert get_len(filtered_df) == 3
271+
272+
# Try with mixed valid/invalid values - the coercion should handle it
273+
interval_selection = {
274+
"signal_channel": {
275+
"date_column": [
276+
datetime.date(2020, 1, 1).isoformat(),
277+
"not_a_valid_date",
278+
]
279+
}
280+
}
281+
# The filter should be skipped due to type error
282+
filtered_df = _filter_dataframe(df, interval_selection)
283+
assert get_len(filtered_df) == 3
284+
244285
@staticmethod
245286
@pytest.mark.skipif(
246287
not HAS_DEPS, reason="optional dependencies not installed"

0 commit comments

Comments
 (0)