Fix dataframe queries failing on empty datasets (#11846)

abey79 · web-flow · commit 7d1e66d41051 · 2025-11-10T15:53:58.000-05:00
### Related

- Fixes RR-2819

### What

See title: dataframe queries failed on empty datasets. The failure came from matching the selectors against the actual schema, which goes through sorbet code that fails when the `RowId` column is missing, as is the case for an empty dataset. Fixed by short-circuiting the empty-dataset case. Also added a test which, like many more, should have been here in the first place 🤦🏻
diff --git a/crates/store/re_datafusion/src/dataframe_query_common.rs b/crates/store/re_datafusion/src/dataframe_query_common.rs
@@ -308,6 +308,12 @@ fn compute_schema_for_query(
     dataset_schema: &Schema,
     query_expression: &QueryExpression,
 ) -> Result<SchemaRef, DataFusionError> {
+    // Short circuit for empty datasets. Needed because `ChunkColumnDescriptors::try_from_arrow_fields`
+    // needs row ids, which we only have for non-empty datasets.
+    if dataset_schema.fields.is_empty() {
+        return Ok(Arc::new(Schema::empty()));
+    }
+
     // Schema returned from `get_dataset_schema` does not match the required ChunkColumnDescriptors ordering
     // which is row id, then time, then data. We don't need perfect ordering other than that.
     let mut fields = dataset_schema
diff --git a/rerun_py/tests/e2e_redap_tests/test_dataframe_query_view.py b/rerun_py/tests/e2e_redap_tests/test_dataframe_query_view.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+if TYPE_CHECKING:
+    from .conftest import ServerInstance
+
+
+# TODO(ab): quite obviously, there need to be many more tests here.
+
+
+@pytest.mark.parametrize("index", [None, "does_not_exist"])
+def test_dataframe_query_empty_dataset(index: str | None, server_instance: ServerInstance) -> None:
+    client = server_instance.client
+
+    ds = client.create_dataset("empty_dataset")
+
+    df = ds.dataframe_query_view(index=index, contents="/**").df()
+
+    assert df.count() == 0