Skip to content

Commit 1f1416a

Browse files
committed
draft: Bump datafusion to 50 and arrow to 56
Signed-off-by: Adam Gutglick <[email protected]>
1 parent 29e5bb9 commit 1f1416a

File tree

8 files changed

+218
-198
lines changed

8 files changed

+218
-198
lines changed

Cargo.lock

Lines changed: 158 additions & 156 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,16 @@ anyhow = "1.0.95"
5959
arbitrary = "1.3.2"
6060
arcref = "0.2.0"
6161
arrayref = "0.3.7"
62-
arrow-arith = "55.2.0"
63-
arrow-array = "55.2.0"
64-
arrow-buffer = "55.2.0"
65-
arrow-cast = "55.2.0"
66-
arrow-data = "55.2.0"
67-
arrow-ipc = "55.2.0"
68-
arrow-ord = "55.2.0"
69-
arrow-schema = "55.2.0"
70-
arrow-select = "55.2.0"
71-
arrow-string = "55.2.0"
62+
arrow-arith = "56"
63+
arrow-array = "56"
64+
arrow-buffer = "56"
65+
arrow-cast = "56"
66+
arrow-data = "56"
67+
arrow-ipc = "56"
68+
arrow-ord = "56"
69+
arrow-schema = "56"
70+
arrow-select = "56"
71+
arrow-string = "56"
7272
async-compat = "0.2.5"
7373
async-stream = "0.3.6"
7474
async-trait = "0.1.88"
@@ -88,15 +88,17 @@ crossbeam-deque = "0.8.6"
8888
crossbeam-queue = "0.3.12"
8989
crossterm = "0.29"
9090
dashmap = "6.1.0"
91-
datafusion = { version = "49", default-features = false }
92-
datafusion-catalog = { version = "49" }
93-
datafusion-common = { version = "49" }
94-
datafusion-common-runtime = { version = "49" }
95-
datafusion-datasource = { version = "49", default-features = false }
96-
datafusion-execution = { version = "49" }
97-
datafusion-expr = { version = "49" }
98-
datafusion-physical-expr = { version = "49" }
99-
datafusion-physical-plan = { version = "49" }
91+
datafusion = { version = "50", default-features = false, git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
92+
datafusion-catalog = { version = "50", git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
93+
datafusion-common = { version = "50", git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
94+
datafusion-common-runtime = { version = "50", git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
95+
datafusion-datasource = { version = "50", default-features = false, git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
96+
datafusion-execution = { version = "50", git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
97+
datafusion-expr = { version = "50", git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
98+
datafusion-physical-expr = { version = "50", git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
99+
datafusion-physical-expr-adapter = { version = "50", git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
100+
datafusion-physical-expr-common = { version = "50", git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
101+
datafusion-physical-plan = { version = "50", git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
100102
dirs = "6.0.0"
101103
divan = { package = "codspeed-divan-compat", version = "3.0" }
102104
dyn-hash = "0.2.0"
@@ -137,7 +139,7 @@ opentelemetry = "0.30.0"
137139
opentelemetry-otlp = "0.30.0"
138140
opentelemetry_sdk = "0.30.0"
139141
parking_lot = { version = "0.12.3", features = ["nightly"] }
140-
parquet = "55.2.0"
142+
parquet = "56"
141143
paste = "1.0.15"
142144
pco = "0.4.4"
143145
pin-project = "1.1.5"
@@ -186,9 +188,8 @@ thiserror = "2.0.3"
186188
tokio = { version = "1.46" }
187189
tokio-stream = "0.1.17"
188190
tokio-util = { version = "0.7.16" }
189-
# replace these with releases
190-
tpchgen = { git = "https://github.com/clflushopt/tpchgen-rs.git", rev = "d849ff430cd52250f6891ed4d5e3adad77bb2698" }
191-
tpchgen-arrow = { git = "https://github.com/clflushopt/tpchgen-rs.git", rev = "d849ff430cd52250f6891ed4d5e3adad77bb2698" }
191+
tpchgen = { version = "2" }
192+
tpchgen-arrow = { version = "2" }
192193
tracing = { version = "0.1.41" }
193194
tracing-perfetto = "0.1.5"
194195
tracing-subscriber = "0.3.20"

bench-vortex/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ datafusion = { workspace = true, features = [
2929
"parquet",
3030
"datetime_expressions",
3131
"nested_expressions",
32+
"unicode_expressions"
3233
] }
3334
datafusion-common = { workspace = true }
3435
datafusion-physical-plan = { workspace = true }

vortex-datafusion/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ datafusion-datasource = { workspace = true, default-features = false }
2424
datafusion-execution = { workspace = true }
2525
datafusion-expr = { workspace = true }
2626
datafusion-physical-expr = { workspace = true }
27+
datafusion-physical-expr-adapter = { workspace = true }
28+
datafusion-physical-expr-common = { workspace = true }
2729
datafusion-physical-plan = { workspace = true }
2830
futures = { workspace = true }
2931
itertools = { workspace = true }

vortex-datafusion/src/convert/exprs.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use std::sync::Arc;
66
use arrow_schema::{DataType, Schema};
77
use datafusion_expr::Operator as DFOperator;
88
use datafusion_physical_expr::{PhysicalExpr, PhysicalExprRef};
9+
use datafusion_physical_expr_common::physical_expr::is_dynamic_physical_expr;
910
use datafusion_physical_plan::expressions as df_expr;
1011
use vortex::error::{VortexResult, vortex_bail, vortex_err};
1112
use vortex::expr::{BinaryExpr, ExprRef, LikeExpr, Operator, and, get_item, lit, root};
@@ -122,6 +123,11 @@ impl TryFromDataFusion<DFOperator> for Operator {
122123
}
123124

124125
pub(crate) fn can_be_pushed_down(expr: &PhysicalExprRef, schema: &Schema) -> bool {
126+
// We currently do not support pushdown of dynamic expressions in DF
127+
if is_dynamic_physical_expr(expr) {
128+
return false;
129+
}
130+
125131
let expr = expr.as_any();
126132
if let Some(binary) = expr.downcast_ref::<df_expr::BinaryExpr>() {
127133
can_binary_be_pushed_down(binary, schema)

vortex-datafusion/src/persistent/opener.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ use datafusion_datasource::file_meta::FileMeta;
1010
use datafusion_datasource::file_stream::{FileOpenFuture, FileOpener};
1111
use datafusion_datasource::schema_adapter::SchemaAdapterFactory;
1212
use datafusion_datasource::{FileRange, PartitionedFile};
13-
use datafusion_physical_expr::schema_rewriter::PhysicalExprAdapterFactory;
1413
use datafusion_physical_expr::simplifier::PhysicalExprSimplifier;
1514
use datafusion_physical_expr::{PhysicalExprRef, split_conjunction};
15+
use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory;
1616
use futures::{FutureExt, StreamExt, TryStreamExt, stream};
1717
use object_store::ObjectStore;
1818
use object_store::path::Path;
@@ -215,9 +215,7 @@ impl FileOpener for VortexOpener {
215215
))))
216216
})
217217
.try_flatten()
218-
.map(move |batch| {
219-
batch.and_then(|b| schema_mapping.map_batch(b).map_err(Into::into))
220-
})
218+
.map(move |batch| batch.and_then(|b| schema_mapping.map_batch(b)))
221219
.boxed();
222220

223221
Ok(stream)
@@ -256,15 +254,14 @@ fn byte_range_to_row_range(byte_range: Range<u64>, row_count: u64, total_size: u
256254
#[cfg(test)]
257255
mod tests {
258256
use chrono::Utc;
259-
use datafusion::arrow;
260257
use datafusion::arrow::array::RecordBatch;
261258
use datafusion::arrow::datatypes::{DataType, Schema};
262259
use datafusion::arrow::util::pretty::print_batches;
263260
use datafusion::common::record_batch;
264261
use datafusion::datasource::schema_adapter::DefaultSchemaAdapterFactory;
265262
use datafusion::logical_expr::{col, lit};
266263
use datafusion::physical_expr::planner::logical2physical;
267-
use datafusion::physical_expr::schema_rewriter::DefaultPhysicalExprAdapterFactory;
264+
use datafusion::physical_expr_adapter::DefaultPhysicalExprAdapterFactory;
268265
use datafusion::scalar::ScalarValue;
269266
use futures::stream::BoxStream;
270267
use itertools::Itertools;
@@ -335,7 +332,7 @@ mod tests {
335332
}
336333

337334
async fn count_data(
338-
mut stream: BoxStream<'static, Result<RecordBatch, ArrowError>>,
335+
mut stream: BoxStream<'static, Result<RecordBatch, DataFusionError>>,
339336
) -> anyhow::Result<(usize, usize)> {
340337
let mut batches = vec![];
341338

vortex-datafusion/src/persistent/source.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ use datafusion_datasource::file::FileSource;
1212
use datafusion_datasource::file_scan_config::FileScanConfig;
1313
use datafusion_datasource::file_stream::FileOpener;
1414
use datafusion_datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapterFactory};
15-
use datafusion_physical_expr::schema_rewriter::{
15+
use datafusion_physical_expr::{PhysicalExprRef, conjunction};
16+
use datafusion_physical_expr_adapter::{
1617
DefaultPhysicalExprAdapterFactory, PhysicalExprAdapterFactory,
1718
};
18-
use datafusion_physical_expr::{PhysicalExprRef, conjunction};
1919
use datafusion_physical_plan::filter_pushdown::{
2020
FilterPushdownPropagation, PushedDown, PushedDownPredicate,
2121
};

vortex-dtype/src/arrow.rs

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@
1515
1616
use std::sync::Arc;
1717

18-
use arrow_schema::{
19-
DECIMAL128_MAX_PRECISION, DataType, Field, FieldRef, Fields, Schema, SchemaBuilder, SchemaRef,
20-
};
18+
use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, SchemaBuilder, SchemaRef};
2119
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
2220

2321
use crate::datetime::arrow::{make_arrow_temporal_dtype, make_temporal_ext_dtype};
@@ -61,8 +59,11 @@ impl TryFromArrowType<&DataType> for PType {
6159
impl TryFromArrowType<&DataType> for DecimalDType {
6260
fn try_from_arrow(value: &DataType) -> VortexResult<Self> {
6361
match value {
64-
DataType::Decimal128(precision, scale) => Self::try_new(*precision, *scale),
65-
DataType::Decimal256(precision, scale) => Self::try_new(*precision, *scale),
62+
DataType::Decimal32(precision, scale)
63+
| DataType::Decimal64(precision, scale)
64+
| DataType::Decimal128(precision, scale)
65+
| DataType::Decimal256(precision, scale) => Self::try_new(*precision, *scale),
66+
6667
_ => Err(vortex_err!(
6768
"Arrow datatype {:?} cannot be converted to DecimalDType",
6869
value
@@ -108,7 +109,10 @@ impl FromArrowType<(&DataType, Nullability)> for DType {
108109

109110
match data_type {
110111
DataType::Null => DType::Null,
111-
DataType::Decimal128(precision, scale) | DataType::Decimal256(precision, scale) => {
112+
DataType::Decimal32(precision, scale)
113+
| DataType::Decimal64(precision, scale)
114+
| DataType::Decimal128(precision, scale)
115+
| DataType::Decimal256(precision, scale) => {
112116
DType::Decimal(DecimalDType::new(*precision, *scale), nullability)
113117
}
114118
DataType::Boolean => DType::Bool(nullability),
@@ -188,10 +192,17 @@ impl DType {
188192
PType::F64 => DataType::Float64,
189193
},
190194
DType::Decimal(dt, _) => {
191-
if dt.precision() > DECIMAL128_MAX_PRECISION {
192-
DataType::Decimal256(dt.precision(), dt.scale())
193-
} else {
194-
DataType::Decimal128(dt.precision(), dt.scale())
195+
let precision = dt.precision();
196+
let scale = dt.scale();
197+
match dt.precision() {
198+
// DECIMAL32_MAX_PRECISION
199+
0..=9 => DataType::Decimal32(precision, scale),
200+
// DECIMAL64_MAX_PRECISION
201+
10..=18 => DataType::Decimal64(precision, scale),
202+
// DECIMAL128_MAX_PRECISION
203+
19..=38 => DataType::Decimal128(precision, scale),
204+
// DECIMAL256_MAX_PRECISION
205+
39.. => DataType::Decimal256(precision, scale),
195206
}
196207
}
197208
DType::Utf8(_) => DataType::Utf8View,

0 commit comments

Comments
 (0)