diff --git a/Cargo.lock b/Cargo.lock index 833f543e13fde..b385aae1fa8d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -240,8 +240,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" dependencies = [ "arrow-arith", "arrow-array", @@ -263,8 +264,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" dependencies = [ "arrow-array", "arrow-buffer", @@ -276,8 +278,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -286,14 +289,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.15.5", + "hashbrown 0.16.1", "num", ] [[package]] name = "arrow-buffer" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" dependencies = [ "bytes", "half", @@ -302,8 +306,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" dependencies = [ "arrow-array", "arrow-buffer", @@ -322,8 +327,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" dependencies = [ "arrow-array", "arrow-cast", @@ -336,8 +342,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" dependencies = [ "arrow-buffer", "arrow-schema", @@ -347,8 +354,9 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c8b0ba0784d56bc6266b79f5de7a24b47024e7b3a0045d2ad4df3d9b686099f" dependencies = [ "arrow-arith", "arrow-array", @@ -373,13 +381,15 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", + "arrow-select", "flatbuffers", "lz4_flex", "zstd", @@ -387,8 +397,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" dependencies = [ "arrow-array", "arrow-buffer", @@ -408,8 +419,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" dependencies = [ "arrow-array", "arrow-buffer", @@ -420,8 +432,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d924b32e96f8bb74d94cd82bd97b313c432fcb0ea331689ef9e7c6b8be4b258" dependencies = [ "arrow-array", "arrow-data", @@ -431,8 +444,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" dependencies = [ "arrow-array", "arrow-buffer", @@ -443,8 +457,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" dependencies = [ "bitflags 2.9.1", "serde", @@ -453,8 +468,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -466,8 +482,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" dependencies = [ "arrow-array", "arrow-buffer", @@ -1485,13 +1502,13 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "comfy-table" -version = "7.1.0" +version = "7.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" +checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" dependencies = [ - "strum 0.25.0", - "strum_macros 0.25.3", - "unicode-width 0.1.14", + "strum 0.26.3", + "strum_macros 0.26.4", + "unicode-width 0.2.1", ] [[package]] @@ -3371,6 +3388,12 @@ dependencies = [ "foldhash", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + [[package]] name = "heck" version = "0.3.3" @@ -3380,12 +3403,6 @@ dependencies = [ "unicode-segmentation", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -4523,8 +4540,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.0.0" -source = "git+https://github.com/Coralogix/arrow-rs.git?tag=v56.0.0-cx.0#a6a352b6661b5da018b1e998ab3a8f14797ae86b" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4541,7 +4559,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.15.5", + "hashbrown 0.16.1", "lz4_flex", "num", "num-bigint", @@ -6204,9 +6222,9 @@ dependencies = [ [[package]] name = "strum" -version = "0.25.0" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" [[package]] name = "strum" @@ -6216,11 +6234,11 @@ checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" [[package]] name = "strum_macros" -version = "0.25.3" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "rustversion", diff --git a/Cargo.toml b/Cargo.toml index 6bd0149121002..7b7d7e3c599bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -90,19 +90,12 @@ ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } apache-avro = { version = "0.20", default-features = false } -arrow = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", features = [ - "prettyprint", - "chrono-tz", -] } -arrow-buffer = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", default-features = false } -arrow-flight = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", features = [ - "flight-sql-experimental", -] } -arrow-ipc = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", default-features = false, features = [ - "lz4", -] } -arrow-ord = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", default-features = false } -arrow-schema = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", default-features = false } +arrow = { version = "56.0.0", features = ["prettyprint", "chrono-tz"] } +arrow-buffer = { version = "56.0.0", default-features = false } +arrow-flight = { version = "56.0.0", features = ["flight-sql-experimental"] } +arrow-ipc = { version = "56.0.0", default-features = false, features = ["lz4"] } +arrow-ord = { version = "56.0.0", default-features = false } +arrow-schema = { version = "56.0.0", default-features = false } async-trait = "0.1.89" bigdecimal = "0.4.8" bytes = "1.10" @@ -157,12 +150,7 @@ itertools = "0.14" log = "^0.4" object_store = { version = ">=0.12.3, <0.13", default-features = false } parking_lot = "0.12" -parquet = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", default-features = false, features = [ - "arrow", - "async", - "object_store", - "encryption", -] } +parquet = { version = "56.0.0", default-features = false, features = ["arrow", "async", "object_store", "encryption"] } pbjson = { version = "0.7.0" } pbjson-types = "0.7" # Should match arrow-flight's version of prost. diff --git a/datafusion/datasource-parquet/src/metadata.rs b/datafusion/datasource-parquet/src/metadata.rs index 71c81a25001bf..11eeae205f909 100644 --- a/datafusion/datasource-parquet/src/metadata.rs +++ b/datafusion/datasource-parquet/src/metadata.rs @@ -39,7 +39,9 @@ use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; use parquet::arrow::arrow_reader::statistics::StatisticsConverter; use parquet::arrow::parquet_to_arrow_schema; -use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData}; +use parquet::file::metadata::{ + PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData, +}; use std::any::Any; use std::collections::HashMap; use std::sync::Arc; @@ -148,7 +150,7 @@ impl<'a> DFParquetMetadata<'a> { if cache_metadata && file_metadata_cache.is_some() { // Need to retrieve the entire metadata for the caching to be effective. - reader = reader.with_page_indexes(true); + reader = reader.with_page_index_policy(PageIndexPolicy::Required); } let metadata = Arc::new( diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs index 93a3d4af54326..0eadf6b0270c0 100644 --- a/datafusion/datasource-parquet/src/opener.rs +++ b/datafusion/datasource-parquet/src/opener.rs @@ -54,7 +54,7 @@ use log::debug; use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::arrow::async_reader::AsyncFileReader; use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask}; -use parquet::file::metadata::ParquetMetaDataReader; +use parquet::file::metadata::{PageIndexPolicy, ParquetMetaDataReader}; /// Implements [`FileOpener`] for a parquet file pub(super) struct ParquetOpener { @@ -652,8 +652,8 @@ async fn load_page_index( if missing_column_index || missing_offset_index { let m = Arc::try_unwrap(Arc::clone(parquet_metadata)) .unwrap_or_else(|e| e.as_ref().clone()); - let mut reader = - ParquetMetaDataReader::new_with_metadata(m).with_page_indexes(true); + let mut reader = ParquetMetaDataReader::new_with_metadata(m) + .with_page_index_policy(PageIndexPolicy::Required); reader.load_page_index(input).await?; let new_parquet_metadata = reader.finish()?; let new_arrow_reader = diff --git a/datafusion/physical-plan/src/spill/mod.rs b/datafusion/physical-plan/src/spill/mod.rs index fab62bff840f6..fbea6ae85a390 100644 --- a/datafusion/physical-plan/src/spill/mod.rs +++ b/datafusion/physical-plan/src/spill/mod.rs @@ -752,7 +752,7 @@ mod tests { .unwrap(); let size = get_record_batch_memory_size(&batch); - assert_eq!(size, 8320); + assert_eq!(size, 8208); } // ==== Spill manager tests ==== diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 0df361a75bae2..b4916cb6e6ec7 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -1314,7 +1314,7 @@ physical_plan 11)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ 12)│ DataSourceExec ││ DataSourceExec │ 13)│ -------------------- ││ -------------------- │ -14)│ bytes: 6040 ││ bytes: 6040 │ +14)│ bytes: 5932 ││ bytes: 5932 │ 15)│ format: memory ││ format: memory │ 16)│ rows: 1 ││ rows: 1 │ 17)└───────────────────────────┘└───────────────────────────┘ @@ -1798,7 +1798,7 @@ physical_plan 11)┌─────────────┴─────────────┐ 12)│ DataSourceExec │ 13)│ -------------------- │ -14)│ bytes: 2672 │ +14)│ bytes: 2576 │ 15)│ format: memory │ 16)│ rows: 1 │ 17)└───────────────────────────┘ @@ -1821,7 +1821,7 @@ physical_plan 11)┌─────────────┴─────────────┐ 12)│ DataSourceExec │ 13)│ -------------------- │ -14)│ bytes: 2672 │ +14)│ bytes: 2576 │ 15)│ format: memory │ 16)│ rows: 1 │ 17)└───────────────────────────┘ @@ -1844,7 +1844,7 @@ physical_plan 11)┌─────────────┴─────────────┐ 12)│ DataSourceExec │ 13)│ -------------------- │ -14)│ bytes: 2672 │ +14)│ bytes: 2576 │ 15)│ format: memory │ 16)│ rows: 1 │ 17)└───────────────────────────┘