Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 70 additions & 52 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 7 additions & 19 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,12 @@ ahash = { version = "0.8", default-features = false, features = [
"runtime-rng",
] }
apache-avro = { version = "0.20", default-features = false }
arrow = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", features = [
"prettyprint",
"chrono-tz",
] }
arrow-buffer = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", default-features = false }
arrow-flight = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", features = [
"flight-sql-experimental",
] }
arrow-ipc = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", default-features = false, features = [
"lz4",
] }
arrow-ord = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", default-features = false }
arrow-schema = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", default-features = false }
arrow = { version = "56.0.0", features = ["prettyprint", "chrono-tz"] }
arrow-buffer = { version = "56.0.0", default-features = false }
arrow-flight = { version = "56.0.0", features = ["flight-sql-experimental"] }
arrow-ipc = { version = "56.0.0", default-features = false, features = ["lz4"] }
arrow-ord = { version = "56.0.0", default-features = false }
arrow-schema = { version = "56.0.0", default-features = false }
async-trait = "0.1.89"
bigdecimal = "0.4.8"
bytes = "1.10"
Expand Down Expand Up @@ -157,12 +150,7 @@ itertools = "0.14"
log = "^0.4"
object_store = { version = ">=0.12.3, <0.13", default-features = false }
parking_lot = "0.12"
parquet = { git = "https://github.com/Coralogix/arrow-rs.git", tag = "v56.0.0-cx.0", default-features = false, features = [
"arrow",
"async",
"object_store",
"encryption",
] }
parquet = { version = "56.0.0", default-features = false, features = ["arrow", "async", "object_store", "encryption"] }
pbjson = { version = "0.7.0" }
pbjson-types = "0.7"
# Should match arrow-flight's version of prost.
Expand Down
6 changes: 4 additions & 2 deletions datafusion/datasource-parquet/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ use object_store::path::Path;
use object_store::{ObjectMeta, ObjectStore};
use parquet::arrow::arrow_reader::statistics::StatisticsConverter;
use parquet::arrow::parquet_to_arrow_schema;
use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData};
use parquet::file::metadata::{
PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData,
};
use std::any::Any;
use std::collections::HashMap;
use std::sync::Arc;
Expand Down Expand Up @@ -148,7 +150,7 @@ impl<'a> DFParquetMetadata<'a> {

if cache_metadata && file_metadata_cache.is_some() {
// Need to retrieve the entire metadata for the caching to be effective.
reader = reader.with_page_indexes(true);
reader = reader.with_page_index_policy(PageIndexPolicy::Required);
}

let metadata = Arc::new(
Expand Down
6 changes: 3 additions & 3 deletions datafusion/datasource-parquet/src/opener.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ use log::debug;
use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
use parquet::arrow::async_reader::AsyncFileReader;
use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask};
use parquet::file::metadata::ParquetMetaDataReader;
use parquet::file::metadata::{PageIndexPolicy, ParquetMetaDataReader};

/// Implements [`FileOpener`] for a parquet file
pub(super) struct ParquetOpener {
Expand Down Expand Up @@ -652,8 +652,8 @@ async fn load_page_index<T: AsyncFileReader>(
if missing_column_index || missing_offset_index {
let m = Arc::try_unwrap(Arc::clone(parquet_metadata))
.unwrap_or_else(|e| e.as_ref().clone());
let mut reader =
ParquetMetaDataReader::new_with_metadata(m).with_page_indexes(true);
let mut reader = ParquetMetaDataReader::new_with_metadata(m)
.with_page_index_policy(PageIndexPolicy::Required);
reader.load_page_index(input).await?;
let new_parquet_metadata = reader.finish()?;
let new_arrow_reader =
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-plan/src/spill/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,7 @@ mod tests {
.unwrap();

let size = get_record_batch_memory_size(&batch);
assert_eq!(size, 8320);
assert_eq!(size, 8208);
}

// ==== Spill manager tests ====
Expand Down
Loading
Loading