Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
e8588b1
reverse parquet draft version
zhuqi-lucas Nov 20, 2025
2cf2e31
Support limit pushdown for reverse scan
zhuqi-lucas Nov 20, 2025
2f73a4a
Support row group level cache
zhuqi-lucas Nov 21, 2025
f546a7f
Merge branch 'main' into reverse_parquet
zhuqi-lucas Nov 21, 2025
07ef9a2
fix
zhuqi-lucas Nov 21, 2025
c123a37
fmt
zhuqi-lucas Nov 21, 2025
46cfd89
add more test
zhuqi-lucas Nov 21, 2025
99e50de
Add metrics for reverse scan row groups.
zhuqi-lucas Nov 21, 2025
dbcf598
fix
zhuqi-lucas Nov 21, 2025
12f74d6
Merge branch 'main' into reverse_parquet
zhuqi-lucas Nov 21, 2025
2cfd73e
optimize code
zhuqi-lucas Nov 21, 2025
3479672
Merge branch 'main' into reverse_parquet
zhuqi-lucas Nov 21, 2025
98fac46
Add more comments
zhuqi-lucas Nov 21, 2025
92b6487
fmt
zhuqi-lucas Nov 21, 2025
475bf3d
Merge branch 'main' into reverse_parquet
zhuqi-lucas Nov 21, 2025
8ccca52
add enable/disable option
zhuqi-lucas Nov 22, 2025
5d63557
Merge branch 'main' into reverse_parquet
zhuqi-lucas Nov 22, 2025
a2b44a5
fix slt
zhuqi-lucas Nov 22, 2025
b9f8199
fix proto
zhuqi-lucas Nov 22, 2025
3502c10
fix
zhuqi-lucas Nov 22, 2025
fca325c
Merge branch 'main' into reverse_parquet
zhuqi-lucas Nov 22, 2025
9af4fba
Update doc
zhuqi-lucas Nov 22, 2025
7a63ddd
Add reverse files testing in slt
zhuqi-lucas Nov 22, 2025
dfb29d7
Merge branch 'main' into reverse_parquet
zhuqi-lucas Nov 22, 2025
52c9b30
Merge branch 'main' into reverse_parquet
zhuqi-lucas Nov 24, 2025
bb31251
simple test
zhuqi-lucas Nov 24, 2025
45089c5
Change to sort pushdown architecture
zhuqi-lucas Nov 24, 2025
d15994b
Merge branch 'main' into reverse_parquet
zhuqi-lucas Nov 24, 2025
9452005
fix
zhuqi-lucas Nov 24, 2025
d338461
make code easy
zhuqi-lucas Nov 24, 2025
2d77d77
proto fix
zhuqi-lucas Nov 24, 2025
2175b03
address review comments
zhuqi-lucas Nov 27, 2025
325a9db
Merge remote-tracking branch 'upstream/main' into reverse_parquet
zhuqi-lucas Nov 27, 2025
a52c008
fix
zhuqi-lucas Nov 27, 2025
d2e008a
fix
zhuqi-lucas Nov 27, 2025
e51e781
fix
zhuqi-lucas Nov 27, 2025
3227671
fix
zhuqi-lucas Nov 27, 2025
3b2d4f5
support full test
zhuqi-lucas Nov 28, 2025
2a477dd
fix
zhuqi-lucas Nov 28, 2025
cd04b73
fix
zhuqi-lucas Nov 28, 2025
e0754b2
Merge branch 'main' into reverse_parquet
zhuqi-lucas Nov 28, 2025
86e027c
remove datasource from optimizer
zhuqi-lucas Nov 28, 2025
da4586b
Merge branch 'main' into reverse_parquet
zhuqi-lucas Dec 2, 2025
354a82b
address new comments
zhuqi-lucas Dec 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,15 @@ config_namespace! {
/// writing out already in-memory data, such as from a cached
/// data frame.
pub maximum_buffered_record_batches_per_stream: usize, default = 2

/// Enable sort pushdown optimization for sorted Parquet files.
/// Currently, this optimization only supports reverse ordering.
/// When a query requires ordering that can be satisfied by reversing
/// the file's natural ordering, row groups and batches are read in
/// reverse order to eliminate sort operations.
/// Note: This buffers one row group at a time (typically ~128MB).
/// Default: true
Comment on lines +834 to +841
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this support non-reverse cases where the query order is the same as the file order? That is, can we eliminate sorts in that simpler case as well? Or does that already happen (i.e., is it already implemented)?

pub enable_sort_pushdown: bool, default = true
}
}

Expand Down
3 changes: 3 additions & 0 deletions datafusion/common/src/file_options/parquet_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ impl ParquetOptions {
coerce_int96: _, // not used for writer props
skip_arrow_metadata: _,
max_predicate_cache_size: _,
enable_sort_pushdown: _,
} = self;

let mut builder = WriterProperties::builder()
Expand Down Expand Up @@ -472,6 +473,7 @@ mod tests {
skip_arrow_metadata: defaults.skip_arrow_metadata,
coerce_int96: None,
max_predicate_cache_size: defaults.max_predicate_cache_size,
enable_sort_pushdown: true,
}
}

Expand Down Expand Up @@ -585,6 +587,7 @@ mod tests {
binary_as_string: global_options_defaults.binary_as_string,
skip_arrow_metadata: global_options_defaults.skip_arrow_metadata,
coerce_int96: None,
enable_sort_pushdown: true,
},
column_specific_options,
key_value_metadata,
Expand Down
1 change: 1 addition & 0 deletions datafusion/core/tests/physical_optimizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ mod limit_pushdown;
mod limited_distinct_aggregation;
mod partition_statistics;
mod projection_pushdown;
mod pushdown_sort;
mod replace_with_order_preserving_variants;
mod sanity_checker;
#[expect(clippy::needless_pass_by_value)]
Expand Down
Loading