Skip to content

Commit 14647e1

Browse files
committed
Revert "Update mask batch size"
This reverts commit df78cbf.
1 parent a63d130 commit 14647e1

File tree

1 file changed

+4
-60
lines changed

1 file changed

+4
-60
lines changed

parquet/src/arrow/arrow_reader/read_plan.rs

Lines changed: 4 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -380,15 +380,10 @@ impl RowSelectionCursor {
380380
let mut chunk_rows = 0;
381381
let mut selected_rows = 0;
382382

383-
// Advance until this chunk would consume `batch_size` input rows or the mask
384-
// is exhausted, tracking how many of those rows are selected. This mirrors
385-
// the behaviour of the legacy `RowSelector` queue-based iteration while
386-
// ensuring the downstream readers never request more than `batch_size`
387-
// physical rows for a single chunk.
388-
while cursor < mask.len()
389-
&& chunk_rows < batch_size
390-
&& selected_rows < batch_size
391-
{
383+
// Advance until enough rows have been selected to satisfy the batch size,
384+
// or until the mask is exhausted. This mirrors the behaviour of the legacy
385+
// `RowSelector` queue-based iteration.
386+
while cursor < mask.len() && selected_rows < batch_size {
392387
chunk_rows += 1;
393388
if mask.value(cursor) {
394389
selected_rows += 1;
@@ -433,54 +428,3 @@ fn boolean_mask_from_selectors(selectors: &[RowSelector]) -> BooleanBuffer {
433428
}
434429
builder.finish()
435430
}
436-
437-
#[cfg(test)]
438-
mod tests {
439-
use super::*;
440-
441-
#[test]
442-
fn mask_chunk_respects_batch_size() {
443-
// Build a selection that alternates skipping three rows and selecting one.
444-
let selectors = (0..32)
445-
.flat_map(|_| [RowSelector::skip(3), RowSelector::select(1)])
446-
.collect::<Vec<_>>();
447-
448-
let mut cursor = RowSelectionCursor::new(selectors);
449-
assert!(cursor.is_mask_backed());
450-
451-
let batch_size = 8;
452-
let mut total_selected = 0;
453-
let mut total_rows = 0;
454-
let mut saw_sparse_chunk = false;
455-
456-
while let Some(chunk) = cursor.next_mask_chunk(batch_size) {
457-
assert!(
458-
chunk.chunk_rows <= batch_size,
459-
"chunk_rows {} exceeds batch_size {}",
460-
chunk.chunk_rows,
461-
batch_size
462-
);
463-
assert!(
464-
chunk.selected_rows <= chunk.chunk_rows,
465-
"selected_rows {} exceeds chunk_rows {}",
466-
chunk.selected_rows,
467-
chunk.chunk_rows
468-
);
469-
470-
if total_rows == 0 {
471-
assert_eq!(chunk.initial_skip, 3);
472-
}
473-
474-
if chunk.chunk_rows == batch_size && chunk.selected_rows < batch_size {
475-
saw_sparse_chunk = true;
476-
}
477-
478-
total_selected += chunk.selected_rows;
479-
total_rows += chunk.initial_skip + chunk.chunk_rows;
480-
}
481-
482-
assert!(saw_sparse_chunk, "expected at least one sparse chunk");
483-
assert_eq!(total_selected, 32);
484-
assert_eq!(total_rows, 128);
485-
}
486-
}

0 commit comments

Comments
 (0)