From 2ba032423d37952912511cabf2b7347bdafc573e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 1 Nov 2025 06:49:46 -0400 Subject: [PATCH 1/2] Update comments in page decompressor --- parquet/src/file/serialized_reader.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index ef71b4b6ac8f..73e45d80f96a 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -387,8 +387,6 @@ pub(crate) fn decode_page( can_decompress = header_v2.is_compressed.unwrap_or(true); } - // TODO: page header could be huge because of statistics. We should set a - // maximum page header size and abort if that is exceeded. let buffer = match decompressor { Some(decompressor) if can_decompress => { let uncompressed_page_size = usize::try_from(page_header.uncompressed_page_size)?; @@ -398,6 +396,8 @@ pub(crate) fn decode_page( let decompressed_size = uncompressed_page_size - offset; let mut decompressed = Vec::with_capacity(uncompressed_page_size); decompressed.extend_from_slice(&buffer[..offset]); + // decompressed size of zero corresponds to a page with only null values + // see https://github.com/apache/parquet-format/blob/master/README.md#data-pages if decompressed_size > 0 { let compressed = &buffer[offset..]; decompressor.decompress(compressed, &mut decompressed, Some(decompressed_size))?; From cae8d57cdbd3f39ec3eded6ed13f404ccecc814c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 6 Nov 2025 06:38:26 -0500 Subject: [PATCH 2/2] Update parquet/src/file/serialized_reader.rs --- parquet/src/file/serialized_reader.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 73e45d80f96a..234426b86734 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -396,7 +396,7 @@ pub(crate) fn decode_page( let decompressed_size = uncompressed_page_size - offset; let mut decompressed = Vec::with_capacity(uncompressed_page_size); decompressed.extend_from_slice(&buffer[..offset]); - // decompressed size of zero corresponds to a page with only null values + // decompressed size of zero corresponds to a page with no non-null values // see https://github.com/apache/parquet-format/blob/master/README.md#data-pages if decompressed_size > 0 { let compressed = &buffer[offset..];