diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index 1a67c9de687b..fde46d5b36b1 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -599,6 +599,7 @@ pub(crate) mod private { + super::SliceAsBytes + PartialOrd + Send + + crate::encodings::decoding::private::GetDecoder { /// Encode the value directly from a higher level encoder fn encode( diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs index b3f98d12b753..f044dd244d25 100644 --- a/parquet/src/encodings/decoding.rs +++ b/parquet/src/encodings/decoding.rs @@ -22,7 +22,7 @@ use std::{cmp, marker::PhantomData, mem}; use super::rle::RleDecoder; use crate::basic::*; -use crate::data_type::private::*; +use crate::data_type::private::ParquetValueType; use crate::data_type::*; use crate::errors::{ParquetError, Result}; use crate::schema::types::ColumnDescPtr; @@ -31,6 +31,111 @@ use crate::util::{ memory::{ByteBuffer, ByteBufferPtr}, }; +pub(crate) mod private { + use super::*; + + /// A trait that allows getting a [`Decoder`] implementation for a [`DataType`] with + /// the corresponding [`ParquetValueType`]. This is necessary to support + /// [`Decoder`] implementations that may not be applicable for all [`DataType`] + /// and by extension all [`ParquetValueType`] + pub trait GetDecoder { + fn get_decoder>( + descr: ColumnDescPtr, + encoding: Encoding, + ) -> Result>> { + get_decoder_default(descr, encoding) + } + } + + fn get_decoder_default( + descr: ColumnDescPtr, + encoding: Encoding, + ) -> Result>> { + match encoding { + Encoding::PLAIN => Ok(Box::new(PlainDecoder::new(descr.type_length()))), + Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => Err(general_err!( + "Cannot initialize this encoding through this function" + )), + Encoding::RLE + | Encoding::DELTA_BINARY_PACKED + | Encoding::DELTA_BYTE_ARRAY + | Encoding::DELTA_LENGTH_BYTE_ARRAY => Err(general_err!( + "Encoding {} is not supported for type", + encoding + )), + e => Err(nyi_err!("Encoding {} is not supported", e)), + } + } + + impl GetDecoder for bool { + fn get_decoder>( + descr: ColumnDescPtr, + encoding: Encoding, + ) -> Result>> { + match encoding { + Encoding::RLE => Ok(Box::new(RleValueDecoder::new())), + _ => get_decoder_default(descr, encoding), + } + } + } + + impl GetDecoder for i32 { + fn get_decoder>( + descr: ColumnDescPtr, + encoding: Encoding, + ) -> Result>> { + match encoding { + Encoding::DELTA_BINARY_PACKED => Ok(Box::new(DeltaBitPackDecoder::new())), + _ => get_decoder_default(descr, encoding), + } + } + } + + impl GetDecoder for i64 { + fn get_decoder>( + descr: ColumnDescPtr, + encoding: Encoding, + ) -> Result>> { + match encoding { + Encoding::DELTA_BINARY_PACKED => Ok(Box::new(DeltaBitPackDecoder::new())), + _ => get_decoder_default(descr, encoding), + } + } + } + + impl GetDecoder for f32 {} + impl GetDecoder for f64 {} + + impl GetDecoder for ByteArray { + fn get_decoder>( + descr: ColumnDescPtr, + encoding: Encoding, + ) -> Result>> { + match encoding { + Encoding::DELTA_BYTE_ARRAY => Ok(Box::new(DeltaByteArrayDecoder::new())), + Encoding::DELTA_LENGTH_BYTE_ARRAY => { + Ok(Box::new(DeltaLengthByteArrayDecoder::new())) + } + _ => get_decoder_default(descr, encoding), + } + } + } + + impl GetDecoder for FixedLenByteArray { + fn get_decoder>( + descr: ColumnDescPtr, + encoding: Encoding, + ) -> Result>> { + match encoding { + Encoding::DELTA_BYTE_ARRAY => Ok(Box::new(DeltaByteArrayDecoder::new())), + _ => get_decoder_default(descr, encoding), + } + } + } + + impl GetDecoder for Int96 {} +} + // ---------------------------------------------------------------------- // Decoders @@ -109,20 +214,8 @@ pub fn get_decoder( descr: ColumnDescPtr, encoding: Encoding, ) -> Result>> { - let decoder: Box> = match encoding { - Encoding::PLAIN => Box::new(PlainDecoder::new(descr.type_length())), - Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => { - return Err(general_err!( - "Cannot initialize this encoding through this function" - )); - } - Encoding::RLE => Box::new(RleValueDecoder::new()), - Encoding::DELTA_BINARY_PACKED => Box::new(DeltaBitPackDecoder::new()), - Encoding::DELTA_LENGTH_BYTE_ARRAY => Box::new(DeltaLengthByteArrayDecoder::new()), - Encoding::DELTA_BYTE_ARRAY => Box::new(DeltaByteArrayDecoder::new()), - e => return Err(nyi_err!("Encoding {} is not supported", e)), - }; - Ok(decoder) + use self::private::GetDecoder; + T::T::get_decoder(descr, encoding) } // ---------------------------------------------------------------------- @@ -817,8 +910,11 @@ mod tests { // supported encodings create_and_check_decoder::(Encoding::PLAIN, None); create_and_check_decoder::(Encoding::DELTA_BINARY_PACKED, None); - create_and_check_decoder::(Encoding::DELTA_LENGTH_BYTE_ARRAY, None); - create_and_check_decoder::(Encoding::DELTA_BYTE_ARRAY, None); + create_and_check_decoder::( + Encoding::DELTA_LENGTH_BYTE_ARRAY, + None, + ); + create_and_check_decoder::(Encoding::DELTA_BYTE_ARRAY, None); create_and_check_decoder::(Encoding::RLE, None); // error when initializing @@ -834,6 +930,18 @@ mod tests { "Cannot initialize this encoding through this function" )), ); + create_and_check_decoder::( + Encoding::DELTA_LENGTH_BYTE_ARRAY, + Some(general_err!( + "Encoding DELTA_LENGTH_BYTE_ARRAY is not supported for type" + )), + ); + create_and_check_decoder::( + Encoding::DELTA_BYTE_ARRAY, + Some(general_err!( + "Encoding DELTA_BYTE_ARRAY is not supported for type" + )), + ); // unsupported create_and_check_decoder::(