diff --git a/parquet/THRIFT.md b/parquet/THRIFT.md index 06e97709cce3..56365665070a 100644 --- a/parquet/THRIFT.md +++ b/parquet/THRIFT.md @@ -406,6 +406,32 @@ optional fields it is. A typical `write_thrift` implementation will look like: } ``` +In most instances, the `WriteThriftField` implementation can be handled by the `write_thrift_field` +macro. The first argument is the unqualified name of an object that implements `WriteThrift`, and +the second is the field type (which will be `FieldType::Struct` for Thrift structs and unions, +and `FieldType::I32` for Thrift enums). + +```rust +write_thrift_field!(MyNewStruct, FieldType::Struct); +``` + +which expands to: + +```rust +impl WriteThriftField for MyNewStruct { + fn write_thrift_field( + &self, + writer: &mut ThriftCompactOutputProtocol, + field_id: i16, + last_field_id: i16, + ) -> Result { + writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?; + self.write_thrift(writer)?; + Ok(field_id) + } +} +``` + ### Handling for lists Lists of serialized objects can usually be read using `parquet_thrift::read_thrift_vec` and written diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index 0220b1b55d6e..bd42f043eead 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -30,7 +30,7 @@ use crate::parquet_thrift::{ ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, WriteThriftField, }; -use crate::{thrift_enum, thrift_struct, thrift_union_all_empty}; +use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field}; use crate::errors::{ParquetError, Result}; @@ -210,18 +210,7 @@ impl WriteThrift for ConvertedType { } } -impl WriteThriftField for ConvertedType { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::I32, field_id, last_field_id)?; - self.write_thrift(writer)?; - Ok(field_id) - } -} +write_thrift_field!(ConvertedType, FieldType::I32); // ---------------------------------------------------------------------- // Mirrors thrift union `TimeUnit` @@ -584,18 +573,7 @@ impl WriteThrift for LogicalType { } } -impl WriteThriftField for LogicalType { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?; - self.write_thrift(writer)?; - Ok(field_id) - } -} +write_thrift_field!(LogicalType, FieldType::Struct); // ---------------------------------------------------------------------- // Mirrors thrift enum `FieldRepetitionType` @@ -928,18 +906,7 @@ impl WriteThrift for Compression { } } -impl WriteThriftField for Compression { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::I32, field_id, last_field_id)?; - self.write_thrift(writer)?; - Ok(field_id) - } -} +write_thrift_field!(Compression, FieldType::I32); impl Compression { /// Returns the codec type of this compression setting as a string, without the compression @@ -1116,18 +1083,7 @@ impl WriteThrift for EdgeInterpolationAlgorithm { } } -impl WriteThriftField for EdgeInterpolationAlgorithm { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::I32, field_id, last_field_id)?; - self.write_thrift(writer)?; - Ok(field_id) - } -} +write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32); impl Default for EdgeInterpolationAlgorithm { fn default() -> Self { diff --git a/parquet/src/file/metadata/thrift/mod.rs b/parquet/src/file/metadata/thrift/mod.rs index ba707baa766f..14774910961f 100644 --- a/parquet/src/file/metadata/thrift/mod.rs +++ b/parquet/src/file/metadata/thrift/mod.rs @@ -58,6 +58,7 @@ use crate::{ }, thrift_struct, util::bit_util::FromBytes, + write_thrift_field, }; // this needs to be visible to the schema conversion code @@ -1521,18 +1522,9 @@ impl WriteThrift for crate::geospatial::statistics::GeospatialStatistics { } } -impl WriteThriftField for crate::geospatial::statistics::GeospatialStatistics { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?; - self.write_thrift(writer)?; - Ok(field_id) - } -} +// macro cannot handle qualified names +use crate::geospatial::statistics::GeospatialStatistics as RustGeospatialStatistics; +write_thrift_field!(RustGeospatialStatistics, FieldType::Struct); // struct BoundingBox { // 1: required double xmin; @@ -1570,18 +1562,9 @@ impl WriteThrift for crate::geospatial::bounding_box::BoundingBox { } } -impl WriteThriftField for crate::geospatial::bounding_box::BoundingBox { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?; - self.write_thrift(writer)?; - Ok(field_id) - } -} +// macro cannot handle qualified names +use crate::geospatial::bounding_box::BoundingBox as RustBoundingBox; +write_thrift_field!(RustBoundingBox, FieldType::Struct); #[cfg(test)] pub(crate) mod tests { diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs index 22c61df36a6a..eb8bc2b7f07a 100644 --- a/parquet/src/parquet_macros.rs +++ b/parquet/src/parquet_macros.rs @@ -293,6 +293,21 @@ macro_rules! thrift_struct { } } +#[doc(hidden)] +#[macro_export] +/// Generate `WriteThriftField` implementation for a struct. +macro_rules! write_thrift_field { + ($identifier:ident $(< $lt:lifetime >)?, $fld_type:expr) => { + impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? { + fn write_thrift_field(&self, writer: &mut ThriftCompactOutputProtocol, field_id: i16, last_field_id: i16) -> Result { + writer.write_field_begin($fld_type, field_id, last_field_id)?; + self.write_thrift(writer)?; + Ok(field_id) + } + } + } +} + #[doc(hidden)] #[macro_export] macro_rules! __thrift_write_required_or_optional_field { diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs index e27c7d16efdb..221532ea8332 100644 --- a/parquet/src/parquet_thrift.rs +++ b/parquet/src/parquet_thrift.rs @@ -31,7 +31,10 @@ use std::{ io::{Read, Write}, }; -use crate::errors::{ParquetError, Result}; +use crate::{ + errors::{ParquetError, Result}, + write_thrift_field, +}; /// Wrapper for thrift `double` fields. This is used to provide /// an implementation of `Eq` for floats. This implementation @@ -913,6 +916,7 @@ pub(crate) trait WriteThriftField { ) -> Result; } +// bool struct fields are written differently to bool values impl WriteThriftField for bool { fn write_thrift_field( &self, @@ -929,83 +933,13 @@ impl WriteThriftField for bool { } } -impl WriteThriftField for i8 { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::Byte, field_id, last_field_id)?; - writer.write_i8(*self)?; - Ok(field_id) - } -} - -impl WriteThriftField for i16 { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::I16, field_id, last_field_id)?; - writer.write_i16(*self)?; - Ok(field_id) - } -} - -impl WriteThriftField for i32 { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::I32, field_id, last_field_id)?; - writer.write_i32(*self)?; - Ok(field_id) - } -} - -impl WriteThriftField for i64 { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::I64, field_id, last_field_id)?; - writer.write_i64(*self)?; - Ok(field_id) - } -} - -impl WriteThriftField for OrderedF64 { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::Double, field_id, last_field_id)?; - writer.write_double(self.0)?; - Ok(field_id) - } -} - -impl WriteThriftField for f64 { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::Double, field_id, last_field_id)?; - writer.write_double(*self)?; - Ok(field_id) - } -} +write_thrift_field!(i8, FieldType::Byte); +write_thrift_field!(i16, FieldType::I16); +write_thrift_field!(i32, FieldType::I32); +write_thrift_field!(i64, FieldType::I64); +write_thrift_field!(OrderedF64, FieldType::Double); +write_thrift_field!(f64, FieldType::Double); +write_thrift_field!(String, FieldType::Binary); impl WriteThriftField for &[u8] { fn write_thrift_field( @@ -1033,19 +967,6 @@ impl WriteThriftField for &str { } } -impl WriteThriftField for String { - fn write_thrift_field( - &self, - writer: &mut ThriftCompactOutputProtocol, - field_id: i16, - last_field_id: i16, - ) -> Result { - writer.write_field_begin(FieldType::Binary, field_id, last_field_id)?; - writer.write_bytes(self.as_bytes())?; - Ok(field_id) - } -} - impl WriteThriftField for Vec where T: WriteThrift,