Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions parquet/THRIFT.md
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,32 @@ optional fields it is. A typical `write_thrift` implementation will look like:
}
```

In most instances, the `WriteThriftField` implementation can be handled by the `write_thrift_field`
macro. The first argument is the unqualified name of an object that implements `WriteThrift`, and
the second is the field type (which will be `FieldType::Struct` for Thrift structs and unions,
and `FieldType::I32` for Thrift enums).

```rust
write_thrift_field!(MyNewStruct, FieldType::Struct);
```

which expands to:

```rust
impl WriteThriftField for MyNewStruct {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
self.write_thrift(writer)?;
Ok(field_id)
}
}
```

### Handling for lists

Lists of serialized objects can usually be read using `parquet_thrift::read_thrift_vec` and written
Expand Down
54 changes: 5 additions & 49 deletions parquet/src/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use crate::parquet_thrift::{
ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
WriteThrift, WriteThriftField,
};
use crate::{thrift_enum, thrift_struct, thrift_union_all_empty};
use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field};

use crate::errors::{ParquetError, Result};

Expand Down Expand Up @@ -210,18 +210,7 @@ impl WriteThrift for ConvertedType {
}
}

impl WriteThriftField for ConvertedType {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
self.write_thrift(writer)?;
Ok(field_id)
}
}
write_thrift_field!(ConvertedType, FieldType::I32);

// ----------------------------------------------------------------------
// Mirrors thrift union `TimeUnit`
Expand Down Expand Up @@ -584,18 +573,7 @@ impl WriteThrift for LogicalType {
}
}

impl WriteThriftField for LogicalType {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
self.write_thrift(writer)?;
Ok(field_id)
}
}
write_thrift_field!(LogicalType, FieldType::Struct);

// ----------------------------------------------------------------------
// Mirrors thrift enum `FieldRepetitionType`
Expand Down Expand Up @@ -928,18 +906,7 @@ impl WriteThrift for Compression {
}
}

impl WriteThriftField for Compression {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
self.write_thrift(writer)?;
Ok(field_id)
}
}
write_thrift_field!(Compression, FieldType::I32);

impl Compression {
/// Returns the codec type of this compression setting as a string, without the compression
Expand Down Expand Up @@ -1116,18 +1083,7 @@ impl WriteThrift for EdgeInterpolationAlgorithm {
}
}

impl WriteThriftField for EdgeInterpolationAlgorithm {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
self.write_thrift(writer)?;
Ok(field_id)
}
}
write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);

impl Default for EdgeInterpolationAlgorithm {
fn default() -> Self {
Expand Down
31 changes: 7 additions & 24 deletions parquet/src/file/metadata/thrift/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ use crate::{
},
thrift_struct,
util::bit_util::FromBytes,
write_thrift_field,
};

// this needs to be visible to the schema conversion code
Expand Down Expand Up @@ -1521,18 +1522,9 @@ impl WriteThrift for crate::geospatial::statistics::GeospatialStatistics {
}
}

impl WriteThriftField for crate::geospatial::statistics::GeospatialStatistics {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
self.write_thrift(writer)?;
Ok(field_id)
}
}
// macro cannot handle qualified names
use crate::geospatial::statistics::GeospatialStatistics as RustGeospatialStatistics;
write_thrift_field!(RustGeospatialStatistics, FieldType::Struct);

// struct BoundingBox {
// 1: required double xmin;
Expand Down Expand Up @@ -1570,18 +1562,9 @@ impl WriteThrift for crate::geospatial::bounding_box::BoundingBox {
}
}

impl WriteThriftField for crate::geospatial::bounding_box::BoundingBox {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
self.write_thrift(writer)?;
Ok(field_id)
}
}
// macro cannot handle qualified names
use crate::geospatial::bounding_box::BoundingBox as RustBoundingBox;
write_thrift_field!(RustBoundingBox, FieldType::Struct);

#[cfg(test)]
pub(crate) mod tests {
Expand Down
15 changes: 15 additions & 0 deletions parquet/src/parquet_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,21 @@ macro_rules! thrift_struct {
}
}

#[doc(hidden)]
#[macro_export]
/// Generate `WriteThriftField` implementation for a struct.
macro_rules! write_thrift_field {
($identifier:ident $(< $lt:lifetime >)?, $fld_type:expr) => {
impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? {
fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
writer.write_field_begin($fld_type, field_id, last_field_id)?;
self.write_thrift(writer)?;
Ok(field_id)
}
}
}
}

#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_required_or_optional_field {
Expand Down
103 changes: 12 additions & 91 deletions parquet/src/parquet_thrift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ use std::{
io::{Read, Write},
};

use crate::errors::{ParquetError, Result};
use crate::{
errors::{ParquetError, Result},
write_thrift_field,
};

/// Wrapper for thrift `double` fields. This is used to provide
/// an implementation of `Eq` for floats. This implementation
Expand Down Expand Up @@ -913,6 +916,7 @@ pub(crate) trait WriteThriftField {
) -> Result<i16>;
}

// bool struct fields are written differently to bool values
impl WriteThriftField for bool {
fn write_thrift_field<W: Write>(
&self,
Expand All @@ -929,83 +933,13 @@ impl WriteThriftField for bool {
}
}

impl WriteThriftField for i8 {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::Byte, field_id, last_field_id)?;
writer.write_i8(*self)?;
Ok(field_id)
}
}

impl WriteThriftField for i16 {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::I16, field_id, last_field_id)?;
writer.write_i16(*self)?;
Ok(field_id)
}
}

impl WriteThriftField for i32 {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
writer.write_i32(*self)?;
Ok(field_id)
}
}

impl WriteThriftField for i64 {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::I64, field_id, last_field_id)?;
writer.write_i64(*self)?;
Ok(field_id)
}
}

impl WriteThriftField for OrderedF64 {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::Double, field_id, last_field_id)?;
writer.write_double(self.0)?;
Ok(field_id)
}
}

impl WriteThriftField for f64 {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::Double, field_id, last_field_id)?;
writer.write_double(*self)?;
Ok(field_id)
}
}
write_thrift_field!(i8, FieldType::Byte);
write_thrift_field!(i16, FieldType::I16);
write_thrift_field!(i32, FieldType::I32);
write_thrift_field!(i64, FieldType::I64);
write_thrift_field!(OrderedF64, FieldType::Double);
write_thrift_field!(f64, FieldType::Double);
write_thrift_field!(String, FieldType::Binary);

impl WriteThriftField for &[u8] {
fn write_thrift_field<W: Write>(
Expand Down Expand Up @@ -1033,19 +967,6 @@ impl WriteThriftField for &str {
}
}

impl WriteThriftField for String {
fn write_thrift_field<W: Write>(
&self,
writer: &mut ThriftCompactOutputProtocol<W>,
field_id: i16,
last_field_id: i16,
) -> Result<i16> {
writer.write_field_begin(FieldType::Binary, field_id, last_field_id)?;
writer.write_bytes(self.as_bytes())?;
Ok(field_id)
}
}

impl<T> WriteThriftField for Vec<T>
where
T: WriteThrift,
Expand Down
Loading