diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index b24af34e86c5..4f88b351a1db 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -184,7 +184,7 @@ impl ValueBuffer { self.0.len() } - fn append_value<'m, 'd, T: Into>>(&mut self, value: T) { + fn append_non_nested_value<'m, 'd, T: Into>>(&mut self, value: T) { let variant = value.into(); match variant { Variant::Null => self.append_null(), @@ -212,7 +212,9 @@ impl ValueBuffer { Variant::String(s) => self.append_string(s), Variant::ShortString(s) => self.append_short_string(s), Variant::Object(_) | Variant::List(_) => { - todo!("How does this work with the redesign?"); + unreachable!( + "Nested values are handled specially by ObjectBuilder and ListBuilder" + ); } } } @@ -414,7 +416,7 @@ impl VariantBuilder { } pub fn append_value<'m, 'd, T: Into>>(&mut self, value: T) { - self.buffer.append_value(value); + self.buffer.append_non_nested_value(value); } pub fn finish(self) -> (Vec, Vec) { @@ -477,6 +479,7 @@ pub struct ListBuilder<'a> { metadata_builder: &'a mut MetadataBuilder, offsets: Vec, buffer: ValueBuffer, + /// Is there a pending nested object or list that needs to be finalized? pending: bool, } @@ -523,7 +526,7 @@ impl<'a> ListBuilder<'a> { pub fn append_value<'m, 'd, T: Into>>(&mut self, value: T) { self.check_new_offset(); - self.buffer.append_value(value); + self.buffer.append_non_nested_value(value); let element_end = self.buffer.offset(); self.offsets.push(element_end); } @@ -574,20 +577,23 @@ impl<'a> ListBuilder<'a> { /// A builder for creating [`Variant::Object`] values. /// /// See the examples on [`VariantBuilder`] for usage. -pub struct ObjectBuilder<'a> { +pub struct ObjectBuilder<'a, 'b> { parent_buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder, fields: BTreeMap, // (field_id, offset) buffer: ValueBuffer, + /// Is there a pending list or object that needs to be finalized? + pending: Option<(&'b str, usize)>, } -impl<'a> ObjectBuilder<'a> { +impl<'a, 'b> ObjectBuilder<'a, 'b> { fn new(parent_buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder) -> Self { Self { parent_buffer, metadata_builder, fields: BTreeMap::new(), buffer: ValueBuffer::default(), + pending: None, } } @@ -600,11 +606,50 @@ impl<'a> ObjectBuilder<'a> { let field_start = self.buffer.offset(); self.fields.insert(field_id, field_start); - self.buffer.append_value(value); + self.buffer.append_non_nested_value(value); } - /// Finalize object with sorted fields - pub fn finish(self) { + fn check_pending_field(&mut self) { + let Some((field_name, field_start)) = self.pending.as_ref() else { + return; + }; + + let field_id = self.metadata_builder.add_field_name(field_name); + self.fields.insert(field_id, *field_start); + + self.pending = None; + } + + /// Return a new [`ObjectBuilder`] to add a nested object with the specified + /// key to the object. + pub fn new_object(&mut self, key: &'b str) -> ObjectBuilder { + self.check_pending_field(); + + let field_start = self.buffer.offset(); + let obj_builder = ObjectBuilder::new(&mut self.buffer, self.metadata_builder); + self.pending = Some((key, field_start)); + + obj_builder + } + + /// Return a new [`ListBuilder`] to add a list with the specified key to the + /// object. + pub fn new_list(&mut self, key: &'b str) -> ListBuilder { + self.check_pending_field(); + + let field_start = self.buffer.offset(); + let list_builder = ListBuilder::new(&mut self.buffer, self.metadata_builder); + self.pending = Some((key, field_start)); + + list_builder + } + + /// Finalize object + /// + /// This consumes self and writes the object to the parent buffer. + pub fn finish(mut self) { + self.check_pending_field(); + let data_size = self.buffer.offset(); let num_fields = self.fields.len(); let is_large = num_fields > u8::MAX as usize; @@ -1192,4 +1237,131 @@ mod tests { assert_eq!(list.get(4).unwrap(), Variant::from(3)); } + + #[test] + fn test_nested_object() { + /* + { + "c": { + "b": "a" + } + } + + */ + + let mut builder = VariantBuilder::new(); + { + let mut outer_object_builder = builder.new_object(); + { + let mut inner_object_builder = outer_object_builder.new_object("c"); + inner_object_builder.insert("b", "a"); + inner_object_builder.finish(); + } + + outer_object_builder.finish(); + } + + let (metadata, value) = builder.finish(); + let variant = Variant::try_new(&metadata, &value).unwrap(); + let outer_object = variant.as_object().unwrap(); + + assert_eq!(outer_object.len(), 1); + assert_eq!(outer_object.field_name(0).unwrap(), "c"); + + let inner_object_variant = outer_object.field(0).unwrap(); + let inner_object = inner_object_variant.as_object().unwrap(); + + assert_eq!(inner_object.len(), 1); + assert_eq!(inner_object.field_name(0).unwrap(), "b"); + assert_eq!(inner_object.field(0).unwrap(), Variant::from("a")); + } + + #[test] + fn test_nested_object_with_duplicate_field_names_per_object() { + /* + { + "c": { + "c": "a" + } + } + + */ + + let mut builder = VariantBuilder::new(); + { + let mut outer_object_builder = builder.new_object(); + { + let mut inner_object_builder = outer_object_builder.new_object("c"); + inner_object_builder.insert("c", "a"); + inner_object_builder.finish(); + } + + outer_object_builder.finish(); + } + + let (metadata, value) = builder.finish(); + let variant = Variant::try_new(&metadata, &value).unwrap(); + let outer_object = variant.as_object().unwrap(); + + assert_eq!(outer_object.len(), 1); + assert_eq!(outer_object.field_name(0).unwrap(), "c"); + + let inner_object_variant = outer_object.field(0).unwrap(); + let inner_object = inner_object_variant.as_object().unwrap(); + + assert_eq!(inner_object.len(), 1); + assert_eq!(inner_object.field_name(0).unwrap(), "c"); + assert_eq!(inner_object.field(0).unwrap(), Variant::from("a")); + } + + #[test] + fn test_nested_object_with_lists() { + /* + { + "door 1": { + "items": ["apple", false ] + } + } + + */ + + let mut builder = VariantBuilder::new(); + { + let mut outer_object_builder = builder.new_object(); + { + let mut inner_object_builder = outer_object_builder.new_object("door 1"); + + { + let mut inner_object_list_builder = inner_object_builder.new_list("items"); + inner_object_list_builder.append_value("apple"); + inner_object_list_builder.append_value(false); + inner_object_list_builder.finish(); + } + + inner_object_builder.finish(); + } + + outer_object_builder.finish(); + } + + let (metadata, value) = builder.finish(); + let variant = Variant::try_new(&metadata, &value).unwrap(); + let outer_object = variant.as_object().unwrap(); + + assert_eq!(outer_object.len(), 1); + assert_eq!(outer_object.field_name(0).unwrap(), "door 1"); + + let inner_object_variant = outer_object.field(0).unwrap(); + let inner_object = inner_object_variant.as_object().unwrap(); + + assert_eq!(inner_object.len(), 1); + assert_eq!(inner_object.field_name(0).unwrap(), "items"); + + let items_variant = inner_object.field(0).unwrap(); + let items_list = items_variant.as_list().unwrap(); + + assert_eq!(items_list.len(), 2); + assert_eq!(items_list.get(0).unwrap(), Variant::from("apple")); + assert_eq!(items_list.get(1).unwrap(), Variant::from(false)); + } }