Skip to content

Commit 1201cb5

Browse files
authored
Remove parquet dictionary converters (#1661) (#1662)
1 parent a38e460 commit 1201cb5

File tree

1 file changed

+9
-122
lines changed

1 file changed

+9
-122
lines changed

parquet/src/arrow/converter.rs

Lines changed: 9 additions & 122 deletions
Original file line number | Diff line number | Diff line change
@@ -15,30 +15,20 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::data_type::{ByteArray, DataType, FixedLenByteArray, Int96};
19-
// TODO: clean up imports (best done when there are few moving parts)
18+
use crate::data_type::{ByteArray, FixedLenByteArray, Int96};
2019
use arrow::array::{
21-
Array, ArrayRef, BinaryBuilder, FixedSizeBinaryBuilder,
22-
IntervalDayTimeArray, IntervalDayTimeBuilder, IntervalYearMonthArray,
23-
IntervalYearMonthBuilder, LargeBinaryBuilder, LargeStringBuilder, PrimitiveBuilder,
24-
PrimitiveDictionaryBuilder, StringBuilder, StringDictionaryBuilder,
20+
Array, ArrayRef, BinaryArray, BinaryBuilder, DecimalArray, FixedSizeBinaryArray,
21+
FixedSizeBinaryBuilder, IntervalDayTimeArray, IntervalDayTimeBuilder,
22+
IntervalYearMonthArray, IntervalYearMonthBuilder, LargeBinaryArray,
23+
LargeBinaryBuilder, LargeStringArray, LargeStringBuilder, StringArray, StringBuilder,
24+
TimestampNanosecondArray,
2525
};
26-
use arrow::compute::cast;
2726
use std::convert::{From, TryInto};
2827
use std::sync::Arc;
2928

3029
use crate::errors::Result;
31-
use arrow::datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType};
32-
33-
use arrow::array::{
34-
BinaryArray, DecimalArray, DictionaryArray, FixedSizeBinaryArray, LargeBinaryArray,
35-
LargeStringArray, PrimitiveArray, StringArray, TimestampNanosecondArray,
36-
};
3730
use std::marker::PhantomData;
3831

39-
use crate::data_type::Int32Type as ParquetInt32Type;
40-
use arrow::datatypes::Int32Type;
41-
4232
/// A converter is used to consume record reader's content and convert it to arrow
4333
/// primitive array.
4434
pub trait Converter<S, T> {
@@ -100,13 +90,11 @@ impl DecimalArrayConverter {
10090

10191
impl Converter<Vec<Option<FixedLenByteArray>>, DecimalArray> for DecimalArrayConverter {
10292
fn convert(&self, source: Vec<Option<FixedLenByteArray>>) -> Result<DecimalArray> {
103-
let array = source.into_iter()
93+
let array = source
94+
.into_iter()
10495
.map(|array| array.map(|array| Self::from_bytes_to_i128(array.data())))
10596
.collect::<DecimalArray>()
106-
.with_precision_and_scale(
107-
self.precision as usize,
108-
self.scale as usize
109-
)?;
97+
.with_precision_and_scale(self.precision as usize, self.scale as usize)?;
11098

11199
Ok(array)
112100
}
@@ -251,92 +239,6 @@ impl Converter<Vec<Option<ByteArray>>, LargeBinaryArray> for LargeBinaryArrayCon
251239
}
252240
}
253241

254-
pub struct StringDictionaryArrayConverter {}
255-
256-
impl<K: ArrowDictionaryKeyType> Converter<Vec<Option<ByteArray>>, DictionaryArray<K>>
257-
for StringDictionaryArrayConverter
258-
{
259-
fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<DictionaryArray<K>> {
260-
let data_size = source
261-
.iter()
262-
.map(|x| x.as_ref().map(|b| b.len()).unwrap_or(0))
263-
.sum();
264-
265-
let keys_builder = PrimitiveBuilder::<K>::new(source.len());
266-
let values_builder = StringBuilder::with_capacity(source.len(), data_size);
267-
268-
let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder);
269-
for v in source {
270-
match v {
271-
Some(array) => {
272-
let _ = builder.append(array.as_utf8()?)?;
273-
}
274-
None => builder.append_null()?,
275-
}
276-
}
277-
278-
Ok(builder.finish())
279-
}
280-
}
281-
282-
pub struct DictionaryArrayConverter<DictValueSourceType, DictValueTargetType, ParquetType>
283-
{
284-
_dict_value_source_marker: PhantomData<DictValueSourceType>,
285-
_dict_value_target_marker: PhantomData<DictValueTargetType>,
286-
_parquet_marker: PhantomData<ParquetType>,
287-
}
288-
289-
impl<DictValueSourceType, DictValueTargetType, ParquetType>
290-
DictionaryArrayConverter<DictValueSourceType, DictValueTargetType, ParquetType>
291-
{
292-
pub fn new() -> Self {
293-
Self {
294-
_dict_value_source_marker: PhantomData,
295-
_dict_value_target_marker: PhantomData,
296-
_parquet_marker: PhantomData,
297-
}
298-
}
299-
}
300-
301-
impl<K, DictValueSourceType, DictValueTargetType, ParquetType>
302-
Converter<Vec<Option<<ParquetType as DataType>::T>>, DictionaryArray<K>>
303-
for DictionaryArrayConverter<DictValueSourceType, DictValueTargetType, ParquetType>
304-
where
305-
K: ArrowPrimitiveType,
306-
DictValueSourceType: ArrowPrimitiveType,
307-
DictValueTargetType: ArrowPrimitiveType,
308-
ParquetType: DataType,
309-
PrimitiveArray<DictValueSourceType>: From<Vec<Option<<ParquetType as DataType>::T>>>,
310-
{
311-
fn convert(
312-
&self,
313-
source: Vec<Option<<ParquetType as DataType>::T>>,
314-
) -> Result<DictionaryArray<K>> {
315-
let keys_builder = PrimitiveBuilder::<K>::new(source.len());
316-
let values_builder = PrimitiveBuilder::<DictValueTargetType>::new(source.len());
317-
318-
let mut builder = PrimitiveDictionaryBuilder::new(keys_builder, values_builder);
319-
320-
let source_array: Arc<dyn Array> =
321-
Arc::new(PrimitiveArray::<DictValueSourceType>::from(source));
322-
let target_array = cast(&source_array, &DictValueTargetType::DATA_TYPE)?;
323-
let target = target_array
324-
.as_any()
325-
.downcast_ref::<PrimitiveArray<DictValueTargetType>>()
326-
.unwrap();
327-
328-
for i in 0..target.len() {
329-
if target.is_null(i) {
330-
builder.append_null()?;
331-
} else {
332-
let _ = builder.append(target.value(i))?;
333-
}
334-
}
335-
336-
Ok(builder.finish())
337-
}
338-
}
339-
340242
pub type Utf8Converter =
341243
ArrayRefConverter<Vec<Option<ByteArray>>, StringArray, Utf8ArrayConverter>;
342244
pub type LargeUtf8Converter =
@@ -348,21 +250,6 @@ pub type LargeBinaryConverter = ArrayRefConverter<
348250
LargeBinaryArray,
349251
LargeBinaryArrayConverter,
350252
>;
351-
pub type StringDictionaryConverter<T> = ArrayRefConverter<
352-
Vec<Option<ByteArray>>,
353-
DictionaryArray<T>,
354-
StringDictionaryArrayConverter,
355-
>;
356-
pub type DictionaryConverter<K, SV, TV, P> = ArrayRefConverter<
357-
Vec<Option<<P as DataType>::T>>,
358-
DictionaryArray<K>,
359-
DictionaryArrayConverter<SV, TV, P>,
360-
>;
361-
pub type PrimitiveDictionaryConverter<K, V> = ArrayRefConverter<
362-
Vec<Option<<ParquetInt32Type as DataType>::T>>,
363-
DictionaryArray<K>,
364-
DictionaryArrayConverter<Int32Type, V, ParquetInt32Type>,
365-
>;
366253

367254
pub type Int96Converter =
368255
ArrayRefConverter<Vec<Option<Int96>>, TimestampNanosecondArray, Int96ArrayConverter>;

0 commit comments

Comments
 (0)