1515// specific language governing permissions and limitations
1616// under the License.
1717
18- use crate :: data_type:: { ByteArray , DataType , FixedLenByteArray , Int96 } ;
19- // TODO: clean up imports (best done when there are few moving parts)
18+ use crate :: data_type:: { ByteArray , FixedLenByteArray , Int96 } ;
2019use arrow:: array:: {
21- Array , ArrayRef , BinaryBuilder , FixedSizeBinaryBuilder ,
22- IntervalDayTimeArray , IntervalDayTimeBuilder , IntervalYearMonthArray ,
23- IntervalYearMonthBuilder , LargeBinaryBuilder , LargeStringBuilder , PrimitiveBuilder ,
24- PrimitiveDictionaryBuilder , StringBuilder , StringDictionaryBuilder ,
20+ Array , ArrayRef , BinaryArray , BinaryBuilder , DecimalArray , FixedSizeBinaryArray ,
21+ FixedSizeBinaryBuilder , IntervalDayTimeArray , IntervalDayTimeBuilder ,
22+ IntervalYearMonthArray , IntervalYearMonthBuilder , LargeBinaryArray ,
23+ LargeBinaryBuilder , LargeStringArray , LargeStringBuilder , StringArray , StringBuilder ,
24+ TimestampNanosecondArray ,
2525} ;
26- use arrow:: compute:: cast;
2726use std:: convert:: { From , TryInto } ;
2827use std:: sync:: Arc ;
2928
3029use crate :: errors:: Result ;
31- use arrow:: datatypes:: { ArrowDictionaryKeyType , ArrowPrimitiveType } ;
32-
33- use arrow:: array:: {
34- BinaryArray , DecimalArray , DictionaryArray , FixedSizeBinaryArray , LargeBinaryArray ,
35- LargeStringArray , PrimitiveArray , StringArray , TimestampNanosecondArray ,
36- } ;
3730use std:: marker:: PhantomData ;
3831
39- use crate :: data_type:: Int32Type as ParquetInt32Type ;
40- use arrow:: datatypes:: Int32Type ;
41-
4232/// A converter is used to consume record reader's content and convert it to arrow
4333/// primitive array.
4434pub trait Converter < S , T > {
@@ -100,13 +90,11 @@ impl DecimalArrayConverter {
10090
10191impl Converter < Vec < Option < FixedLenByteArray > > , DecimalArray > for DecimalArrayConverter {
10292 fn convert ( & self , source : Vec < Option < FixedLenByteArray > > ) -> Result < DecimalArray > {
103- let array = source. into_iter ( )
93+ let array = source
94+ . into_iter ( )
10495 . map ( |array| array. map ( |array| Self :: from_bytes_to_i128 ( array. data ( ) ) ) )
10596 . collect :: < DecimalArray > ( )
106- . with_precision_and_scale (
107- self . precision as usize ,
108- self . scale as usize
109- ) ?;
97+ . with_precision_and_scale ( self . precision as usize , self . scale as usize ) ?;
11098
11199 Ok ( array)
112100 }
@@ -251,92 +239,6 @@ impl Converter<Vec<Option<ByteArray>>, LargeBinaryArray> for LargeBinaryArrayCon
251239 }
252240}
253241
254- pub struct StringDictionaryArrayConverter { }
255-
256- impl < K : ArrowDictionaryKeyType > Converter < Vec < Option < ByteArray > > , DictionaryArray < K > >
257- for StringDictionaryArrayConverter
258- {
259- fn convert ( & self , source : Vec < Option < ByteArray > > ) -> Result < DictionaryArray < K > > {
260- let data_size = source
261- . iter ( )
262- . map ( |x| x. as_ref ( ) . map ( |b| b. len ( ) ) . unwrap_or ( 0 ) )
263- . sum ( ) ;
264-
265- let keys_builder = PrimitiveBuilder :: < K > :: new ( source. len ( ) ) ;
266- let values_builder = StringBuilder :: with_capacity ( source. len ( ) , data_size) ;
267-
268- let mut builder = StringDictionaryBuilder :: new ( keys_builder, values_builder) ;
269- for v in source {
270- match v {
271- Some ( array) => {
272- let _ = builder. append ( array. as_utf8 ( ) ?) ?;
273- }
274- None => builder. append_null ( ) ?,
275- }
276- }
277-
278- Ok ( builder. finish ( ) )
279- }
280- }
281-
/// Converter type that stores no data: its three type parameters are only
/// recorded through `PhantomData` markers.
pub struct DictionaryArrayConverter<DictValueSourceType, DictValueTargetType, ParquetType>
{
    /// Marker for `DictValueSourceType`.
    _dict_value_source_marker: PhantomData<DictValueSourceType>,
    /// Marker for `DictValueTargetType`.
    _dict_value_target_marker: PhantomData<DictValueTargetType>,
    /// Marker for `ParquetType`.
    _parquet_marker: PhantomData<ParquetType>,
}

impl<DictValueSourceType, DictValueTargetType, ParquetType>
    DictionaryArrayConverter<DictValueSourceType, DictValueTargetType, ParquetType>
{
    /// Creates a converter; every field is a `PhantomData` marker.
    pub fn new() -> Self {
        DictionaryArrayConverter {
            _parquet_marker: PhantomData,
            _dict_value_target_marker: PhantomData,
            _dict_value_source_marker: PhantomData,
        }
    }
}
300-
301- impl < K , DictValueSourceType , DictValueTargetType , ParquetType >
302- Converter < Vec < Option < <ParquetType as DataType >:: T > > , DictionaryArray < K > >
303- for DictionaryArrayConverter < DictValueSourceType , DictValueTargetType , ParquetType >
304- where
305- K : ArrowPrimitiveType ,
306- DictValueSourceType : ArrowPrimitiveType ,
307- DictValueTargetType : ArrowPrimitiveType ,
308- ParquetType : DataType ,
309- PrimitiveArray < DictValueSourceType > : From < Vec < Option < <ParquetType as DataType >:: T > > > ,
310- {
311- fn convert (
312- & self ,
313- source : Vec < Option < <ParquetType as DataType >:: T > > ,
314- ) -> Result < DictionaryArray < K > > {
315- let keys_builder = PrimitiveBuilder :: < K > :: new ( source. len ( ) ) ;
316- let values_builder = PrimitiveBuilder :: < DictValueTargetType > :: new ( source. len ( ) ) ;
317-
318- let mut builder = PrimitiveDictionaryBuilder :: new ( keys_builder, values_builder) ;
319-
320- let source_array: Arc < dyn Array > =
321- Arc :: new ( PrimitiveArray :: < DictValueSourceType > :: from ( source) ) ;
322- let target_array = cast ( & source_array, & DictValueTargetType :: DATA_TYPE ) ?;
323- let target = target_array
324- . as_any ( )
325- . downcast_ref :: < PrimitiveArray < DictValueTargetType > > ( )
326- . unwrap ( ) ;
327-
328- for i in 0 ..target. len ( ) {
329- if target. is_null ( i) {
330- builder. append_null ( ) ?;
331- } else {
332- let _ = builder. append ( target. value ( i) ) ?;
333- }
334- }
335-
336- Ok ( builder. finish ( ) )
337- }
338- }
339-
340242pub type Utf8Converter =
341243 ArrayRefConverter < Vec < Option < ByteArray > > , StringArray , Utf8ArrayConverter > ;
342244pub type LargeUtf8Converter =
@@ -348,21 +250,6 @@ pub type LargeBinaryConverter = ArrayRefConverter<
348250 LargeBinaryArray ,
349251 LargeBinaryArrayConverter ,
350252> ;
351- pub type StringDictionaryConverter < T > = ArrayRefConverter <
352- Vec < Option < ByteArray > > ,
353- DictionaryArray < T > ,
354- StringDictionaryArrayConverter ,
355- > ;
356- pub type DictionaryConverter < K , SV , TV , P > = ArrayRefConverter <
357- Vec < Option < <P as DataType >:: T > > ,
358- DictionaryArray < K > ,
359- DictionaryArrayConverter < SV , TV , P > ,
360- > ;
361- pub type PrimitiveDictionaryConverter < K , V > = ArrayRefConverter <
362- Vec < Option < <ParquetInt32Type as DataType >:: T > > ,
363- DictionaryArray < K > ,
364- DictionaryArrayConverter < Int32Type , V , ParquetInt32Type > ,
365- > ;
366253
367254pub type Int96Converter =
368255 ArrayRefConverter < Vec < Option < Int96 > > , TimestampNanosecondArray , Int96ArrayConverter > ;
0 commit comments