@@ -77,10 +77,10 @@ use object_store::{ObjectMeta, ObjectStore};
 use parquet::arrow::arrow_reader::statistics::StatisticsConverter;
 use parquet::arrow::arrow_writer::{
     compute_leaves, ArrowColumnChunk, ArrowColumnWriter, ArrowLeafColumn,
-    ArrowRowGroupWriterFactory, ArrowWriterOptions,
+    ArrowWriterOptions,
 };
 use parquet::arrow::async_reader::MetadataFetch;
-use parquet::arrow::{parquet_to_arrow_schema, ArrowSchemaConverter, AsyncArrowWriter};
+use parquet::arrow::{parquet_to_arrow_schema, ArrowSchemaConverter, ArrowWriter, AsyncArrowWriter};
 use parquet::basic::Type;
 
 use parquet::errors::ParquetError;
@@ -1463,13 +1463,10 @@ type ColSender = Sender<ArrowLeafColumn>;
 /// Returns join handles for each columns serialization task along with a send channel
 /// to send arrow arrays to each serialization task.
 fn spawn_column_parallel_row_group_writer(
-    arrow_row_group_writer_factory: Arc<ArrowRowGroupWriterFactory>,
+    col_writers: Vec<ArrowColumnWriter>,
     max_buffer_size: usize,
     pool: &Arc<dyn MemoryPool>,
 ) -> Result<(Vec<ColumnWriterTask>, Vec<ColSender>)> {
-    let arrow_row_group_writer =
-        arrow_row_group_writer_factory.create_row_group_writer(0)?;
-    let col_writers = arrow_row_group_writer.into_column_writers();
     let num_columns = col_writers.len();
 
     let mut col_writer_tasks = Vec::with_capacity(num_columns);
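This helper no longer builds writers from a factory; callers now hand it the `ArrowColumnWriter`s directly. For context, here is a minimal sketch of the fan-out pattern the function implements — one encoding task per leaf column, each fed `ArrowLeafColumn` values over a channel. It is not DataFusion's exact code: it uses `std::thread` and `std::sync::mpsc` where the real function uses `SpawnedTask`, tokio channels, and memory-pool accounting.

```rust
use std::sync::mpsc;
use std::thread;

use arrow::record_batch::RecordBatch;
use parquet::arrow::arrow_writer::{compute_leaves, ArrowColumnWriter, ArrowLeafColumn};
use parquet::errors::Result;

/// Spawn one encoding task per leaf column; returns the join handles and
/// the per-column senders used to feed each task its leaf arrays.
fn spawn_column_tasks(
    col_writers: Vec<ArrowColumnWriter>,
) -> (
    Vec<thread::JoinHandle<Result<ArrowColumnWriter>>>,
    Vec<mpsc::Sender<ArrowLeafColumn>>,
) {
    let mut handles = Vec::with_capacity(col_writers.len());
    let mut senders = Vec::with_capacity(col_writers.len());
    for mut writer in col_writers {
        let (tx, rx) = mpsc::channel::<ArrowLeafColumn>();
        senders.push(tx);
        handles.push(thread::spawn(move || {
            // Encode leaf arrays as they arrive; the loop ends when the
            // sender side is dropped at the row-group boundary.
            for leaf in rx {
                writer.write(&leaf)?;
            }
            Ok(writer)
        }));
    }
    (handles, senders)
}

/// Decompose a batch into leaf columns and fan them out to the tasks.
fn send_batch(batch: &RecordBatch, senders: &[mpsc::Sender<ArrowLeafColumn>]) -> Result<()> {
    let mut next = 0;
    for (field, array) in batch.schema().fields().iter().zip(batch.columns()) {
        // A nested field (struct, list, ...) expands to several leaves,
        // so one top-level column may feed more than one sender.
        for leaf in compute_leaves(field.as_ref(), array)? {
            let _ = senders[next].send(leaf);
            next += 1;
        }
    }
    Ok(())
}
```

Each leaf column encodes independently of the others, which is what makes this per-column fan-out safe.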
@@ -1564,7 +1561,7 @@ fn spawn_rg_join_and_finalize_task(
 /// across both columns and row_groups, with a theoretical max number of parallel tasks
 /// given by n_columns * num_row_groups.
 fn spawn_parquet_parallel_serialization_task(
-    arrow_row_group_writer_factory: Arc<ArrowRowGroupWriterFactory>,
+    arrow_writer: ArrowWriter<SerializedFileWriter<SharedBuffer>>,
     mut data: Receiver<RecordBatch>,
     serialize_tx: Sender<SpawnedTask<RBStreamSerializeResult>>,
     schema: Arc<Schema>,
@@ -1575,9 +1572,10 @@ fn spawn_parquet_parallel_serialization_task(
     SpawnedTask::spawn(async move {
         let max_buffer_rb = parallel_options.max_buffered_record_batches_per_stream;
         let max_row_group_rows = writer_props.max_row_group_size();
+        let col_writers = arrow_writer.get_column_writers().unwrap();
         let (mut column_writer_handles, mut col_array_channels) =
             spawn_column_parallel_row_group_writer(
-                Arc::clone(&arrow_row_group_writer_factory),
+                col_writers,
                 max_buffer_rb,
                 &pool,
             )?;
@@ -1631,7 +1629,7 @@ fn spawn_parquet_parallel_serialization_task(
 
             (column_writer_handles, col_array_channels) =
                 spawn_column_parallel_row_group_writer(
-                    Arc::clone(&arrow_row_group_writer_factory),
+                    col_writers,
                     max_buffer_rb,
                     &pool,
                 )?;
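At a row-group boundary the loop finalizes the in-flight row group and, as shown above, spawns a fresh set of column writer tasks. A minimal sketch of the batch-splitting step that precedes this, with a hypothetical helper name — DataFusion's loop does the equivalent inline with `RecordBatch::slice`:

```rust
use arrow::record_batch::RecordBatch;

/// Split `batch` so the first part fits in the current row group, which
/// has `remaining` rows of capacity; the second part (if any) starts the
/// next row group. (Hypothetical helper, for illustration only.)
fn split_at_row_group_boundary(
    batch: RecordBatch,
    remaining: usize,
) -> (RecordBatch, Option<RecordBatch>) {
    if batch.num_rows() <= remaining {
        (batch, None)
    } else {
        let head = batch.slice(0, remaining);
        let tail = batch.slice(remaining, batch.num_rows() - remaining);
        (head, Some(tail))
    }
}
```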
@@ -1730,16 +1728,12 @@ async fn output_single_parquet_file_parallelized(
         parquet_schema.root_schema_ptr(),
         parquet_props.clone().into(),
     )?;
-    let arrow_row_group_writer_factory = ArrowRowGroupWriterFactory::new(
-        &parquet_writer,
-        parquet_schema,
-        Arc::clone(&output_schema),
-        parquet_props.clone().into(),
-    );
+    let writer = ArrowWriter::try_new(
+        parquet_writer, Arc::clone(&output_schema), Some(parquet_props.clone()))?;
 
     let arc_props = Arc::new(parquet_props.clone());
     let launch_serialization_task = spawn_parquet_parallel_serialization_task(
-        Arc::new(arrow_row_group_writer_factory),
+        writer,
         data,
         serialize_tx,
         Arc::clone(&output_schema),
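Construction now goes through the public `ArrowWriter` API over the same `SerializedFileWriter<SharedBuffer>`, instead of the crate-internal `ArrowRowGroupWriterFactory`. To round out the picture, here is a minimal sketch of the finalize side, assuming the parquet crate's low-level `arrow_writer` API and the `handles` produced by the fan-out sketch above: join the column tasks, close each writer into an `ArrowColumnChunk`, and append the chunks to the file as one row group.

```rust
use std::io::Write;
use std::thread::JoinHandle;

use parquet::arrow::arrow_writer::ArrowColumnWriter;
use parquet::errors::Result;
use parquet::file::writer::SerializedFileWriter;

/// Join the per-column tasks, close each writer into an encoded column
/// chunk, and append the chunks (in column order) as one row group.
fn finalize_row_group<W: Write + Send>(
    handles: Vec<JoinHandle<Result<ArrowColumnWriter>>>,
    file_writer: &mut SerializedFileWriter<W>,
) -> Result<()> {
    let mut row_group = file_writer.next_row_group()?;
    for handle in handles {
        let writer = handle.join().expect("column task panicked")?;
        let chunk = writer.close()?;
        chunk.append_to_row_group(&mut row_group)?;
    }
    row_group.close()?;
    Ok(())
}
```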