From 111ca9ce05538c0187736ffbb8273958b998b9f4 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Thu, 1 Sep 2022 19:40:40 +0200
Subject: [PATCH] Merge pull request #40485 from arthurpassos/fix-parquet-chunked-array-deserialization

Add support for extended (chunked) arrays for Parquet format
---
 .../Formats/Impl/ParquetBlockInputFormat.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index 12fa9710c42f..427c159314b3 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -55,7 +55,16 @@ Chunk ParquetBlockInputFormat::generate()
         return res;
 
     std::shared_ptr<arrow::Table> table;
-    arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, column_indices, &table);
+
+    std::unique_ptr<::arrow::RecordBatchReader> rbr;
+    std::vector<int> row_group_indices { row_group_current };
+    arrow::Status get_batch_reader_status = file_reader->GetRecordBatchReader(row_group_indices, column_indices, &rbr);
+
+    if (!get_batch_reader_status.ok())
+        throw ParsingException{"Error while reading Parquet data: " + get_batch_reader_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA};
+
+    arrow::Status read_status = rbr->ReadAll(&table);
+
     if (!read_status.ok())
         throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA};
 