From c2ca1325df3685593b3de43a76292e5179b858cc Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sat, 15 Nov 2025 21:43:20 +0100 Subject: [PATCH 1/5] preserve format settings --- src/Storages/IStorage.h | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 12 ++++-------- src/Storages/MergeTree/MergeTreeExportManifest.h | 13 +++---------- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 3 ++- src/Storages/ObjectStorage/StorageObjectStorage.h | 1 + .../ObjectStorage/StorageObjectStorageCluster.cpp | 6 ++++-- .../ObjectStorage/StorageObjectStorageCluster.h | 1 + 7 files changed, 16 insertions(+), 21 deletions(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 035d15a6a204..9df664ca746b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -468,6 +468,7 @@ It is currently only implemented in StorageObjectStorage. Block & /* block_with_partition_values */, std::string & /* destination_file_path */, bool /* overwrite_if_exists */, + const std::optional & /* format_settings */ ContextPtr /* context */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Import is not implemented for storage {}", getName()); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6e7dcb2cec6f..744c141c5c97 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -6241,13 +6242,12 @@ void MergeTreeData::exportPartToTable(const PartitionCommand & command, ContextP part_name, getStorageID().getFullTableName()); { + const auto format_settings = getFormatSettings(query_context); MergeTreeExportManifest manifest( dest_storage->getStorageID(), part, query_context->getSettingsRef()[Setting::export_merge_tree_part_overwrite_file_if_exists], - query_context->getSettingsRef()[Setting::output_format_parallel_formatting], - query_context->getSettingsRef()[Setting::output_format_parquet_parallel_encoding], - query_context->getSettingsRef()[Setting::max_threads]); + format_settings); std::lock_guard lock(export_manifests_mutex); @@ -6293,16 +6293,12 @@ void MergeTreeData::exportPartToTableImpl( try { - auto context_copy = Context::createCopy(local_context); - context_copy->setSetting("output_format_parallel_formatting", manifest.parallel_formatting); - context_copy->setSetting("output_format_parquet_parallel_encoding", manifest.parquet_parallel_encoding); - context_copy->setSetting("max_threads", manifest.max_threads); - sink = destination_storage->import( manifest.data_part->name + "_" + manifest.data_part->checksums.getTotalChecksumHex(), block_with_partition_values, destination_file_path, manifest.overwrite_file_if_exists, + format_settings, context_copy); } catch (const Exception & e) diff --git a/src/Storages/MergeTree/MergeTreeExportManifest.h b/src/Storages/MergeTree/MergeTreeExportManifest.h index 5e3d264f47eb..68ee450ac9f6 100644 --- a/src/Storages/MergeTree/MergeTreeExportManifest.h +++ b/src/Storages/MergeTree/MergeTreeExportManifest.h @@ -13,24 +13,17 @@ struct MergeTreeExportManifest const StorageID & destination_storage_id_, const DataPartPtr & data_part_, bool overwrite_file_if_exists_, - bool parallel_formatting_, - bool parallel_formatting_parquet_, - std::size_t max_threads_) + const FormatSettings & format_settings_) : destination_storage_id(destination_storage_id_), data_part(data_part_), overwrite_file_if_exists(overwrite_file_if_exists_), - parallel_formatting(parallel_formatting_), - parquet_parallel_encoding(parallel_formatting_parquet_), - max_threads(max_threads_), + format_settings(format_settings_) create_time(time(nullptr)) {} StorageID destination_storage_id; DataPartPtr data_part; bool overwrite_file_if_exists; - bool parallel_formatting; - /// parquet has a different setting for parallel formatting - bool parquet_parallel_encoding; - std::size_t max_threads; + FormatSettings format_settings; time_t create_time; mutable bool in_progress = false; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 711ef86664f0..6aa579b834b0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -483,6 +483,7 @@ SinkToStoragePtr StorageObjectStorage::import( Block & block_with_partition_values, std::string & destination_file_path, bool overwrite_if_exists, + const std::optional & format_settings_ ContextPtr local_context) { std::string partition_key; @@ -508,7 +509,7 @@ SinkToStoragePtr StorageObjectStorage::import( destination_file_path, object_storage, configuration, - std::nullopt, /// passing nullopt to force rebuild for format_settings based on query context + format_settings_ ? format_settings_ : format_settings, std::make_shared(getInMemoryMetadataPtr()->getSampleBlock()), local_context); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 9c118913ef46..94b91101dc76 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -86,6 +86,7 @@ class StorageObjectStorage : public IStorage Block & /* block_with_partition_values */, std::string & /* destination_file_path */, bool /* overwrite_if_exists */, + const std::optional & /* format_settings_ */ ContextPtr /* context */) override; void truncate( diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 3929231b5772..f55a9d0b41c9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -575,12 +576,13 @@ SinkToStoragePtr StorageObjectStorageCluster::import( Block & block_with_partition_values, std::string & destination_file_path, bool overwrite_if_exists, + const std::optional & format_settings_ ContextPtr context) { if (pure_storage) - return pure_storage->import(file_name, block_with_partition_values, destination_file_path, overwrite_if_exists, context); + return pure_storage->import(file_name, block_with_partition_values, destination_file_path, overwrite_if_exists, format_settings_, context); - return IStorageCluster::import(file_name, block_with_partition_values, destination_file_path, overwrite_if_exists, context); + return IStorageCluster::import(file_name, block_with_partition_values, destination_file_path, overwrite_if_exists, format_settings_, context); } void StorageObjectStorageCluster::readFallBackToPure( diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index a62145f64f61..39bd399ab1c5 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -130,6 +130,7 @@ class StorageObjectStorageCluster : public IStorageCluster Block & /* block_with_partition_values */, std::string & /* destination_file_path */, bool /* overwrite_if_exists */, + const std::optional & /* format_settings_ */ ContextPtr /* context */) override; bool prefersLargeBlocks() const override; From 4aa8eb54591d2c7ae309a705c22fdd21858a2edc Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sat, 15 Nov 2025 21:55:54 +0100 Subject: [PATCH 2/5] build issue --- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 744c141c5c97..9427ff9334d8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6298,8 +6298,8 @@ void MergeTreeData::exportPartToTableImpl( block_with_partition_values, destination_file_path, manifest.overwrite_file_if_exists, - format_settings, - context_copy); + manifest.format_settings, + local_context); } catch (const Exception & e) { From 84cdcc24157865e3e83c7e4f369d004b905546d0 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sat, 15 Nov 2025 22:49:41 +0100 Subject: [PATCH 3/5] try to fix build without building --- src/Storages/IStorage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 9df664ca746b..334dc7bdfc8f 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -468,7 +468,7 @@ It is currently only implemented in StorageObjectStorage. Block & /* block_with_partition_values */, std::string & /* destination_file_path */, bool /* overwrite_if_exists */, - const std::optional & /* format_settings */ + const std::optional & /* format_settings */, ContextPtr /* context */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Import is not implemented for storage {}", getName()); From 47ba4547838e8ec0031fb808e2935bc264b1b4b9 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sun, 16 Nov 2025 00:20:01 +0100 Subject: [PATCH 4/5] build issue --- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 2 +- src/Storages/ObjectStorage/StorageObjectStorage.h | 2 +- src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp | 2 +- src/Storages/ObjectStorage/StorageObjectStorageCluster.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 6aa579b834b0..1bda30398e17 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -483,7 +483,7 @@ SinkToStoragePtr StorageObjectStorage::import( Block & block_with_partition_values, std::string & destination_file_path, bool overwrite_if_exists, - const std::optional & format_settings_ + const std::optional & format_settings_, ContextPtr local_context) { std::string partition_key; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 94b91101dc76..ebdf87b2c280 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -86,7 +86,7 @@ class StorageObjectStorage : public IStorage Block & /* block_with_partition_values */, std::string & /* destination_file_path */, bool /* overwrite_if_exists */, - const std::optional & /* format_settings_ */ + const std::optional & /* format_settings_ */, ContextPtr /* context */) override; void truncate( diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index f55a9d0b41c9..c190c1d3dcb5 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -576,7 +576,7 @@ SinkToStoragePtr StorageObjectStorageCluster::import( Block & block_with_partition_values, std::string & destination_file_path, bool overwrite_if_exists, - const std::optional & format_settings_ + const std::optional & format_settings_, ContextPtr context) { if (pure_storage) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 39bd399ab1c5..583d549a89a0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -130,7 +130,7 @@ class StorageObjectStorageCluster : public IStorageCluster Block & /* block_with_partition_values */, std::string & /* destination_file_path */, bool /* overwrite_if_exists */, - const std::optional & /* format_settings_ */ + const std::optional & /* format_settings_ */, ContextPtr /* context */) override; bool prefersLargeBlocks() const override; From f9419092b57dc371bb90d7a648c55566a717f9cb Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sun, 16 Nov 2025 00:33:04 +0100 Subject: [PATCH 5/5] build issue --- src/Storages/MergeTree/MergeTreeExportManifest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeExportManifest.h b/src/Storages/MergeTree/MergeTreeExportManifest.h index 68ee450ac9f6..05506ecb004a 100644 --- a/src/Storages/MergeTree/MergeTreeExportManifest.h +++ b/src/Storages/MergeTree/MergeTreeExportManifest.h @@ -17,7 +17,7 @@ struct MergeTreeExportManifest : destination_storage_id(destination_storage_id_), data_part(data_part_), overwrite_file_if_exists(overwrite_file_if_exists_), - format_settings(format_settings_) + format_settings(format_settings_), create_time(time(nullptr)) {} StorageID destination_storage_id;