From ff7562b89f529b070f12e76eb66696b85ec7b273 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Tue, 28 Oct 2025 15:16:23 +0100 Subject: [PATCH 1/3] Small optimization of iceberg read optimization --- src/Interpreters/ClusterFunctionReadTask.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ClusterFunctionReadTask.cpp b/src/Interpreters/ClusterFunctionReadTask.cpp index 9870ef6b65bf..35815ce1c843 100644 --- a/src/Interpreters/ClusterFunctionReadTask.cpp +++ b/src/Interpreters/ClusterFunctionReadTask.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes namespace Setting { extern const SettingsBool cluster_function_process_archive_on_multiple_nodes; + extern const SettingsBool allow_experimental_iceberg_read_optimization; } ClusterFunctionReadTaskResponse::ClusterFunctionReadTaskResponse(ObjectInfoPtr object, const ContextPtr & context) @@ -29,7 +30,8 @@ ClusterFunctionReadTaskResponse::ClusterFunctionReadTaskResponse(ObjectInfoPtr o if (object->data_lake_metadata.has_value()) data_lake_metadata = object->data_lake_metadata.value(); - file_meta_info = object->file_meta_info; + if (context->getSettingsRef()[Setting::allow_experimental_iceberg_read_optimization]) + file_meta_info = object->file_meta_info; const bool send_over_whole_archive = !context->getSettingsRef()[Setting::cluster_function_process_archive_on_multiple_nodes]; path = send_over_whole_archive ?
object->getPathOrPathToArchiveIfArchive() : object->getPath(); From 787a51ef8c3657b5bdd2225ceca70daf939e7593 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Mon, 3 Nov 2025 11:59:22 +0100 Subject: [PATCH 2/3] Use allow_experimental_iceberg_read_optimization in serialize --- src/Interpreters/ClusterFunctionReadTask.cpp | 8 +++++--- src/Interpreters/ClusterFunctionReadTask.h | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/ClusterFunctionReadTask.cpp b/src/Interpreters/ClusterFunctionReadTask.cpp index 35815ce1c843..c3ccd8090d96 100644 --- a/src/Interpreters/ClusterFunctionReadTask.cpp +++ b/src/Interpreters/ClusterFunctionReadTask.cpp @@ -30,8 +30,9 @@ ClusterFunctionReadTaskResponse::ClusterFunctionReadTaskResponse(ObjectInfoPtr o if (object->data_lake_metadata.has_value()) data_lake_metadata = object->data_lake_metadata.value(); - if (context->getSettingsRef()[Setting::allow_experimental_iceberg_read_optimization]) - file_meta_info = object->file_meta_info; + file_meta_info = object->file_meta_info; + + iceberg_read_optimization_enabled = context->getSettingsRef()[Setting::allow_experimental_iceberg_read_optimization]; const bool send_over_whole_archive = !context->getSettingsRef()[Setting::cluster_function_process_archive_on_multiple_nodes]; path = send_over_whole_archive ? object->getPathOrPathToArchiveIfArchive() : object->getPath(); @@ -69,7 +70,8 @@ void ClusterFunctionReadTaskResponse::serialize(WriteBuffer & out, size_t protoc if (protocol_version >= DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_DATA_LAKE_COLUMNS_METADATA) { - if (file_meta_info.has_value()) + /// This info is not used when optimization is disabled, so there is no need to send it. 
+ if (iceberg_read_optimization_enabled && file_meta_info.has_value()) file_meta_info.value()->serialize(out); else DataFileMetaInfo().serialize(out); diff --git a/src/Interpreters/ClusterFunctionReadTask.h b/src/Interpreters/ClusterFunctionReadTask.h index ba0fd799ab80..462b7bbaa06b 100644 --- a/src/Interpreters/ClusterFunctionReadTask.h +++ b/src/Interpreters/ClusterFunctionReadTask.h @@ -23,6 +23,8 @@ struct ClusterFunctionReadTaskResponse /// File's columns info std::optional file_meta_info; + bool iceberg_read_optimization_enabled; + /// Convert received response into ObjectInfo. ObjectInfoPtr getObjectInfo() const; From 2d1baafddf9d3df7e2bdedec207ecdec897c0d5b Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Fri, 7 Nov 2025 16:07:51 +0100 Subject: [PATCH 3/3] Made iceberg_read_optimization_enabled const --- src/Interpreters/ClusterFunctionReadTask.cpp | 3 +-- src/Interpreters/ClusterFunctionReadTask.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/ClusterFunctionReadTask.cpp b/src/Interpreters/ClusterFunctionReadTask.cpp index c3ccd8090d96..3843af6d0f41 100644 --- a/src/Interpreters/ClusterFunctionReadTask.cpp +++ b/src/Interpreters/ClusterFunctionReadTask.cpp @@ -23,6 +23,7 @@ namespace Setting } ClusterFunctionReadTaskResponse::ClusterFunctionReadTaskResponse(ObjectInfoPtr object, const ContextPtr & context) + : iceberg_read_optimization_enabled(context->getSettingsRef()[Setting::allow_experimental_iceberg_read_optimization]) { if (!object) throw Exception(ErrorCodes::LOGICAL_ERROR, "`object` cannot be null"); @@ -32,8 +33,6 @@ ClusterFunctionReadTaskResponse::ClusterFunctionReadTaskResponse(ObjectInfoPtr o file_meta_info = object->file_meta_info; - iceberg_read_optimization_enabled = context->getSettingsRef()[Setting::allow_experimental_iceberg_read_optimization]; - const bool send_over_whole_archive = !context->getSettingsRef()[Setting::cluster_function_process_archive_on_multiple_nodes]; path = 
send_over_whole_archive ? object->getPathOrPathToArchiveIfArchive() : object->getPath(); } diff --git a/src/Interpreters/ClusterFunctionReadTask.h b/src/Interpreters/ClusterFunctionReadTask.h index 462b7bbaa06b..5c3b5912e5c7 100644 --- a/src/Interpreters/ClusterFunctionReadTask.h +++ b/src/Interpreters/ClusterFunctionReadTask.h @@ -23,7 +23,7 @@ struct ClusterFunctionReadTaskResponse /// File's columns info std::optional file_meta_info; - bool iceberg_read_optimization_enabled; + const bool iceberg_read_optimization_enabled = false; /// Convert received response into ObjectInfo. ObjectInfoPtr getObjectInfo() const;