From b2ebef4f1f338697d05fa91f0a96041ba6338cb7 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Fri, 30 May 2025 18:58:38 +0200 Subject: [PATCH 1/2] Iceberg as alias for DataLakeCatalog with catalog_type='rest' --- src/Databases/DataLake/DatabaseDataLake.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Databases/DataLake/DatabaseDataLake.cpp b/src/Databases/DataLake/DatabaseDataLake.cpp index 1b556f25edd4..4eb3aeaa2d1d 100644 --- a/src/Databases/DataLake/DatabaseDataLake.cpp +++ b/src/Databases/DataLake/DatabaseDataLake.cpp @@ -572,6 +572,11 @@ void registerDatabaseDataLake(DatabaseFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", database_engine_name); } + if (database_engine_name == "Iceberg" && catalog_type != DatabaseDataLakeCatalogType::ICEBERG_REST) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `Iceberg` must have `rest` catalog type only"); + } + for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.context); @@ -637,6 +642,7 @@ void registerDatabaseDataLake(DatabaseFactory & factory) std::move(engine_for_tables)); }; factory.registerDatabase("DataLakeCatalog", create_fn, { .supports_arguments = true, .supports_settings = true }); + factory.registerDatabase("Iceberg", create_fn, { .supports_arguments = true, .supports_settings = true }); } } From a92e76455b7f0a5b9e88476a85b3805c0dc7a2d3 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Mon, 2 Jun 2025 16:34:10 +0200 Subject: [PATCH 2/2] Add tests and fix hiding sensitive info --- src/Databases/DataLake/DataLakeConstants.h | 1 + src/Parsers/ASTSetQuery.cpp | 3 +- .../integration/test_database_iceberg/test.py | 31 ++++++++++++------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/Databases/DataLake/DataLakeConstants.h b/src/Databases/DataLake/DataLakeConstants.h index eaa8f5a276e6..02f6a7dcfcd7 100644 --- a/src/Databases/DataLake/DataLakeConstants.h +++ b/src/Databases/DataLake/DataLakeConstants.h @@ -8,6 +8,7 @@ namespace DataLake { static constexpr auto DATABASE_ENGINE_NAME = "DataLakeCatalog"; +static constexpr auto DATABASE_ALIAS_NAME = "Iceberg"; static constexpr std::string_view FILE_PATH_PREFIX = "file:/"; /// Some catalogs (Unity or Glue) may store not only Iceberg/DeltaLake tables but other kinds of "tables" diff --git a/src/Parsers/ASTSetQuery.cpp b/src/Parsers/ASTSetQuery.cpp index df53bfdba6ad..5f4951e030a8 100644 --- a/src/Parsers/ASTSetQuery.cpp +++ b/src/Parsers/ASTSetQuery.cpp @@ -94,7 +94,8 @@ void ASTSetQuery::formatImpl(WriteBuffer & ostr, const FormatSettings & format, return true; } - if (DataLake::DATABASE_ENGINE_NAME == state.create_engine_name) + if (DataLake::DATABASE_ENGINE_NAME == state.create_engine_name + || DataLake::DATABASE_ALIAS_NAME == state.create_engine_name) { if (DataLake::SETTINGS_TO_HIDE.contains(change.name)) { diff --git a/tests/integration/test_database_iceberg/test.py b/tests/integration/test_database_iceberg/test.py index 7cb8c77b2907..9b7f11d2645d 100644 --- a/tests/integration/test_database_iceberg/test.py +++ b/tests/integration/test_database_iceberg/test.py @@ -70,6 +70,8 @@ DEFAULT_SORT_ORDER = SortOrder(SortField(source_id=2, transform=IdentityTransform())) +AVAILABLE_ENGINES = ["DataLakeCatalog", "Iceberg"] + def list_namespaces(): response = requests.get(f"{BASE_URL_LOCAL}/namespaces") @@ -120,7 +122,7 @@ def generate_record(): def create_clickhouse_iceberg_database( - started_cluster, node, name, additional_settings={} + started_cluster, node, name, additional_settings={}, engine='DataLakeCatalog' ): settings = { "catalog_type": "rest", @@ -134,7 +136,7 @@ def create_clickhouse_iceberg_database( f""" DROP DATABASE IF EXISTS {name}; SET allow_experimental_database_iceberg=true; -CREATE DATABASE {name} ENGINE = DataLakeCatalog('{BASE_URL}', 'minio', '{minio_secret_key}') +CREATE DATABASE {name} ENGINE = {engine}('{BASE_URL}', 'minio', '{minio_secret_key}') SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))} """ ) @@ -180,7 +182,8 @@ def started_cluster(): cluster.shutdown() -def test_list_tables(started_cluster): +@pytest.mark.parametrize("engine", AVAILABLE_ENGINES) +def test_list_tables(started_cluster, engine): node = started_cluster.instances["node1"] root_namespace = f"clickhouse_{uuid.uuid4()}" @@ -211,7 +214,7 @@ def test_list_tables(started_cluster): for namespace in [namespace_1, namespace_2]: assert len(catalog.list_tables(namespace)) == 0 - create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME) + create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME, engine=engine) tables_list = "" for table in namespace_1_tables: @@ -246,7 +249,8 @@ def test_list_tables(started_cluster): ) -def test_many_namespaces(started_cluster): +@pytest.mark.parametrize("engine", AVAILABLE_ENGINES) +def test_many_namespaces(started_cluster, engine): node = started_cluster.instances["node1"] root_namespace_1 = f"A_{uuid.uuid4()}" root_namespace_2 = f"B_{uuid.uuid4()}" @@ -267,7 +271,7 @@ def test_many_namespaces(started_cluster): for table in tables: create_table(catalog, namespace, table) - create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME) + create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME, engine=engine) for namespace in namespaces: for table in tables: @@ -279,7 +283,8 @@ def test_many_namespaces(started_cluster): ) -def test_select(started_cluster): +@pytest.mark.parametrize("engine", AVAILABLE_ENGINES) +def test_select(started_cluster, engine): node = started_cluster.instances["node1"] test_ref = f"test_list_tables_{uuid.uuid4()}" @@ -307,7 +312,7 @@ def test_select(started_cluster): df = pa.Table.from_pylist(data) table.append(df) - create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME) + create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME, engine=engine) expected = DEFAULT_CREATE_TABLE.format(CATALOG_NAME, namespace, table_name) assert expected == node.query( @@ -319,7 +324,8 @@ def test_select(started_cluster): ) -def test_hide_sensitive_info(started_cluster): +@pytest.mark.parametrize("engine", AVAILABLE_ENGINES) +def test_hide_sensitive_info(started_cluster, engine): node = started_cluster.instances["node1"] test_ref = f"test_hide_sensitive_info_{uuid.uuid4()}" @@ -337,6 +343,7 @@ def test_hide_sensitive_info(started_cluster): node, CATALOG_NAME, additional_settings={"catalog_credential": "SECRET_1"}, + engine=engine, ) assert "SECRET_1" not in node.query(f"SHOW CREATE DATABASE {CATALOG_NAME}") @@ -345,11 +352,13 @@ def test_hide_sensitive_info(started_cluster): node, CATALOG_NAME, additional_settings={"auth_header": "SECRET_2"}, + engine=engine, ) assert "SECRET_2" not in node.query(f"SHOW CREATE DATABASE {CATALOG_NAME}") -def test_tables_with_same_location(started_cluster): +@pytest.mark.parametrize("engine", AVAILABLE_ENGINES) +def test_tables_with_same_location(started_cluster, engine): node = started_cluster.instances["node1"] test_ref = f"test_tables_with_same_location_{uuid.uuid4()}" @@ -380,7 +389,7 @@ def record(key): df = pa.Table.from_pylist(data) table_2.append(df) - create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME) + create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME, engine=engine) assert 'aaa\naaa\naaa' == node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{namespace}.{table_name}`").strip() assert 'bbb\nbbb\nbbb' == node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{namespace}.{table_name_2}`").strip()