From dc1cb611873f66cc66ecce1b1c73efcb325d64ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9A=93=E7=9A=9C?= Date: Thu, 15 Jan 2026 11:35:51 +0800 Subject: [PATCH 1/5] feat(catalog): Support db/tbl exists check --- include/paimon/catalog/catalog.h | 12 ++++++++++++ src/paimon/core/catalog/file_system_catalog.cpp | 6 ++++++ src/paimon/core/catalog/file_system_catalog.h | 3 ++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/paimon/catalog/catalog.h b/include/paimon/catalog/catalog.h index 31eb51a1..457ee5b8 100644 --- a/include/paimon/catalog/catalog.h +++ b/include/paimon/catalog/catalog.h @@ -95,6 +95,18 @@ class PAIMON_EXPORT Catalog { /// status. virtual Result> ListTables(const std::string& db_name) const = 0; + /// Checks whether a database with the specified name exists in the catalog. + /// + /// @param db_name The name of the database to check for existence. + /// @return A result containing true if the database exists, false otherwise, or an error status. + virtual Result DataBaseExists(const std::string& db_name) const = 0; + + /// Checks whether a table with the specified identifier exists in the catalog. + /// + /// @param identifier The identifier of the table to check for existence. + /// @return A result containing true if the table exists, false otherwise, or an error status. + virtual Result TableExists(const Identifier& identifier) const = 0; + /// Loads the latest schema of a specified table. /// /// @note System tables will not be supported. diff --git a/src/paimon/core/catalog/file_system_catalog.cpp b/src/paimon/core/catalog/file_system_catalog.cpp index 58aba0eb..24f41f55 100644 --- a/src/paimon/core/catalog/file_system_catalog.cpp +++ b/src/paimon/core/catalog/file_system_catalog.cpp @@ -87,6 +87,12 @@ Result FileSystemCatalog::DataBaseExists(const std::string& db_name) const return fs_->Exists(NewDatabasePath(warehouse_, db_name)); } +Result FileSystemCatalog::TableExists(const Identifier& identifier) const { + PAIMON_ASSIGN_OR_RAISE(std::optional> latest_schema, + TableSchemaExists(identifier)); + return latest_schema != std::nullopt; +} + Status FileSystemCatalog::CreateTable(const Identifier& identifier, ArrowSchema* c_schema, const std::vector& partition_keys, const std::vector& primary_keys, diff --git a/src/paimon/core/catalog/file_system_catalog.h b/src/paimon/core/catalog/file_system_catalog.h index 9b5ce747..04d09d00 100644 --- a/src/paimon/core/catalog/file_system_catalog.h +++ b/src/paimon/core/catalog/file_system_catalog.h @@ -49,6 +49,8 @@ class FileSystemCatalog : public Catalog { Result> ListDatabases() const override; Result> ListTables(const std::string& database_names) const override; + Result DataBaseExists(const std::string& db_name) const override; + Result TableExists(const Identifier& identifier) const override; Result> LoadTableSchema(const Identifier& identifier) const override; private: @@ -57,7 +59,6 @@ class FileSystemCatalog : public Catalog { static bool IsSystemDatabase(const std::string& db_name); static bool IsSpecifiedSystemTable(const Identifier& identifier); static bool IsSystemTable(const Identifier& identifier); - Result DataBaseExists(const std::string& db_name) const; Result>> TableSchemaExists( const Identifier& identifier) const; From 34b02c22c51a6ed9f570d47e635555c499b40c90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9A=93=E7=9A=9C?= Date: Thu, 15 Jan 2026 11:38:11 +0800 Subject: [PATCH 2/5] refactor(schema): Use TableSchema directly --- .../core/catalog/file_system_catalog.cpp | 3 +- src/paimon/core/schema/schema_impl.h | 74 ------------------- src/paimon/core/schema/table_schema.cpp | 9 +++ src/paimon/core/schema/table_schema.h | 26 ++++--- 4 files changed, 25 insertions(+), 87 deletions(-) delete mode 100644 src/paimon/core/schema/schema_impl.h diff --git a/src/paimon/core/catalog/file_system_catalog.cpp b/src/paimon/core/catalog/file_system_catalog.cpp index 24f41f55..f2b6815d 100644 --- a/src/paimon/core/catalog/file_system_catalog.cpp +++ b/src/paimon/core/catalog/file_system_catalog.cpp @@ -27,7 +27,6 @@ #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/path_util.h" #include "paimon/common/utils/string_utils.h" -#include "paimon/core/schema/schema_impl.h" #include "paimon/core/schema/schema_manager.h" #include "paimon/fs/file_system.h" #include "paimon/logging.h" @@ -222,7 +221,7 @@ Result> FileSystemCatalog::LoadTableSchema( if (!latest_schema) { return Status::NotExist(fmt::format("{} not exist", identifier.ToString())); } - return std::make_shared(*latest_schema); + return std::static_pointer_cast(*latest_schema); } } // namespace paimon diff --git a/src/paimon/core/schema/schema_impl.h b/src/paimon/core/schema/schema_impl.h deleted file mode 100644 index 409c84e3..00000000 --- a/src/paimon/core/schema/schema_impl.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2025-present Alibaba Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - -#include "paimon/core/schema/table_schema.h" -#include "paimon/schema/schema.h" - -namespace paimon { - -class SchemaImpl : public Schema { - public: - explicit SchemaImpl(const std::shared_ptr& table_schema) - : table_schema_(table_schema) {} - - std::vector FieldNames() const override { - return table_schema_->FieldNames(); - } - int64_t Id() const override { - return table_schema_->Id(); - } - const std::vector& PrimaryKeys() const override { - return table_schema_->PrimaryKeys(); - } - const std::vector& PartitionKeys() const override { - return table_schema_->PartitionKeys(); - } - const std::vector& BucketKeys() const override { - return table_schema_->BucketKeys(); - } - int32_t NumBuckets() const override { - return table_schema_->NumBuckets(); - } - int32_t HighestFieldId() const override { - return table_schema_->HighestFieldId(); - } - const std::map& Options() const override { - return table_schema_->Options(); - } - std::optional Comment() const override { - return table_schema_->Comment(); - } - - Result> GetArrowSchema() const override { - const auto& fields = table_schema_->Fields(); - std::shared_ptr schema = DataField::ConvertDataFieldsToArrowSchema(fields); - auto arrow_schema = std::make_unique<::ArrowSchema>(); - PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*schema, arrow_schema.get())); - return arrow_schema; - } - - private: - std::shared_ptr table_schema_; -}; - -} // namespace paimon diff --git a/src/paimon/core/schema/table_schema.cpp b/src/paimon/core/schema/table_schema.cpp index 984215df..bd58f724 100644 --- a/src/paimon/core/schema/table_schema.cpp +++ b/src/paimon/core/schema/table_schema.cpp @@ -27,6 +27,7 @@ #include "fmt/format.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/date_time_utils.h" +#include "paimon/common/utils/field_type_utils.h" #include "paimon/common/utils/object_utils.h" #include "paimon/common/utils/options_utils.h" #include "paimon/common/utils/rapidjson_util.h" @@ -169,6 +170,14 @@ bool TableSchema::operator==(const TableSchema& other) const { options_ == other.options_ && comment_ == other.comment_ && time_millis_ == other.time_millis_; } + +Result> TableSchema::GetArrowSchema() const { + auto schema = DataField::ConvertDataFieldsToArrowSchema(fields_); + auto c_schema = std::make_unique<::ArrowSchema>(); + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*schema, c_schema.get())); + return c_schema; +} + std::vector TableSchema::FieldNames() const { std::vector field_names; field_names.reserve(fields_.size()); diff --git a/src/paimon/core/schema/table_schema.h b/src/paimon/core/schema/table_schema.h index 57542348..5818677a 100644 --- a/src/paimon/core/schema/table_schema.h +++ b/src/paimon/core/schema/table_schema.h @@ -27,6 +27,7 @@ #include "paimon/common/types/data_field.h" #include "paimon/common/utils/jsonizable.h" #include "paimon/result.h" +#include "paimon/schema/schema.h" #include "rapidjson/allocators.h" #include "rapidjson/document.h" #include "rapidjson/rapidjson.h" @@ -35,7 +36,7 @@ struct ArrowSchema; namespace paimon { /// Schema of a table, including schemaId and fieldId. -class TableSchema : public Jsonizable { +class TableSchema : public Schema, public Jsonizable { public: static constexpr int64_t FIRST_SCHEMA_ID = 0; static constexpr int32_t PAIMON_07_VERSION = 1; @@ -57,28 +58,31 @@ class TableSchema : public Jsonizable { bool operator==(const TableSchema& other) const; - std::vector FieldNames() const; - int64_t Id() const { + Result> GetArrowSchema() const override; + + std::vector FieldNames() const override; + + int64_t Id() const override { return id_; } - const std::vector& PrimaryKeys() const { + const std::vector& PrimaryKeys() const override { return primary_keys_; } - const std::vector& PartitionKeys() const { + const std::vector& PartitionKeys() const override { return partition_keys_; } - const std::vector& BucketKeys() const { + const std::vector& BucketKeys() const override { return bucket_keys_; } - int32_t NumBuckets() const { + int32_t NumBuckets() const override { return num_bucket_; } - int32_t HighestFieldId() const { + int32_t HighestFieldId() const override { return highest_field_id_; } - const std::map& Options() const { + const std::map& Options() const override { return options_; } const std::vector& Fields() const { @@ -92,7 +96,7 @@ class TableSchema : public Jsonizable { Result> GetFields(const std::vector& field_names) const; Result> TrimmedPrimaryKeys() const; - std::optional Comment() const { + std::optional Comment() const override { return comment_; } @@ -130,7 +134,7 @@ class TableSchema : public Jsonizable { std::vector partition_keys_; std::vector primary_keys_; std::vector bucket_keys_; - int32_t num_bucket_; + int32_t num_bucket_ = -1; std::map options_; std::optional comment_; int64_t time_millis_ = -1; From 827f55b67a166e06ab6a2807d1ac23abdfb8bca9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9A=93=E7=9A=9C?= Date: Thu, 15 Jan 2026 15:16:03 +0800 Subject: [PATCH 3/5] feat(schema): Add GetJsonSchema for Schema interface --- include/paimon/schema/schema.h | 7 +++++++ src/paimon/core/schema/table_schema.h | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/include/paimon/schema/schema.h b/include/paimon/schema/schema.h index f5ed04f1..66b232f1 100644 --- a/include/paimon/schema/schema.h +++ b/include/paimon/schema/schema.h @@ -39,6 +39,13 @@ class PAIMON_EXPORT Schema { /// @return A result containing an ArrowSchema, or an error status if conversion fails. virtual Result> GetArrowSchema() const = 0; + /// Get the JSON schema representation of this table schema. + /// + /// This method provides a JSON string that represents the complete schema information. + /// + /// @return A string containing the JSON schema, or an error status on failure. + virtual Result GetJsonSchema() const = 0; + /// Get the names of all fields in the table schema. /// @return A vector of field names. virtual std::vector FieldNames() const = 0; diff --git a/src/paimon/core/schema/table_schema.h b/src/paimon/core/schema/table_schema.h index 5818677a..a5b2a3bc 100644 --- a/src/paimon/core/schema/table_schema.h +++ b/src/paimon/core/schema/table_schema.h @@ -60,6 +60,10 @@ class TableSchema : public Schema, public Jsonizable { Result> GetArrowSchema() const override; + Result GetJsonSchema() const override { + return ToJsonString(); + } + std::vector FieldNames() const override; int64_t Id() const override { From 3b051aa82688369af8e7e03086540d0b7dfbe056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9A=93=E7=9A=9C?= Date: Thu, 15 Jan 2026 19:02:50 +0800 Subject: [PATCH 4/5] test(catalog): Test TableExists & GetJsonSchema --- .../core/catalog/file_system_catalog_test.cpp | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/paimon/core/catalog/file_system_catalog_test.cpp b/src/paimon/core/catalog/file_system_catalog_test.cpp index cf28561b..f33c9997 100644 --- a/src/paimon/core/catalog/file_system_catalog_test.cpp +++ b/src/paimon/core/catalog/file_system_catalog_test.cpp @@ -134,12 +134,16 @@ TEST(FileSystemCatalogTest, TestCreateTable) { arrow::Schema typed_schema(fields); ::ArrowSchema schema; ASSERT_TRUE(arrow::ExportSchema(typed_schema, &schema).ok()); - ASSERT_OK(catalog.CreateTable(Identifier("db1", "tbl1"), &schema, + Identifier identifier("db1", "tbl1"); + ASSERT_OK_AND_ASSIGN(auto exist, catalog.TableExists(identifier)); + ASSERT_FALSE(exist); + + ASSERT_OK(catalog.CreateTable(identifier, &schema, /*partition_keys=*/{"f1", "f2"}, /*primary_keys=*/{"f3"}, options, false)); - ASSERT_OK_AND_ASSIGN(std::vector table_names, catalog.ListTables("db1")); - ASSERT_EQ(1, table_names.size()); - ASSERT_EQ(table_names[0], "tbl1"); + + ASSERT_OK_AND_ASSIGN(exist, catalog.TableExists(identifier)); + ASSERT_TRUE(exist); ArrowSchemaRelease(&schema); } @@ -349,13 +353,19 @@ TEST(FileSystemCatalogTest, TestValidateTableSchema) { std::vector expected_field_names = {"f0", "f1", "f2", "f3"}; ASSERT_EQ(field_names, expected_field_names); + ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); + std::string schema_path = PathUtil::JoinPath(dir->Str(), "db1.db/tbl1/schema/schema-0"); + std::string expected_json_schema;; + ASSERT_OK(fs->ReadFile(schema_path, &expected_json_schema)); + + ASSERT_OK_AND_ASSIGN(auto json_schema, table_schema->GetJsonSchema()); + ASSERT_EQ(expected_json_schema, json_schema); + ASSERT_OK_AND_ASSIGN(auto arrow_schema, table_schema->GetArrowSchema()); auto loaded_schema = arrow::ImportSchema(arrow_schema.get()).ValueOrDie(); ASSERT_TRUE(typed_schema.Equals(loaded_schema)); - ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); - ASSERT_OK(fs->Delete(PathUtil::JoinPath(dir->Str(), "db1.db/tbl1/schema/schema-0"))); - + ASSERT_OK(fs->Delete(schema_path)); ASSERT_NOK_WITH_MSG(catalog.LoadTableSchema(Identifier("db1", "tbl1")), "Identifier{database=\'db1\', table=\'tbl1\'} not exist"); From 71c6e9a5ad6c65af3cc706a3e9083bea1ec8c614 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9A=93=E7=9A=9C?= Date: Sat, 17 Jan 2026 14:07:43 +0800 Subject: [PATCH 5/5] fix(format) --- include/paimon/catalog/catalog.h | 3 ++- src/paimon/core/catalog/file_system_catalog.cpp | 2 +- src/paimon/core/catalog/file_system_catalog_test.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/paimon/catalog/catalog.h b/include/paimon/catalog/catalog.h index 457ee5b8..c7801fcb 100644 --- a/include/paimon/catalog/catalog.h +++ b/include/paimon/catalog/catalog.h @@ -98,7 +98,8 @@ class PAIMON_EXPORT Catalog { /// Checks whether a database with the specified name exists in the catalog. /// /// @param db_name The name of the database to check for existence. - /// @return A result containing true if the database exists, false otherwise, or an error status. + /// @return A result containing true if the database exists, false otherwise, or an error + /// status. virtual Result DataBaseExists(const std::string& db_name) const = 0; /// Checks whether a table with the specified identifier exists in the catalog. diff --git a/src/paimon/core/catalog/file_system_catalog.cpp b/src/paimon/core/catalog/file_system_catalog.cpp index f2b6815d..a8bef047 100644 --- a/src/paimon/core/catalog/file_system_catalog.cpp +++ b/src/paimon/core/catalog/file_system_catalog.cpp @@ -88,7 +88,7 @@ Result FileSystemCatalog::DataBaseExists(const std::string& db_name) const Result FileSystemCatalog::TableExists(const Identifier& identifier) const { PAIMON_ASSIGN_OR_RAISE(std::optional> latest_schema, - TableSchemaExists(identifier)); + TableSchemaExists(identifier)); return latest_schema != std::nullopt; } diff --git a/src/paimon/core/catalog/file_system_catalog_test.cpp b/src/paimon/core/catalog/file_system_catalog_test.cpp index f33c9997..14467cf5 100644 --- a/src/paimon/core/catalog/file_system_catalog_test.cpp +++ b/src/paimon/core/catalog/file_system_catalog_test.cpp @@ -355,7 +355,7 @@ TEST(FileSystemCatalogTest, TestValidateTableSchema) { ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); std::string schema_path = PathUtil::JoinPath(dir->Str(), "db1.db/tbl1/schema/schema-0"); - std::string expected_json_schema;; + std::string expected_json_schema; ASSERT_OK(fs->ReadFile(schema_path, &expected_json_schema)); ASSERT_OK_AND_ASSIGN(auto json_schema, table_schema->GetJsonSchema());