From 062b862f61aeee7c50a31037c3eb44b3083d9e23 Mon Sep 17 00:00:00 2001 From: "yonghao.fyh" Date: Mon, 5 Jan 2026 11:53:06 +0800 Subject: [PATCH 1/4] feat: support map to/from json string and string utils replacelast --- src/paimon/CMakeLists.txt | 1 + src/paimon/common/utils/rapidjson_util.cpp | 55 +++++++++++++++++++ src/paimon/common/utils/rapidjson_util.h | 40 +++++++++----- .../common/utils/rapidjson_util_test.cpp | 12 ++++ src/paimon/common/utils/string_utils.cpp | 10 ++++ src/paimon/common/utils/string_utils.h | 3 + src/paimon/common/utils/string_utils_test.cpp | 22 ++++++++ 7 files changed, 128 insertions(+), 15 deletions(-) create mode 100644 src/paimon/common/utils/rapidjson_util.cpp diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index 82d516c0..af9fe154 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -105,6 +105,7 @@ set(PAIMON_COMMON_SRCS common/utils/range.cpp common/utils/roaring_bitmap32.cpp common/utils/roaring_bitmap64.cpp + common/utils/rapidjson_util.cpp common/utils/status.cpp common/utils/string_utils.cpp) diff --git a/src/paimon/common/utils/rapidjson_util.cpp b/src/paimon/common/utils/rapidjson_util.cpp new file mode 100644 index 00000000..be260ff8 --- /dev/null +++ b/src/paimon/common/utils/rapidjson_util.cpp @@ -0,0 +1,55 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "paimon/common/utils/rapidjson_util.h" + +namespace paimon { + +std::string RapidJsonUtil::MapToJsonString(const std::map& map) { + rapidjson::Document d; + d.SetObject(); + rapidjson::Document::AllocatorType& allocator = d.GetAllocator(); + + for (const auto& kv : map) { + d.AddMember(rapidjson::Value(kv.first.c_str(), allocator), + rapidjson::Value(kv.second.c_str(), allocator), allocator); + } + + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + d.Accept(writer); + + return buffer.GetString(); +} + +Result> RapidJsonUtil::MapFromJsonString( + const std::string& json_str) { + rapidjson::Document doc; + doc.Parse(json_str.c_str()); + if (doc.HasParseError() || !doc.IsObject()) { + return Status::Invalid("deserialize failed: parse error or not JSON object: ", json_str); + } + + std::map result; + for (auto it = doc.MemberBegin(); it != doc.MemberEnd(); ++it) { + if (it->name.IsString() && it->value.IsString()) { + result[it->name.GetString()] = it->value.GetString(); + } + } + return result; +} + +} // namespace paimon diff --git a/src/paimon/common/utils/rapidjson_util.h b/src/paimon/common/utils/rapidjson_util.h index 08824661..0b583d63 100644 --- a/src/paimon/common/utils/rapidjson_util.h +++ b/src/paimon/common/utils/rapidjson_util.h @@ -35,16 +35,12 @@ #include "rapidjson/writer.h" namespace paimon { + class RapidJsonUtil { public: RapidJsonUtil() = delete; ~RapidJsonUtil() = delete; - // supports vector and map and optional, if T is custom type, T must have ToJson() - // noted that rapidjson does not support map with non string key (will - // trigger assert in rapidjson: Assertion `name.IsString()' failed) - // therefore, RapidJsonUtil convert key to string in serialize and convert string to key type in - // deserialize template static inline Status ToJsonString(const T& obj, std::string* json_str) { rapidjson::Document doc; @@ -53,6 +49,9 @@ class RapidJsonUtil { try { if constexpr (is_pointer::value) { value = 
obj->ToJson(&allocator); + } else if constexpr (std::is_same_v>) { + *json_str = MapToJsonString(obj); + return Status::OK(); } else { value = obj.ToJson(&allocator); } @@ -67,19 +66,26 @@ class RapidJsonUtil { return Status::OK(); } - // supports vector and map, if T is custom type, T must have FromJson() template static inline Status FromJsonString(const std::string& json_str, T* obj) { - rapidjson::Document doc; - if (!obj || !FromJson(json_str, &doc)) { - return Status::Invalid("deserialize failed: ", json_str); + if (!obj) { + return Status::Invalid("deserialize failed: obj is nullptr"); } - try { - obj->FromJson(doc); - } catch (const std::invalid_argument& e) { - return Status::Invalid("deserialize failed, possibly type incompatible: ", e.what()); - } catch (...) { - return Status::Invalid("deserialize failed, reason unknown: ", json_str); + if constexpr (std::is_same_v>) { + PAIMON_ASSIGN_OR_RAISE(*obj, MapFromJsonString(json_str)); + } else { + rapidjson::Document doc; + if (!obj || !FromJson(json_str, &doc)) { + return Status::Invalid("deserialize failed: ", json_str); + } + try { + obj->FromJson(doc); + } catch (const std::invalid_argument& e) { + return Status::Invalid("deserialize failed, possibly type incompatible: ", + e.what()); + } catch (...) 
{ + return Status::Invalid("deserialize failed, reason unknown: ", json_str); + } } return Status::OK(); } @@ -140,6 +146,10 @@ class RapidJsonUtil { template static T GetValue(const rapidjson::Value& value); + + static std::string MapToJsonString(const std::map& map); + static Result> MapFromJsonString( + const std::string& json_str); }; template diff --git a/src/paimon/common/utils/rapidjson_util_test.cpp b/src/paimon/common/utils/rapidjson_util_test.cpp index 57d09471..b143653b 100644 --- a/src/paimon/common/utils/rapidjson_util_test.cpp +++ b/src/paimon/common/utils/rapidjson_util_test.cpp @@ -22,6 +22,7 @@ #include #include "gtest/gtest.h" +#include "paimon/testing/utils/testharness.h" #include "rapidjson/allocators.h" #include "rapidjson/document.h" #include "rapidjson/rapidjson.h" @@ -129,4 +130,15 @@ TEST(RapidJsonUtilTest, TestSerializeAndDeserialize) { ASSERT_EQ(2.333, non_exist_value); } +TEST(RapidJsonUtilTest, TestMapJsonString) { + std::map m1 = {{"key1", "value1"}, {"key2", "value2"}}; + std::string result; + ASSERT_OK(RapidJsonUtil::ToJsonString(m1, &result)); + ASSERT_EQ(result, "{\"key1\":\"value1\",\"key2\":\"value2\"}"); + + std::map m2; + ASSERT_OK(RapidJsonUtil::FromJsonString(result, &m2)); + ASSERT_EQ(m1, m2); +} + } // namespace paimon::test diff --git a/src/paimon/common/utils/string_utils.cpp b/src/paimon/common/utils/string_utils.cpp index c387c024..88312d03 100644 --- a/src/paimon/common/utils/string_utils.cpp +++ b/src/paimon/common/utils/string_utils.cpp @@ -40,6 +40,16 @@ std::string StringUtils::Replace(const std::string& text, const std::string& sea return str; } +std::string StringUtils::ReplaceLast(const std::string& text, const std::string& old_str, + const std::string& new_str) { + std::string str = text; + size_t pos = str.rfind(old_str); + if (pos != std::string::npos) { + str.replace(pos, old_str.size(), new_str); + } + return str; +} + bool StringUtils::StartsWith(const std::string& str, const std::string& prefix, size_t 
start_pos) { return (str.size() >= prefix.size()) && (str.compare(start_pos, prefix.size(), prefix) == 0); } diff --git a/src/paimon/common/utils/string_utils.h b/src/paimon/common/utils/string_utils.h index 0f9f478f..f0b01090 100644 --- a/src/paimon/common/utils/string_utils.h +++ b/src/paimon/common/utils/string_utils.h @@ -98,6 +98,9 @@ class PAIMON_EXPORT StringUtils { static std::string Replace(const std::string& text, const std::string& search_string, const std::string& replacement, int32_t max); + static std::string ReplaceLast(const std::string& text, const std::string& old_str, + const std::string& new_str); + static bool StartsWith(const std::string& str, const std::string& prefix, size_t start_pos = 0); static bool EndsWith(const std::string& str, const std::string& suffix); diff --git a/src/paimon/common/utils/string_utils_test.cpp b/src/paimon/common/utils/string_utils_test.cpp index 3b421f76..18bb44e7 100644 --- a/src/paimon/common/utils/string_utils_test.cpp +++ b/src/paimon/common/utils/string_utils_test.cpp @@ -113,6 +113,28 @@ TEST_F(StringUtilsTest, TestReplaceAll) { } } +TEST_F(StringUtilsTest, TestReplaceLast) { + { + std::string origin = "a/b/c//"; + std::string expect = "a/b/c/_"; + std::string actual = StringUtils::ReplaceLast(origin, "/", "_"); + ASSERT_EQ(expect, actual); + } + { + std::string origin = "a/b/c//"; + std::string expect = "a/b/c//"; + std::string actual = StringUtils::ReplaceLast(origin, "_", "/"); + ASSERT_EQ(expect, actual); + } + + { + std::string origin = "how is is you"; + std::string expect = "how is are you"; + std::string actual = StringUtils::ReplaceLast(origin, "is", "are"); + ASSERT_EQ(expect, actual); + } +} + TEST_F(StringUtilsTest, TestReplaceWithMaxCount) { { std::string origin = "how is is you"; From cf166b481a2feaa1558cae97a022a59c97ae8873 Mon Sep 17 00:00:00 2001 From: "yonghao.fyh" Date: Mon, 5 Jan 2026 13:26:56 +0800 Subject: [PATCH 2/4] fix: reject non-string JSON members in MapFromJsonString and drop redundant null check in FromJsonString --- src/paimon/common/utils/rapidjson_util.cpp | 6 ++++-- 
src/paimon/common/utils/rapidjson_util.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/paimon/common/utils/rapidjson_util.cpp b/src/paimon/common/utils/rapidjson_util.cpp index be260ff8..aa2233c0 100644 --- a/src/paimon/common/utils/rapidjson_util.cpp +++ b/src/paimon/common/utils/rapidjson_util.cpp @@ -45,9 +45,11 @@ Result> RapidJsonUtil::MapFromJsonString( std::map result; for (auto it = doc.MemberBegin(); it != doc.MemberEnd(); ++it) { - if (it->name.IsString() && it->value.IsString()) { - result[it->name.GetString()] = it->value.GetString(); + if (!it->name.IsString() || !it->value.IsString()) { + return Status::Invalid("deserialize failed: non-string key or value in JSON object: ", + json_str); } + result[it->name.GetString()] = it->value.GetString(); } return result; } diff --git a/src/paimon/common/utils/rapidjson_util.h b/src/paimon/common/utils/rapidjson_util.h index 0b583d63..4db2fe50 100644 --- a/src/paimon/common/utils/rapidjson_util.h +++ b/src/paimon/common/utils/rapidjson_util.h @@ -75,7 +75,7 @@ class RapidJsonUtil { PAIMON_ASSIGN_OR_RAISE(*obj, MapFromJsonString(json_str)); } else { rapidjson::Document doc; - if (!obj || !FromJson(json_str, &doc)) { + if (!FromJson(json_str, &doc)) { return Status::Invalid("deserialize failed: ", json_str); } try { From bae81709bd359d286c509fee80000e540e3237c7 Mon Sep 17 00:00:00 2001 From: "yonghao.fyh" Date: Mon, 5 Jan 2026 14:52:48 +0800 Subject: [PATCH 3/4] docs: restore ToJson()/FromJson() requirement comments on ToJsonString and FromJsonString --- src/paimon/common/utils/rapidjson_util.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/paimon/common/utils/rapidjson_util.h b/src/paimon/common/utils/rapidjson_util.h index 4db2fe50..6159039e 100644 --- a/src/paimon/common/utils/rapidjson_util.h +++ b/src/paimon/common/utils/rapidjson_util.h @@ -41,6 +41,7 @@ class RapidJsonUtil { RapidJsonUtil() = delete; ~RapidJsonUtil() = delete; + // if T is custom type, T must have ToJson() template static inline Status ToJsonString(const T& obj, std::string* json_str) { 
rapidjson::Document doc; @@ -66,6 +67,7 @@ class RapidJsonUtil { return Status::OK(); } + // if T is custom type, T must have FromJson() template static inline Status FromJsonString(const std::string& json_str, T* obj) { if (!obj) { From 88f44e1c854409a87061873581b95476d9b76465 Mon Sep 17 00:00:00 2001 From: "yonghao.fyh" Date: Mon, 5 Jan 2026 15:07:22 +0800 Subject: [PATCH 4/4] refactor: define map JSON helpers inline in rapidjson_util.h and remove rapidjson_util.cpp --- src/paimon/CMakeLists.txt | 1 - src/paimon/common/utils/rapidjson_util.cpp | 57 ---------------------- src/paimon/common/utils/rapidjson_util.h | 36 +++++++++++++- 3 files changed, 34 insertions(+), 60 deletions(-) delete mode 100644 src/paimon/common/utils/rapidjson_util.cpp diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index af9fe154..82d516c0 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -105,7 +105,6 @@ set(PAIMON_COMMON_SRCS common/utils/range.cpp common/utils/roaring_bitmap32.cpp common/utils/roaring_bitmap64.cpp - common/utils/rapidjson_util.cpp common/utils/status.cpp common/utils/string_utils.cpp) diff --git a/src/paimon/common/utils/rapidjson_util.cpp b/src/paimon/common/utils/rapidjson_util.cpp deleted file mode 100644 index aa2233c0..00000000 --- a/src/paimon/common/utils/rapidjson_util.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2026-present Alibaba Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "paimon/common/utils/rapidjson_util.h" - -namespace paimon { - -std::string RapidJsonUtil::MapToJsonString(const std::map& map) { - rapidjson::Document d; - d.SetObject(); - rapidjson::Document::AllocatorType& allocator = d.GetAllocator(); - - for (const auto& kv : map) { - d.AddMember(rapidjson::Value(kv.first.c_str(), allocator), - rapidjson::Value(kv.second.c_str(), allocator), allocator); - } - - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - d.Accept(writer); - - return buffer.GetString(); -} - -Result> RapidJsonUtil::MapFromJsonString( - const std::string& json_str) { - rapidjson::Document doc; - doc.Parse(json_str.c_str()); - if (doc.HasParseError() || !doc.IsObject()) { - return Status::Invalid("deserialize failed: parse error or not JSON object: ", json_str); - } - - std::map result; - for (auto it = doc.MemberBegin(); it != doc.MemberEnd(); ++it) { - if (!it->name.IsString() || !it->value.IsString()) { - return Status::Invalid("deserialize failed: non-string key or value in JSON object: ", - json_str); - } - result[it->name.GetString()] = it->value.GetString(); - } - return result; -} - -} // namespace paimon diff --git a/src/paimon/common/utils/rapidjson_util.h b/src/paimon/common/utils/rapidjson_util.h index 6159039e..7a6c4400 100644 --- a/src/paimon/common/utils/rapidjson_util.h +++ b/src/paimon/common/utils/rapidjson_util.h @@ -149,9 +149,41 @@ class RapidJsonUtil { template static T GetValue(const rapidjson::Value& value); - static std::string MapToJsonString(const std::map& map); + static std::string MapToJsonString(const std::map& map) { + rapidjson::Document d; + d.SetObject(); + rapidjson::Document::AllocatorType& allocator = d.GetAllocator(); + + for (const auto& kv : map) { + d.AddMember(rapidjson::Value(kv.first.c_str(), allocator), + rapidjson::Value(kv.second.c_str(), allocator), allocator); + } + + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + d.Accept(writer); + + return 
buffer.GetString(); + } static Result> MapFromJsonString( - const std::string& json_str); + const std::string& json_str) { + rapidjson::Document doc; + doc.Parse(json_str.c_str()); + if (doc.HasParseError() || !doc.IsObject()) { + return Status::Invalid("deserialize failed: parse error or not JSON object: ", + json_str); + } + + std::map result; + for (auto it = doc.MemberBegin(); it != doc.MemberEnd(); ++it) { + if (!it->name.IsString() || !it->value.IsString()) { + return Status::Invalid( + "deserialize failed: non-string key or value in JSON object: ", json_str); + } + result[it->name.GetString()] = it->value.GetString(); + } + return result; + } }; template