Skip to content

Commit 5e3d768

Browse files
committed
Mooncake|Posix
1 parent 1c0a081 commit 5e3d768

23 files changed

Lines changed: 2544 additions & 1 deletion

examples/ucm_mooncake_config.yaml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# UCM + Mooncake Configuration Example
2+
#
3+
# Prerequisites:
4+
# 1. mooncake master_service running: mooncake_master --port 50088
5+
# 2. libmooncakestore.so built and installed
6+
# 3. NPU device available
7+
#
8+
# Usage:
9+
# kv_connector_extra_config={"UCM_CONFIG_FILE": "/path/to/ucm_mooncake_config.yaml"}
10+
11+
ucm_connectors:
12+
- ucm_connector_name: "UcmPipelineStore"
13+
ucm_connector_config:
14+
store_pipeline: "Mooncake|Posix"
15+
local_hostname: "127.0.0.1"
16+
master_server_address: "127.0.0.1:50088"
17+
metadata_server: "P2PHANDSHAKE"
18+
protocol: "ascend"
19+
global_segment_size: 32212254720
20+
replica_num: 1
21+
local_buffer_size: 1073741824
22+
storage_backends: "/mnt/test"
23+
io_direct: false
24+
# device_name: "" # RDMA device, leave empty for ascend
25+
26+
# prerequisiteHandle requires event sync to ensure NPU compute finishes
27+
# before Mooncake reads device memory for dump.
28+
enable_event_sync: true
29+
30+
# Enable UCM metrics (optional)
31+
# metrics_config_path: "/workspace/unified-cache-management/examples/metrics/metrics_configs.yaml"
32+
33+
# Sparse attention (optional)
34+
# ucm_sparse_config:
35+
# GSAOnDevice: {}
36+
37+
# Whether to use layerwise loading/saving (optional, default: True for UCMConnector)
38+
use_layerwise: true

ucm/integration/vllm/ucm_connector.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
382382
)
383383

384384
self.store = self._create_store(self.kv_cache_layout, store_cores)
385+
self._register_kv_cache_memory()
385386

386387
if worker_cores:
387388
try:
@@ -393,6 +394,31 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
393394
if self.device is None:
394395
raise RuntimeError(f"Unsupported device platform for UCMDirectConnector.")
395396

397+
def _register_kv_cache_memory(self):
398+
for layer_name, kv_layer in self.kv_caches.items():
399+
if isinstance(kv_layer, torch.Tensor):
400+
if kv_layer.dim() == 5:
401+
num_blocks = kv_layer.shape[1]
402+
block_size = kv_layer[0].shape[1:].numel() * kv_layer.element_size()
403+
total_size = num_blocks * block_size
404+
self.store.register_memory(kv_layer[0].data_ptr(), total_size)
405+
self.store.register_memory(kv_layer[1].data_ptr(), total_size)
406+
elif kv_layer.dim() == 3:
407+
num_blocks = kv_layer.shape[0]
408+
total_size = kv_layer.numel() * kv_layer.element_size()
409+
self.store.register_memory(kv_layer.data_ptr(), total_size)
410+
else:
411+
raise ValueError(
412+
f"Unsupported kv cache tensor shape: {kv_layer.shape}"
413+
)
414+
elif isinstance(kv_layer, Tuple):
415+
for tensor in kv_layer:
416+
total_size = tensor.numel() * tensor.element_size()
417+
self.store.register_memory(tensor.data_ptr(), total_size)
418+
else:
419+
raise TypeError(f"Unsupported kv cache type: {type(kv_layer)}")
420+
logger.info(f"Registered {len(self.kv_caches)} layers' kv cache memory")
421+
396422
def get_num_new_matched_tokens(
397423
self,
398424
request: "Request",

ucm/shared/vendor/helper.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ function(find_reachable_git_url OUT_REACHABLE_URL IN_URL_LIST)
1515
COMMAND ${GIT_EXECUTABLE} ls-remote --heads "${GIT_URL}"
1616
RESULT_VARIABLE GIT_RESULT
1717
OUTPUT_QUIET ERROR_QUIET
18-
TIMEOUT 5
18+
TIMEOUT 20
1919
)
2020
if(GIT_RESULT EQUAL 0)
2121
set(${OUT_REACHABLE_URL} ${GIT_URL} PARENT_SCOPE)

ucm/store/cache/cc/cache_store.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ class CacheStore : public StoreV1 {
9999
if (s.Failure()) [[unlikely]] { UC_ERROR("Failed({}) to wait task({}).", s, taskId); }
100100
return s;
101101
}
102+
Status RegisterMemory(void* base_addr, size_t total_size) override { return Status::OK(); }
102103

103104
private:
104105
Config ParseConfig(const Detail::Dictionary& config)

ucm/store/empty/cc/empty_store.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class EmptyStore : public StoreV1 {
4242
Expected<Detail::TaskHandle> Dump(Detail::TaskDesc task) { return NextId(); }
4343
Expected<bool> Check(Detail::TaskHandle taskId) { return true; }
4444
Status Wait(Detail::TaskHandle taskId) { return Status::OK(); }
45+
Status RegisterMemory(void* base_addr, size_t total_size) { return Status::OK(); }
4546

4647
private:
4748
static Detail::TaskHandle NextId() noexcept

ucm/store/fake/cc/fake_store.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ class FakeStore : public StoreV1 {
8282
}
8383
Expected<bool> Check(Detail::TaskHandle taskId) override { return true; }
8484
Status Wait(Detail::TaskHandle taskId) override { return Status::OK(); }
85+
Status RegisterMemory(void* base_addr, size_t total_size) override { return Status::OK(); }
8586

8687
private:
8788
static Detail::TaskHandle NextId() noexcept
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
find_path(ASCEND_ACL_INCLUDE_DIR NAMES acl/acl.h PATHS /usr/local/Ascend/ascend-toolkit/latest/include NO_DEFAULT_PATH)
2+
find_library(ASCEND_ACL_LIBRARY NAMES ascendcl PATHS /usr/local/Ascend/ascend-toolkit/latest/lib64 NO_DEFAULT_PATH)
3+
find_library(MOONCAKE_STORE_LIBRARY NAMES mooncake_store PATHS /vllm-workspace/Mooncake/mooncake-store/lib NO_DEFAULT_PATH)
4+
5+
if(ASCEND_ACL_INCLUDE_DIR AND ASCEND_ACL_LIBRARY AND MOONCAKE_STORE_LIBRARY)
6+
file(GLOB_RECURSE SOURCES "./cc/*.cc")
7+
add_library(mooncakestore SHARED ${SOURCES})
8+
9+
target_compile_features(mooncakestore PUBLIC cxx_std_20)
10+
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
11+
target_compile_options(mooncakestore PRIVATE -fcoroutines)
12+
endif()
13+
14+
target_compile_definitions(mooncakestore PRIVATE YLT_ENABLE_IBV SPDLOG_FMT_EXTERNAL)
15+
16+
if(NOT MOONCAKE_STORE_INCLUDE_DIR)
17+
set(MOONCAKE_STORE_INCLUDE_DIR "/vllm-workspace/Mooncake/mooncake-store/include")
18+
endif()
19+
20+
target_include_directories(mooncakestore PUBLIC
21+
${CMAKE_CURRENT_SOURCE_DIR}/cc
22+
${MOONCAKE_STORE_INCLUDE_DIR}
23+
${MOONCAKE_STORE_INCLUDE_DIR}/cachelib_memory_allocator
24+
${MOONCAKE_STORE_INCLUDE_DIR}/cachelib_memory_allocator/fake_include
25+
)
26+
target_include_directories(mooncakestore SYSTEM PUBLIC
27+
${MOONCAKE_STORE_INCLUDE_DIR}/cachelib_memory_allocator/include
28+
${ASCEND_ACL_INCLUDE_DIR}
29+
)
30+
31+
target_link_libraries(mooncakestore PUBLIC
32+
storeintf infra_logger fmt ${MOONCAKE_STORE_LIBRARY}
33+
${ASCEND_ACL_LIBRARY}
34+
)
35+
36+
file(RELATIVE_PATH INSTALL_REL_PATH ${UCM_ROOT_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
37+
install(TARGETS mooncakestore LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm)
38+
else()
39+
message(STATUS "mooncakestore: Skipping build - required Ascend/Mooncake dependencies not found")
40+
if(NOT ASCEND_ACL_INCLUDE_DIR)
41+
message(STATUS " * Missing: Ascend ACL headers (acl/acl.h)")
42+
endif()
43+
if(NOT ASCEND_ACL_LIBRARY)
44+
message(STATUS " * Missing: libascendcl.so library")
45+
endif()
46+
if(NOT MOONCAKE_STORE_LIBRARY)
47+
message(STATUS " * Missing: libmooncake_store.so library")
48+
endif()
49+
message(STATUS " Please ensure Ascend and Mooncake dependencies are installed or build paths are correct")
50+
endif()
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/**
2+
* MIT License
3+
*
4+
* Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
* */
24+
#ifndef UNIFIEDCACHE_MOONCAKE_STORE_CC_GLOBAL_CONFIG_H
25+
#define UNIFIEDCACHE_MOONCAKE_STORE_CC_GLOBAL_CONFIG_H
26+
27+
#include <cstdint>
28+
#include <string>
29+
#include <vector>
30+
31+
namespace UC {
32+
class StoreV1;
33+
}
34+
35+
namespace UC::MooncakeStore {
36+
37+
struct Config {
38+
std::string localHostname{};
39+
std::string metadataServer{"P2PHANDSHAKE"};
40+
std::string masterServerAddress{"127.0.0.1:50088"};
41+
std::string protocol{"ascend"};
42+
std::string deviceName{};
43+
44+
uint64_t globalSegmentSize{32212254720};
45+
uint64_t localBufferSize{1073741824};
46+
uint32_t replicaNum{1};
47+
bool withSoftPin{false};
48+
49+
std::vector<uint64_t> tensorSizeList{};
50+
51+
int32_t deviceId{-1};
52+
uint32_t loadWorkerNum{32};
53+
uint32_t dumpWorkerNum{32};
54+
uint32_t dumpGetBufferNum{4};
55+
uint32_t dumpSubmitNum{4};
56+
uint32_t dumpWaitNum{4};
57+
uint32_t loadWaitNum{4};
58+
uint32_t loadPutNum{4};
59+
uint32_t loadGetNum{4};
60+
uint32_t hostBufPoolSize{8192};
61+
62+
StoreV1* storeBackend{nullptr};
63+
};
64+
65+
} // namespace UC::MooncakeStore
66+
67+
#endif
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/**
2+
* MIT License
3+
*
4+
* Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
* */
24+
#ifndef UNIFIEDCACHE_MOONCAKE_STORE_CC_HOST_BUFFER_POOL_H
25+
#define UNIFIEDCACHE_MOONCAKE_STORE_CC_HOST_BUFFER_POOL_H
26+
27+
#include <chrono>
28+
#include <condition_variable>
29+
#include <cstddef>
30+
#include <memory>
31+
#include <mutex>
32+
#include "thread/index_pool.h"
33+
34+
namespace UC::MooncakeStore {
35+
36+
class HostBufferPool {
37+
public:
38+
HostBufferPool() = default;
39+
~HostBufferPool() = default;
40+
41+
void Setup(uint32_t count, size_t unitSize)
42+
{
43+
if (count == 0 || unitSize == 0) { return; }
44+
size_t totalSize = static_cast<size_t>(count) * unitSize;
45+
if (totalSize / unitSize != count) { return; }
46+
unitSize_ = unitSize;
47+
count_ = count;
48+
pool_ = std::make_unique<char[]>(totalSize);
49+
index_.Setup(count);
50+
}
51+
52+
void* Acquire()
53+
{
54+
if (!pool_ || unitSize_ == 0) { return nullptr; }
55+
auto idx = index_.Acquire();
56+
if (idx == IndexPool::npos) { return nullptr; }
57+
return pool_.get() + static_cast<size_t>(idx) * unitSize_;
58+
}
59+
60+
void* AcquireWithTimeout(std::chrono::milliseconds timeout)
61+
{
62+
if (!pool_ || unitSize_ == 0) { return nullptr; }
63+
auto idx = index_.Acquire();
64+
if (idx != IndexPool::npos) { return pool_.get() + static_cast<size_t>(idx) * unitSize_; }
65+
std::unique_lock<std::mutex> lk(cvMtx_);
66+
auto deadline = std::chrono::steady_clock::now() + timeout;
67+
while (true) {
68+
cv_.wait_until(lk, deadline);
69+
idx = index_.Acquire();
70+
if (idx != IndexPool::npos) {
71+
return pool_.get() + static_cast<size_t>(idx) * unitSize_;
72+
}
73+
if (std::chrono::steady_clock::now() >= deadline) { return nullptr; }
74+
}
75+
}
76+
77+
void Release(void* buf)
78+
{
79+
if (!buf || !pool_ || unitSize_ == 0) { return; }
80+
auto offset = static_cast<char*>(buf) - pool_.get();
81+
if (offset < 0 || static_cast<size_t>(offset) >= static_cast<size_t>(count_) * unitSize_) {
82+
return;
83+
}
84+
if (static_cast<size_t>(offset) % unitSize_ != 0) { return; }
85+
auto idx = static_cast<IndexPool::Index>(static_cast<size_t>(offset) / unitSize_);
86+
index_.Release(idx);
87+
cv_.notify_one();
88+
}
89+
90+
size_t UnitSize() const { return unitSize_; }
91+
uint32_t Count() const { return count_; }
92+
93+
private:
94+
std::unique_ptr<char[]> pool_;
95+
size_t unitSize_{0};
96+
uint32_t count_{0};
97+
IndexPool index_;
98+
std::mutex cvMtx_;
99+
std::condition_variable cv_;
100+
};
101+
102+
} // namespace UC::MooncakeStore
103+
104+
#endif

0 commit comments

Comments
 (0)