taco-project
diff --git a/‎.github/workflows/publish.yml‎
Lines changed: 73 additions & 0 deletions b/‎.github/workflows/publish.yml‎
Lines changed: 73 additions & 0 deletions
diff --git a/‎.github/workflows/scripts/cuda-install.sh‎
Lines changed: 24 additions & 0 deletions b/‎.github/workflows/scripts/cuda-install.sh‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎.github/workflows/scripts/env.sh‎
Lines changed: 21 additions & 0 deletions b/‎.github/workflows/scripts/env.sh‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎.github/workflows/scripts/pytorch-install.sh‎
Lines changed: 16 additions & 0 deletions b/‎.github/workflows/scripts/pytorch-install.sh‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 30 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 13 additions & 0 deletions b/‎CONTRIBUTING.md‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 3 additions & 16 deletions b/‎README.md‎
Lines changed: 3 additions & 16 deletions
diff --git a/‎README_zh.md‎
Lines changed: 2 additions & 15 deletions b/‎README_zh.md‎
Lines changed: 2 additions & 15 deletions
diff --git a/‎VERSION‎
Lines changed: 1 addition & 0 deletions b/‎VERSION‎
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1,73 @@
+# This workflow builds the FlexKV Python wheel and uploads the contents of dist/ to a COS bucket through an S3 upload action
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions
+# Copied from vLLM github actions https://github.com/vllm-project/vllm/blob/main/.github/workflows/publish.yml
+name: flexkv ci
+
+# Run on every push to, and every pull request targeting, main or dev.
+on:
+  pull_request:
+    branches: ["main", "dev"]
+  push:
+    branches: ["main", "dev"]
+
+# Least privilege: the visible steps only check out the repository; the wheel
+# is uploaded to external object storage with its own credentials, not to a
+# GitHub release, so the workflow token does not need write access.
+# NOTE(review): restore `contents: write` if build.sh or a future step has to
+# push to the repository or publish release assets.
+permissions:
+  contents: read
+
+jobs:
+  # Builds the release wheel on a hosted runner and, for push events only,
+  # uploads the contents of dist/ to the COS bucket.
+  build:
+    name: Build Wheel
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      fail-fast: false
+      matrix:
+        # Versions stay quoted so YAML keeps them as strings
+        # (an unquoted 3.10 would be read as the float 3.1).
+        os: ['ubuntu-22.04']
+        python-version: ['3.10']
+        pytorch-version: ['2.6.0']
+        cuda-version: ['12.4']
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Linux Env
+        if: ${{ runner.os == 'Linux' }}
+        run: |
+          bash -x .github/workflows/scripts/env.sh
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
+
+      - name: Install CUDA ${{ matrix.cuda-version }}
+        run: |
+          bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}
+
+      - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
+        run: |
+          bash -x .github/workflows/scripts/pytorch-install.sh ${{ matrix.python-version }} ${{ matrix.pytorch-version }} ${{ matrix.cuda-version }}
+
+      - name: Build wheel
+        shell: bash
+        env:
+          TORCH_CUDA_ARCH_LIST: "8.9 9.0+PTX"
+          # Environment values are strings; quote to avoid implicit typing.
+          MAX_JOBS: "4"
+        run: |
+          ./build.sh --release
+
+      # Read the clock once so `date` and `time` always describe the same
+      # instant (two separate calls could disagree across a second/midnight).
+      - name: Get Date and Time
+        run: |
+          now=$(date +'%Y-%m-%d %H-%M-%S')
+          echo "date=${now% *}" >> "$GITHUB_ENV"
+          echo "time=${now#* }" >> "$GITHUB_ENV"
+
+      # Only publish on push: pull requests from forks do not receive the
+      # repository secrets, and PR builds should not write to the bucket.
+      # NOTE(review): this third-party action receives the COS credentials;
+      # pin it to a reviewed commit SHA instead of the mutable `master` ref.
+      - name: Upload to cos
+        if: ${{ github.event_name == 'push' }}
+        uses: shallwefootball/s3-upload-action@master
+        with:
+          aws_key_id: ${{ secrets.COS_SECRET_ID }}
+          aws_secret_access_key: ${{ secrets.COS_SECRET_KEY }}
+          aws_bucket: ${{ secrets.COS_BUCKET }}
+          endpoint: ${{ secrets.COS_ENDPOINT }}
+          source_dir: dist
+          destination_dir: flexkv/${{ env.date }}/${{ env.time }}
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Copied from vLLM github actions https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/cuda-install.sh
+
+# Replace '.' with '-' ex: 11.8 -> 11-8
+cuda_version=$(echo "$1" | tr "." "-")
+# Removes '-' and '.' ex: ubuntu-20.04 -> ubuntu2004
+OS=$(echo "$2" | tr -d ".\-")
+
+# Installs CUDA
+wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-keyring_1.1-1_all.deb"
+sudo dpkg -i cuda-keyring_1.1-1_all.deb
+rm cuda-keyring_1.1-1_all.deb
+sudo apt -qq update
+sudo apt -y install "cuda-${cuda_version}" "cuda-nvcc-${cuda_version}" "cuda-libraries-dev-${cuda_version}"
+sudo apt clean
+
+# Test nvcc
+PATH=/usr/local/cuda-$1/bin:${PATH}
+nvcc --version
+
+# Log gcc, g++, c++ versions
+gcc --version
+g++ --version
+c++ --version
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Copied from vLLM github actions https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/env.sh
+
+# This file installs common linux environment tools
+
+export LANG=C.UTF-8
+
+sudo    apt-get update && \
+sudo    apt-get install -y --no-install-recommends \
+        software-properties-common
+
+sudo    apt-get install -y --no-install-recommends \
+        build-essential \
+        liburing-dev \
+        git \
+        cmake
+
+# Remove github bloat files to free up disk space
+sudo rm -rf "/usr/local/share/boost"
+sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+sudo rm -rf "/usr/share/dotnet"
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Copied from vLLM github actions https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/pytorch-install.sh
+
+python_executable=python$1
+pytorch_version=$2
+cuda_version=$3
+
+# Install torch
+$python_executable -m pip install numpy ninja cython wheel typing typing-extensions dataclasses setuptools && conda clean -ya
+$python_executable -m pip install torch=="${pytorch_version}+cu${cuda_version//./}" --extra-index-url "https://download.pytorch.org/whl/cu${cuda_version//./}"
+
+# Print version information
+$python_executable --version
+$python_executable -c "import torch; print('PyTorch:', torch.__version__)"
+$python_executable -c "import torch; print('CUDA:', torch.version.cuda)"
+$python_executable -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
@@ -70,3 +70,6 @@ cover/
 
 # mypy
 .mypy_cache/
+
+# VSCode
+.vscode/
@@ -0,0 +1,30 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+## [1.0.0] - 2025-09-11
+
+### Added
+- C++ radix tree for fast matching; to enable it, set "index_accel": true in cache_config
+- sync kernel launch
+- a major change that moves the cache engine into a library that accelerators (e.g. vLLM) use directly, instead of the server-client mode.
+  This accelerates get and put when no KVCache is matched. This version includes breaking API changes and is not backward compatible.
+- add evict_ratio; to use it, set "evict_ratio": 0.05 in cache_config
+- reduce the bubble inside the launch kernel
+- add vLLM 0.10.1.1 adapter
+
+### Fixed
+- cython release package
+
+
+## [0.1.0] - 2025-08-29
+
+### Init
+- init version
+- add license
+
@@ -0,0 +1,13 @@
+# Contributing to FlexKV
+
+Thank you for your interest in contributing to FlexKV!
+
+## PR Title and Classification
+Use a prefixed PR title to indicate the type of changes. Please use one of the following:
+
+- `[bugfix]` for bugfixes  
+- `[feature]` for new features  
+- `[test]` for test cases  
+- `[ci/build]` for build or continuous integration improvements  
+- `[doc]` for documentation fixes  
+- `[misc]` for PRs that do not fit the above categories. Please use this sparingly.
@@ -14,23 +14,9 @@ FlexKV is released under the **Apache-2.0 License**. See the [LICENSE](LICENSE)
 ./build.sh
 ```
 
-### Use FlexKV with vLLM (v0.8.4)
+### Use FlexKV with vLLM
 
-Apply the patch `examples/vllm_adaption/flexkv_vllm_0_8_4.patch` to vLLM 0.8.4, then start FlexKV, vLLM, and the benchmark script:
-
-```bash
-# Start FlexKV as server
-bash benchmarks/flexkv_benchmark/run_flexkv_server.sh
-
-# Start vLLM as client
-bash benchmarks/flexkv_benchmark/serving_vllm.sh
-
-# Start benchmark
-bash benchmarks/flexkv_benchmark/multiturn_benchmark.sh
-```
-Apply the patch `examples/vllm_adaption/flexkv_vllm_0_10_0.patch` to vLLM 0.10.0, and use the same testing method as above.
-
-> **Note**: The current script is only compatible with the `main` branch. Support for the latest features in the `dev` branch is under development.
+See [docs/vllm_adapter/README_en.md](docs/vllm_adapter/README_en.md)
 
 ## Design Architecture
 
@@ -88,6 +74,7 @@ FlexKV performs:
 - The main branch is the stable branch, which maintains already tested commits. Please pull from main branch if you need stable code.
 - The dev branch is the development branch, which contains newer features. Please branch from and merge into dev if you need new features or are developing new functionality.
 - The bugfix branch is for bug fixes, maintaining urgent bugs that need immediate resolution or documentation that requires prompt updates. If you need to fix a bug or update documentation urgently, please branch from and merge into the bugfix branch.
+- The stable branch refers to the previous main branch state, intended only for rollback or extremely conservative use cases (e.g., production deployment). Its use is discouraged.
 
 ## Roadmap
 
 
@@ -16,21 +16,7 @@ FlexKV 采用 **Apache-2.0 开源协议**，详细信息请参见 [LICENSE](LICE
 
 ### 以 vLLM 为例使用 FlexKV
 
-在 vLLM 0.8.4 版本中应用patch `examples/vllm_adaption/flexkv_vllm_0_8_4.patch`，分别启动 FlexKV、vLLM 和测试脚本：
-
-```bash
-# 启动 FlexKV 作为服务端
-bash benchmarks/flexkv_benchmark/run_flexkv_server.sh
-
-# 启动 vLLM 作为客户端
-bash benchmarks/flexkv_benchmark/serving_vllm.sh
-
-# 启动性能测试
-bash benchmarks/flexkv_benchmark/multiturn_benchmark.sh
-```
-在 vLLM 0.10.0 版本中应用patch `examples/vllm_adaption/flexkv_vllm_0_10_0.patch`，测试方法同上。
-
-> **注意**：当前脚本仅适配 `main` 分支。`dev` 分支的最新特性支持脚本正在开发中。
+见[docs/vllm_adapter/README_zh.md](docs/vllm_adapter/README_zh.md)
 
 ## 设计框架
 
@@ -88,6 +74,7 @@ FlexKV 在处理 *get* 请求时：
 - main 为稳定分支，维护已经测试过的commit。需要稳定的代码请从此分支拉取。
 - dev 为开发分支，维护较新特性。需要新特性和开发新特性请从此分支拉取和合入。
 - bugfix 为bug分支，维护需要立即解决的bug或需要立即更新的文档。需要解决bug和立即更新的文档请从此分支拉取和合入。
+- stable 为上一个版本的main分支位置，仅用于回滚以及极其保守的情况使用（如产品化）。不鼓励使用此版本。
 
 ## Roadmap
 
 
@@ -0,0 +1 @@
+1.0.0