Closed

Changes from all commits · 53 commits
c2a26fa
add vllm 0.10.0 support
linhu-nv Sep 1, 2025
b8a9ff0
radix tree c++ impl (#70)
charliecgxu Aug 19, 2025
ef0720b
sync kernel launch
Luis-xu Aug 20, 2025
0290841
kvmanager refactor (#73)
zhuofan1123 Aug 27, 2025
875a99a
feat: add support release wheel (#77)
Aug 22, 2025
22fc69e
update unit tests for new version (#79)
zhuofan1123 Aug 22, 2025
42247c2
enable profile in release build
charliecgxu Aug 22, 2025
502e9aa
rename functions
zhuofan1123 Aug 22, 2025
ee91ca7
add evict_ratio in cache config, default is 0
peaceforeverCN Aug 21, 2025
d5ecff6
update benchmark worker (#82)
zhuofan1123 Aug 25, 2025
419156e
fix broken cpp radix tree support for cache engine (#84)
charliecgxu Aug 25, 2025
3581cda
ci: trigger on main and dev
Aug 25, 2025
0857111
fix direct io
zhuofan1123 Aug 26, 2025
1007125
quickfix for return type of reduce_tensor
linhu-nv Aug 27, 2025
32e7905
fix bug
gz944367214 Aug 28, 2025
f082e43
fix status bug
zhuofan1123 Aug 29, 2025
98902c7
Using ring buffer in transfer engine to manage the src and dst block …
Luis-xu Aug 27, 2025
c1f70fd
refine ring_buffer and apply it to all workers
Luis-xu Aug 28, 2025
0ca6304
rename PinnedMemoryRing to SharedMemoryRing
Luis-xu Aug 28, 2025
277b6a3
allow to exceed the max_block_num
zhuofan1123 Aug 29, 2025
d1aff96
refactor: use hash to allocate buffer && no wait for free slot
zhuofan1123 Aug 29, 2025
fa50901
allow different tp ranks have different num_gpu_blocks
linhu-nv Sep 2, 2025
82c6e2f
fix
linhu-nv Sep 3, 2025
828d36f
create arrays of gpu_block infos in c++ to avoid invalid ptrs
linhu-nv Sep 3, 2025
f27ef18
vllm v0.10.1.1 adapter
gz944367214 Sep 3, 2025
a77191c
fix bug
zhuofan1123 Sep 4, 2025
1e50623
server-client mode works now (#92)
linhu-nv Sep 5, 2025
38c83ce
[docs] change vllm adapter README
peaceforeverCN Sep 5, 2025
f17d6a8
[docs] add stable branch introduce
peaceforeverCN Sep 5, 2025
b80fe94
[doc] add CONTRIBUTING.md
peaceforeverCN Sep 5, 2025
0922651
Merge pull request #3 from taco-project/docs/move_vllm_patch
linhu-nv Sep 5, 2025
ca070af
[bugfix] fix incorrect num_tokens_to_get/put
zhuofan1123 Sep 8, 2025
8194c8c
Merge pull request #4 from zhuofan1123/zfl/dev
peaceforeverCN Sep 8, 2025
d79e4c1
[bugfix] fix incorrect num_tokens_to_get && format code
zhuofan1123 Sep 8, 2025
96591df
[bugfix] fix build issue
zhuofan1123 Sep 8, 2025
5dc9f4f
Merge pull request #5 from zhuofan1123/zfl/dev
linhu-nv Sep 8, 2025
c05db2c
fix vllm connector bug
gz944367214 Sep 9, 2025
a03978d
further_fix
gz944367214 Sep 9, 2025
15de1ce
modify default config
zhuofan1123 Sep 10, 2025
31ab4cf
update vllm patch
zhuofan1123 Sep 10, 2025
8cd535c
Merge pull request #6 from peaceforeverCN/dev
linhu-nv Sep 10, 2025
3371934
init xx_kv_layout_type from str
gz944367214 Sep 10, 2025
9c2b208
Merge pull request #8 from peaceforeverCN/zuogan/dev
peaceforeverCN Sep 11, 2025
33c0901
[doc] add version
peaceforeverCN Sep 11, 2025
3ef5869
Merge pull request #9 from taco-project/version_control
peaceforeverCN Sep 11, 2025
7cf7118
Merge pull request #10 from taco-project/dev
peaceforeverCN Sep 11, 2025
339bb73
Merge pull request #11 from taco-project/main
peaceforeverCN Sep 11, 2025
abc3468
ADD: dynamo+flexkv doc
shaonvidia Sep 15, 2025
4c82b9a
MOD:dynamo doc and main doc
shaonvidia Sep 15, 2025
099ec69
Merge pull request #14 from maureen1111/main
peaceforeverCN Sep 15, 2025
ecc1d59
[doc] add flexkv_config.json introduce
peaceforeverCN Sep 15, 2025
36fcf1c
Merge pull request #15 from taco-project/args_docs
peaceforeverCN Sep 15, 2025
54b21ed
Merge pull request #16 from taco-project/bugfix
peaceforeverCN Sep 15, 2025
73 changes: 73 additions & 0 deletions .github/workflows/publish.yml
@@ -0,0 +1,73 @@
# This workflow will upload a Python Package to Release asset
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions
# Copied from vLLM github actions https://github.com/vllm-project/vllm/blob/main/.github/workflows/publish.yml
name: flexkv ci

on:
  pull_request:
    branches: [ "main", "dev" ]
  push:
    branches: [ "main", "dev" ]

# Needed to create wheel and upload assets
permissions:
  contents: write

jobs:
  build:
    name: Build Wheel
    runs-on: ${{ matrix.os }}

    strategy:
      fail-fast: false
      matrix:
        os: ['ubuntu-22.04']
        python-version: ['3.10']
        pytorch-version: ['2.6.0']
        cuda-version: ['12.4']

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Linux Env
        if: ${{ runner.os == 'Linux' }}
        run: |
          bash -x .github/workflows/scripts/env.sh

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'

      - name: Install CUDA ${{ matrix.cuda-version }}
        run: |
          bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}

      - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
        run: |
          bash -x .github/workflows/scripts/pytorch-install.sh ${{ matrix.python-version }} ${{ matrix.pytorch-version }} ${{ matrix.cuda-version }}

      - name: Build wheel
        shell: bash
        env:
          TORCH_CUDA_ARCH_LIST: "8.9 9.0+PTX"
          MAX_JOBS: 4
        run: |
          ./build.sh --release

      - name: Get Date and Time
        run: |
          echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
          echo "time=$(date +'%H-%M-%S')" >> $GITHUB_ENV

      - name: Upload to cos
        uses: shallwefootball/s3-upload-action@master
        with:
          aws_key_id: ${{ secrets.COS_SECRET_ID }}
          aws_secret_access_key: ${{ secrets.COS_SECRET_KEY }}
          aws_bucket: ${{ secrets.COS_BUCKET }}
          endpoint: ${{ secrets.COS_ENDPOINT }}
          source_dir: dist
          destination_dir: flexkv/${{ env.date }}/${{ env.time }}
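The `Get Date and Time` step appends `key=value` lines to the file behind `$GITHUB_ENV`, which is what lets the later upload step expand `${{ env.date }}` and `${{ env.time }}` into `destination_dir`. A minimal local sketch of that mechanism (the temp file stands in for the runner's real `GITHUB_ENV` file; the parsing step only emulates what the runner does between steps):

```shell
# Stand-in for the file GitHub Actions exposes as $GITHUB_ENV
GITHUB_ENV=$(mktemp)

# Same commands as in the workflow step
echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
echo "time=$(date +'%H-%M-%S')" >> "$GITHUB_ENV"

# The runner parses key=value lines between steps; emulate that here
date_val=$(grep '^date=' "$GITHUB_ENV" | cut -d= -f2)
time_val=$(grep '^time=' "$GITHUB_ENV" | cut -d= -f2)

# Reconstruct the upload destination used by the final step
destination_dir="flexkv/${date_val}/${time_val}"
echo "$destination_dir"
rm -f "$GITHUB_ENV"
```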
24 changes: 24 additions & 0 deletions .github/workflows/scripts/cuda-install.sh
@@ -0,0 +1,24 @@
#!/bin/bash
# Copied from vLLM github actions https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/cuda-install.sh

# Replace '.' with '-' ex: 11.8 -> 11-8
cuda_version=$(echo "$1" | tr "." "-")
# Removes '-' and '.' ex: ubuntu-20.04 -> ubuntu2004
OS=$(echo "$2" | tr -d ".\-")

# Installs CUDA
wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-keyring_1.1-1_all.deb"
sudo dpkg -i cuda-keyring_1.1-1_all.deb
rm cuda-keyring_1.1-1_all.deb
sudo apt -qq update
sudo apt -y install "cuda-${cuda_version}" "cuda-nvcc-${cuda_version}" "cuda-libraries-dev-${cuda_version}"
sudo apt clean

# Test nvcc
PATH=/usr/local/cuda-$1/bin:${PATH}
nvcc --version

# Log gcc, g++, c++ versions
gcc --version
g++ --version
c++ --version
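The two `tr` calls at the top of this script turn the workflow's matrix values into the identifiers NVIDIA's apt repository expects: the CUDA version's dot becomes a dash for the package suffix, and the Ubuntu version loses its dot and dash for the repo path. A quick check of both transformations with the values from the workflow matrix:

```shell
# CUDA version: '.' -> '-' for apt package names (12.4 -> 12-4)
cuda_version=$(echo "12.4" | tr "." "-")

# OS string: delete '.' and '-' for the repo URL path (ubuntu-22.04 -> ubuntu2204)
OS=$(echo "ubuntu-22.04" | tr -d ".\-")

echo "$cuda_version $OS"   # -> 12-4 ubuntu2204
```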
21 changes: 21 additions & 0 deletions .github/workflows/scripts/env.sh
@@ -0,0 +1,21 @@
#!/bin/bash
# Copied from vLLM github actions https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/env.sh

# This file installs common linux environment tools

export LANG=C.UTF-8

sudo apt-get update && \
sudo apt-get install -y --no-install-recommends \
software-properties-common

sudo apt-get install -y --no-install-recommends \
build-essential \
liburing-dev \
git \
cmake

# Remove github bloat files to free up disk space
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf "/usr/share/dotnet"
16 changes: 16 additions & 0 deletions .github/workflows/scripts/pytorch-install.sh
@@ -0,0 +1,16 @@
#!/bin/bash
# Copied from vLLM github actions https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/pytorch-install.sh

python_executable=python$1
pytorch_version=$2
cuda_version=$3

# Install torch
$python_executable -m pip install numpy ninja cython wheel typing typing-extensions dataclasses setuptools && conda clean -ya
$python_executable -m pip install torch=="${pytorch_version}+cu${cuda_version//./}" --extra-index-url "https://download.pytorch.org/whl/cu${cuda_version//./}"

# Print version information
$python_executable --version
$python_executable -c "import torch; print('PyTorch:', torch.__version__)"
$python_executable -c "import torch; print('CUDA:', torch.version.cuda)"
$python_executable -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
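The `${cuda_version//./}` expansion used twice above is bash-specific (not POSIX `sh`): it deletes every `.` so `12.4` becomes the `cu124` tag that appears in both the pinned wheel spec and the PyTorch extra index URL. A sketch with the matrix values from the workflow:

```shell
pytorch_version=2.6.0
cuda_version=12.4

# ${var//pattern/} replaces all matches with nothing, i.e. strips the dots
wheel_spec="torch==${pytorch_version}+cu${cuda_version//./}"
index_url="https://download.pytorch.org/whl/cu${cuda_version//./}"

echo "$wheel_spec"   # -> torch==2.6.0+cu124
echo "$index_url"
```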
3 changes: 3 additions & 0 deletions .gitignore
@@ -70,3 +70,6 @@ cover/

# mypy
.mypy_cache/

# VSCode
.vscode/
30 changes: 30 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,30 @@
# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [1.0.0] - 2025-09-11

### Added
- C++ radix tree for fast matching; set `"index_accel": true` in `cache_config` to enable it
- synchronous kernel launch
- a major change that moves the cache engine into a library for the accelerator (e.g., vLLM) to use, replacing the server-client mode.
  This accelerates *get* and *put* when no KVCache is matched. This version includes breaking API changes and is not backward compatible.
- `evict_ratio` in `cache_config` (default is 0); set e.g. `"evict_ratio": 0.05` to enable it
- reduced the bubble inside kernel launch
- vLLM 0.10.1.1 adapter

### Fixed
- cython release package


## [0.1.0] - 2025-08-29

### Added
- initial version
- license
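The 1.0.0 entries introduce two `cache_config` knobs: `index_accel` (enables the C++ radix tree) and `evict_ratio`. A hedged sketch of a JSON fragment carrying both, assuming the JSON config format the docs' `flexkv_config.json` suggests; the file name and location here are illustrative, not from the source:

```shell
# Sketch of a cache_config with the two knobs named in the changelog.
# Only "index_accel" and "evict_ratio" come from the changelog; the
# file path is a stand-in.
cat > /tmp/flexkv_cache_config.json <<'EOF'
{
  "index_accel": true,
  "evict_ratio": 0.05
}
EOF
cat /tmp/flexkv_cache_config.json
```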

13 changes: 13 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,13 @@
# Contributing to FlexKV

Thank you for your interest in contributing to FlexKV!

## PR Title and Classification
Use a prefixed PR title to indicate the type of changes. Please use one of the following:

- `[bugfix]` for bugfixes
- `[feature]` for new features
- `[test]` for test cases
- `[ci/build]` for build or continuous integration improvements
- `[doc]` for documentation fixes
- `[misc]` for PRs that do not fit the above categories. Please use this sparingly.
31 changes: 16 additions & 15 deletions README.md
@@ -8,28 +8,27 @@ FlexKV is released under the **Apache-2.0 License**. See the [LICENSE](LICENSE)

## How to Use

-### Build FlexKV
+### Install Dependencies

```bash
-./build.sh
+apt install liburing-dev
+apt install libxxhash-dev
```

-### Use FlexKV with vLLM (v0.8.4)
-
-Apply the patch `examples/vllm_adaption/flexkv_vllm_0_8_4.patch` to vLLM 0.8.4, then start FlexKV, vLLM, and the benchmark script:
+### Build FlexKV

```bash
-# Start FlexKV as server
-bash benchmarks/flexkv_benchmark/run_flexkv_server.sh
+./build.sh
+#./build.sh --release for cython package
```

-# Start vLLM as client
-bash benchmarks/flexkv_benchmark/serving_vllm.sh
+### Use FlexKV with vLLM

-# Start benchmark
-bash benchmarks/flexkv_benchmark/multiturn_benchmark.sh
-```
+See [docs/vllm_adapter/README_en.md](docs/vllm_adapter/README_en.md)

+### FlexKV Integration with Dynamo

-> **Note**: The current script is only compatible with the `main` branch. Support for the latest features in the `dev` branch is under development.
+See [docs/dynamo_integration/README_en.md](docs/dynamo_integration/README_en.md)

## Design Architecture

@@ -84,8 +83,10 @@ FlexKV performs:
- *put* requests can be called asynchronously; the time to copy data from GPU to CPU memory can overlap with subsequent computation. Data transfers between CPU memory, SSD, and scalable storage are fully handled asynchronously by the TransferEngine and are transparent to the main process.

## Branch
-- main is the stable branch, maintaining commits that have been tested.
-- dev is the development branch, maintaining newer features.
+- The main branch is the stable branch, maintaining commits that have been tested. Please pull from main if you need stable code.
+- The dev branch is the development branch, containing newer features. Please branch from and merge into dev if you need new features or are developing new functionality.
+- The bugfix branch maintains fixes for urgent bugs and documentation that requires prompt updates. If you need to fix a bug or update documentation urgently, please branch from and merge into bugfix.
+- The stable branch marks the previous state of the main branch, intended only for rollback or extremely conservative use cases (e.g., production deployment). Its use is discouraged.

## Roadmap

29 changes: 15 additions & 14 deletions README_zh.md
@@ -8,28 +8,27 @@ FlexKV is released under the **Apache-2.0 License**; for details, see [LICENSE](LICE

## How to Use

+### Install Dependencies
+
+```bash
+apt install liburing-dev
+apt install libxxhash-dev
+```
+
### Build FlexKV

```bash
./build.sh
+#./build.sh --release for cython package
```

### Using FlexKV with vLLM as an Example

-Apply the patch `examples/vllm_adaption/flexkv_vllm_0_8_4.patch` to vLLM 0.8.4, then start FlexKV, vLLM, and the test scripts separately:
+See [docs/vllm_adapter/README_zh.md](docs/vllm_adapter/README_zh.md)

-```bash
-# Start FlexKV as the server
-bash benchmarks/flexkv_benchmark/run_flexkv_server.sh
-
-# Start vLLM as the client
-bash benchmarks/flexkv_benchmark/serving_vllm.sh
-
-# Start the performance test
-bash benchmarks/flexkv_benchmark/multiturn_benchmark.sh
-```
+### FlexKV Integration with Dynamo

-> **Note**: The current scripts only work with the `main` branch. Support for the latest `dev`-branch features is under development.
+See [docs/dynamo_integration/README_zh.md](docs/dynamo_integration/README_zh.md)

## Design Architecture

@@ -84,8 +83,10 @@ When FlexKV handles a *get* request:
- *put* requests can be called asynchronously; the time to copy from GPU to CPU memory can overlap with subsequent computation. Transfers between CPU memory, SSD, and extended storage are then handled entirely by the TransferEngine, transparently to the main process.

## Branch
-- main is the stable branch, maintaining tested commits.
-- dev is the development branch, maintaining newer features.
+- main is the stable branch, maintaining tested commits. Please pull from this branch if you need stable code.
+- dev is the development branch, maintaining newer features. Please branch from and merge into dev if you need or are developing new features.
+- bugfix is the bug-fix branch, maintaining bugs that must be resolved immediately and documentation that must be updated immediately. Please branch from and merge into bugfix for such changes.
+- stable marks the previous position of the main branch, used only for rollback and extremely conservative scenarios (e.g., productization). Use of this branch is discouraged.

## Roadmap

1 change: 1 addition & 0 deletions VERSION
@@ -0,0 +1 @@
1.0.0