Skip to content

Commit 7addca5

Browse files
authored
Specify python package dependencies in requirements.txt (#78)
1 parent c84e924 commit 7addca5

File tree

3 files changed

+57
-36
lines changed

3 files changed

+57
-36
lines changed

README.md

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# CacheFlow
22

3-
## Installation
3+
## Build from source
44

55
```bash
6-
pip install ninja psutil numpy sentencepiece ray torch transformers xformers
7-
pip install -e .
6+
pip install -r requirements.txt
7+
pip install -e . # This may take several minutes.
88
```
99

1010
## Test simple server
@@ -21,11 +21,6 @@ python simple_server.py --help
2121

2222
## FastAPI server
2323

24-
Install the following additional dependencies:
25-
```bash
26-
pip install fastapi uvicorn
27-
```
28-
2924
To start the server:
3025
```bash
3126
ray start --head

requirements.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
ninja # For faster builds.
2+
psutil
3+
ray
4+
sentencepiece # Required for LLaMA tokenizer.
5+
numpy
6+
torch >= 2.0.0
7+
transformers >= 4.28.0 # Required for LLaMA.
8+
xformers >= 0.0.19
9+
fastapi
10+
uvicorn

setup.py

Lines changed: 44 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,83 @@
1+
from typing import List
2+
13
import setuptools
24
import torch
3-
from torch.utils import cpp_extension
5+
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
6+
from torch.utils.cpp_extension import CUDA_HOME
7+
48

5-
CXX_FLAGS = ['-g']
6-
NVCC_FLAGS = ['-O2']
9+
# Build custom operators.
10+
CXX_FLAGS = ["-g"]
11+
# TODO(woosuk): Should we use -O3?
12+
NVCC_FLAGS = ["-O2"]
713

814
if not torch.cuda.is_available():
915
raise RuntimeError(
10-
f'Cannot find CUDA at CUDA_HOME: {cpp_extension.CUDA_HOME}. '
11-
'CUDA must be available in order to build the package.')
16+
f"Cannot find CUDA at CUDA_HOME: {CUDA_HOME}. "
17+
"CUDA must be available in order to build the package.")
1218

1319
# FIXME(woosuk): Consider the case where the machine has multiple GPUs with
1420
# different compute capabilities.
1521
compute_capability = torch.cuda.get_device_capability()
1622
major, minor = compute_capability
1723
# Enable bfloat16 support if the compute capability is >= 8.0.
1824
if major >= 8:
19-
NVCC_FLAGS.append('-DENABLE_BF16')
25+
NVCC_FLAGS.append("-DENABLE_BF16")
2026

2127
ext_modules = []
2228

2329
# Cache operations.
24-
cache_extension = cpp_extension.CUDAExtension(
25-
name='cacheflow.cache_ops',
26-
sources=['csrc/cache.cpp', 'csrc/cache_kernels.cu'],
27-
extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
30+
cache_extension = CUDAExtension(
31+
name="cacheflow.cache_ops",
32+
sources=["csrc/cache.cpp", "csrc/cache_kernels.cu"],
33+
extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
2834
)
2935
ext_modules.append(cache_extension)
3036

3137
# Attention kernels.
32-
attention_extension = cpp_extension.CUDAExtension(
33-
name='cacheflow.attention_ops',
34-
sources=['csrc/attention.cpp', 'csrc/attention/attention_kernels.cu'],
35-
extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
38+
attention_extension = CUDAExtension(
39+
name="cacheflow.attention_ops",
40+
sources=["csrc/attention.cpp", "csrc/attention/attention_kernels.cu"],
41+
extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
3642
)
3743
ext_modules.append(attention_extension)
3844

3945
# Positional encoding kernels.
40-
positional_encoding_extension = cpp_extension.CUDAExtension(
41-
name='cacheflow.pos_encoding_ops',
42-
sources=['csrc/pos_encoding.cpp', 'csrc/pos_encoding_kernels.cu'],
43-
extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
46+
positional_encoding_extension = CUDAExtension(
47+
name="cacheflow.pos_encoding_ops",
48+
sources=["csrc/pos_encoding.cpp", "csrc/pos_encoding_kernels.cu"],
49+
extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
4450
)
4551
ext_modules.append(positional_encoding_extension)
4652

4753
# Layer normalization kernels.
48-
layernorm_extension = cpp_extension.CUDAExtension(
49-
name='cacheflow.layernorm_ops',
50-
sources=['csrc/layernorm.cpp', 'csrc/layernorm_kernels.cu'],
51-
extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
54+
layernorm_extension = CUDAExtension(
55+
name="cacheflow.layernorm_ops",
56+
sources=["csrc/layernorm.cpp", "csrc/layernorm_kernels.cu"],
57+
extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
5258
)
5359
ext_modules.append(layernorm_extension)
5460

5561
# Activation kernels.
56-
activation_extension = cpp_extension.CUDAExtension(
57-
name='cacheflow.activation_ops',
58-
sources=['csrc/activation.cpp', 'csrc/activation_kernels.cu'],
59-
extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
62+
activation_extension = CUDAExtension(
63+
name="cacheflow.activation_ops",
64+
sources=["csrc/activation.cpp", "csrc/activation_kernels.cu"],
65+
extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
6066
)
6167
ext_modules.append(activation_extension)
6268

69+
70+
def get_requirements() -> List[str]:
    """Read Python package dependencies from requirements.txt.

    Returns:
        A list of PEP 508 requirement specifiers, with blank lines,
        full-line comments, and trailing inline comments (e.g.
        "ninja  # For faster builds.") removed. Raw lines with inline
        comments are not valid ``install_requires`` entries, so they
        must be stripped before handing them to setuptools.
    """
    requirements: List[str] = []
    with open("requirements.txt") as f:
        for raw_line in f:
            # Only treat "#" as a comment start when it begins the line
            # or follows whitespace, so URL fragments like "#egg=" in a
            # requirement specifier are left intact.
            line = raw_line.strip()
            if line.startswith("#"):
                continue
            line = line.split(" #", 1)[0].rstrip()
            if line:
                requirements.append(line)
    return requirements
75+
76+
6377
setuptools.setup(
64-
name='cacheflow',
78+
name="cacheflow",
79+
python_requires=">=3.8",
80+
install_requires=get_requirements(),
6581
ext_modules=ext_modules,
66-
cmdclass={'build_ext': cpp_extension.BuildExtension},
82+
cmdclass={"build_ext": BuildExtension},
6783
)

0 commit comments

Comments
 (0)