@@ -1,12 +1,18 @@
+from typing import List
+
 import setuptools
 import torch
-from torch.utils import cpp_extension
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+from torch.utils.cpp_extension import CUDA_HOME
+
 
-CXX_FLAGS = ['-g']
-NVCC_FLAGS = ['-O2']
+# Build custom operators.
+CXX_FLAGS = ["-g"]
+# TODO(woosuk): Should we use -O3?
+NVCC_FLAGS = ["-O2"]
 
 if not torch.cuda.is_available():
     raise RuntimeError(
-        f'Cannot find CUDA at CUDA_HOME: {cpp_extension.CUDA_HOME}. '
-        'CUDA must be available in order to build the package.')
+        f"Cannot find CUDA at CUDA_HOME: {CUDA_HOME}. "
+        "CUDA must be available in order to build the package.")
 
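This first hunk tidies the imports (pulling BuildExtension, CUDAExtension, and CUDA_HOME in directly instead of going through the cpp_extension module) and fails fast when no CUDA runtime is present. As a rough illustration of what the guard checks (not part of the commit), both signals come back negative on a CPU-only machine:

import torch
from torch.utils.cpp_extension import CUDA_HOME

# Without a visible GPU, is_available() is False; without a CUDA
# toolkit, CUDA_HOME is None, so the RuntimeError above fires before
# any nvcc invocation is attempted.
print(torch.cuda.is_available())
print(CUDA_HOME)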
@@ -13,8 +19,8 @@
 # FIXME(woosuk): Consider the case where the machine has multiple GPUs with
 # different compute capabilities.
 compute_capability = torch.cuda.get_device_capability()
 major, minor = compute_capability
 # Enable bfloat16 support if the compute capability is >= 8.0.
 if major >= 8:
-    NVCC_FLAGS.append('-DENABLE_BF16')
+    NVCC_FLAGS.append("-DENABLE_BF16")
 
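The FIXME above flags a real limitation: torch.cuda.get_device_capability() only inspects the current device. A hypothetical way to handle a machine with mixed GPUs (not in this commit; min_compute_capability is an illustrative helper) would be to compile for the lowest capability across all visible devices:

from typing import Tuple

import torch

def min_compute_capability() -> Tuple[int, int]:
    # Tuples compare lexicographically, so min() yields the weakest
    # (major, minor) pair among all visible GPUs.
    return min(
        torch.cuda.get_device_capability(i)
        for i in range(torch.cuda.device_count())
    )

Gating -DENABLE_BF16 on that minimum would keep the compiled kernels loadable on every GPU in the machine, at the cost of dropping bfloat16 on the stronger ones.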
@@ -21,42 +27,42 @@
 ext_modules = []
 
 # Cache operations.
-cache_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.cache_ops',
-    sources=['csrc/cache.cpp', 'csrc/cache_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+cache_extension = CUDAExtension(
+    name="cacheflow.cache_ops",
+    sources=["csrc/cache.cpp", "csrc/cache_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(cache_extension)
 
 # Attention kernels.
-attention_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.attention_ops',
-    sources=['csrc/attention.cpp', 'csrc/attention/attention_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+attention_extension = CUDAExtension(
+    name="cacheflow.attention_ops",
+    sources=["csrc/attention.cpp", "csrc/attention/attention_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(attention_extension)
 
 # Positional encoding kernels.
-positional_encoding_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.pos_encoding_ops',
-    sources=['csrc/pos_encoding.cpp', 'csrc/pos_encoding_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+positional_encoding_extension = CUDAExtension(
+    name="cacheflow.pos_encoding_ops",
+    sources=["csrc/pos_encoding.cpp", "csrc/pos_encoding_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(positional_encoding_extension)
 
 # Layer normalization kernels.
-layernorm_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.layernorm_ops',
-    sources=['csrc/layernorm.cpp', 'csrc/layernorm_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+layernorm_extension = CUDAExtension(
+    name="cacheflow.layernorm_ops",
+    sources=["csrc/layernorm.cpp", "csrc/layernorm_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(layernorm_extension)
 
 # Activation kernels.
-activation_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.activation_ops',
-    sources=['csrc/activation.cpp', 'csrc/activation_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+activation_extension = CUDAExtension(
+    name="cacheflow.activation_ops",
+    sources=["csrc/activation.cpp", "csrc/activation_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(activation_extension)
 
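All five extensions above follow the same recipe: a C++ binding file plus a .cu kernel file, compiled with the shared CXX_FLAGS and NVCC_FLAGS. A sketch of how the repetition could be factored out, reusing the imports and flag lists already defined in this file (a possible refactor, not something this commit does; make_extension is a hypothetical helper):

def make_extension(name: str, sources: List[str]) -> setuptools.Extension:
    # CUDAExtension returns a setuptools.Extension preconfigured to
    # route .cu sources through nvcc and link against the CUDA runtime.
    return CUDAExtension(
        name=f"cacheflow.{name}",
        sources=sources,
        extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
    )

The five assignment/append pairs would then collapse to one list built from (name, sources) tuples.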
@@ -63,5 +69,15 @@
+
+def get_requirements() -> List[str]:
+    """Get Python package dependencies from requirements.txt."""
+    with open("requirements.txt") as f:
+        requirements = f.read().strip().split("\n")
+    return requirements
+
+
 setuptools.setup(
-    name='cacheflow',
+    name="cacheflow",
+    python_requires=">=3.8",
+    install_requires=get_requirements(),
     ext_modules=ext_modules,
-    cmdclass={'build_ext': cpp_extension.BuildExtension},
+    cmdclass={"build_ext": BuildExtension},
 )
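With BuildExtension registered as the build_ext command, an editable install (pip install -e .) compiles every extension through nvcc at install time, and install_requires is populated from requirements.txt. A quick post-install smoke test (module names taken from the name= fields above; assumes the build succeeded):

# Each CUDAExtension becomes an importable submodule of cacheflow.
import cacheflow.cache_ops
import cacheflow.attention_ops
import cacheflow.pos_encoding_ops
import cacheflow.layernorm_ops
import cacheflow.activation_ops
print("All custom CUDA ops loaded.")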