Skip to content

Commit 83798ef

Browse files
oobabooga authored and PoetOnTheRun committed
Bump llama-cpp-python, remove python 3.8/3.9, cuda 11.7 (oobabooga#5397)
1 parent 2a312c8 commit 83798ef

10 files changed

60 additions (+60) and 162 deletions (-162)

Colab-TextGen-GPU.ipynb

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -65,12 +65,9 @@
6565
" torver = torch.__version__\n",
6666
" print(f\"TORCH: {torver}\")\n",
6767
" is_cuda118 = '+cu118' in torver # 2.1.0+cu118\n",
68-
" is_cuda117 = '+cu117' in torver # 2.0.1+cu117\n",
6968
"\n",
7069
" textgen_requirements = open('requirements.txt').read().splitlines()\n",
71-
" if is_cuda117:\n",
72-
" textgen_requirements = [req.replace('+cu121', '+cu117').replace('+cu122', '+cu117').replace('torch2.1', 'torch2.0') for req in textgen_requirements]\n",
73-
" elif is_cuda118:\n",
70+
" if is_cuda118:\n",
7471
" textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]\n",
7572
" with open('temp_requirements.txt', 'w') as file:\n",
7673
" file.write('\\n'.join(textgen_requirements))\n",
@@ -130,4 +127,4 @@
130127
"outputs": []
131128
}
132129
]
133-
}
130+
}

one_click.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -299,7 +299,6 @@ def update_requirements(initial_installation=False):
299299
torver = torch_version()
300300
is_cuda = '+cu' in torver
301301
is_cuda118 = '+cu118' in torver # 2.1.0+cu118
302-
is_cuda117 = '+cu117' in torver # 2.0.1+cu117
303302
is_rocm = '+rocm' in torver # 2.0.1+rocm5.4.2
304303
is_intel = '+cxx11' in torver # 2.0.1a0+cxx11.abi
305304
is_cpu = '+cpu' in torver # 2.0.1+cpu
@@ -320,11 +319,9 @@ def update_requirements(initial_installation=False):
320319

321320
# Prepare the requirements file
322321
textgen_requirements = open(requirements_file).read().splitlines()
323-
if is_cuda117:
324-
textgen_requirements = [req.replace('+cu121', '+cu117').replace('+cu122', '+cu117').replace('torch2.1', 'torch2.0') for req in textgen_requirements]
325-
elif is_cuda118:
322+
if is_cuda118:
326323
textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]
327-
if is_windows() and (is_cuda117 or is_cuda118): # No flash-attention on Windows for CUDA 11
324+
if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11
328325
textgen_requirements = [req for req in textgen_requirements if 'jllllll/flash-attention' not in req]
329326

330327
with open('temp_requirements.txt', 'w') as file:

requirements.txt

Lines changed: 12 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -29,67 +29,39 @@ bitsandbytes==0.41.1; platform_system != "Windows"
2929
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
3030

3131
# llama-cpp-python (CPU only, AVX2)
32-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
33-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
34-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
35-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
36-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
37-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
38-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
39-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
32+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
33+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
34+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
35+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
4036

4137
# llama-cpp-python (CUDA, no tensor cores)
42-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
43-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
44-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
45-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
46-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
47-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
48-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
49-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
38+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
39+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
40+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
41+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
5042

5143
# llama-cpp-python (CUDA, tensor cores)
52-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
53-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
54-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
55-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
56-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
57-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
58-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
59-
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
44+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
45+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
46+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
47+
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
6048

6149
# CUDA wheels
6250
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
6351
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
64-
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
65-
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
6652
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
6753
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
68-
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
69-
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
7054
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
7155
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
72-
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
73-
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
7456
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
7557
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
76-
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
77-
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
7858
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
7959
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
80-
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
81-
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
8260
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
8361
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
84-
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
85-
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
8662
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
8763
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
88-
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
89-
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
9064
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
9165
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
92-
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
93-
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
9466
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu121-py3-none-any.whl
9567
autoawq==0.1.8; platform_system == "Linux" or platform_system == "Windows"

0 commit comments

Comments
 (0)