
Commit 933eee9

Merge pull request #43 from maysunfaisal/new-models-and-servers-1
granite-3.1-8b-instruct, mistral-7b-instruct-v0.2, vllm 0.8.4, llamacpp_python 0.3.8
2 parents: ad6422a + 1886b96 · commit 933eee9

File tree: 29 files changed, +3148 −10 lines


.github/workflows/upload-image.yml

Lines changed: 15 additions & 5 deletions
```diff
@@ -37,14 +37,24 @@ jobs:
           for dir in ${ALL_CHANGED_FILES}; do
             echo "Change detected in $dir ..."
             prevdir=$(dirname $dir)
-            if [ $prevdir == "model-servers/vllm" ]; then
-              echo "Skipping uploading of vllm model server changes! Please perform a manual upload ..."
+            skip_model_server=false
+            model_server_set=("model-servers/vllm" "model-servers/llamacpp_python")
+            for model_server in "${model_server_set[@]}"; do
+              if [[ "$prevdir" == *"$model_server"* ]]; then
+                echo "Skipping $dir due to size constraints. Please perform a manual upload ..."
+                skip_model_server=true
+                break
+              fi
+            done
+            if [ "$skip_model_server" = true ]; then
               continue
             fi
-            if [ $dir == "models/detr-resnet-101" ]; then
-              echo "Skipping detr-resnet-101 due to size constraints. Please perform a manual upload ..."
-              continue
+            model_set=("models/detr-resnet-101" "models/granite-3.1-8b-instruct-gguf" "models/mistral-7b-instruct-v0.2")
+            if printf "%s\n" "${model_set[@]}" | grep -q -x "$dir"; then
+              echo "Skipping $dir due to size constraints. Please perform a manual upload ..."
+              continue
             fi
+
             cd $dir
             if [ ! -f config.env ]; then
               echo "No config.env file present in changed directory, skipping ..."
```
.github/workflows/validate-build.yml

Lines changed: 14 additions & 5 deletions
```diff
@@ -30,13 +30,22 @@ jobs:
           for dir in ${ALL_CHANGED_FILES}; do
             echo "Change detected in $dir ..."
             prevdir=$(dirname $dir)
-            if [ $prevdir == "model-servers/vllm" ]; then
-              echo "Skipping validation of vllm model server changes! Please perform a manual validation ..."
+            skip_model_server=false
+            model_server_set=("model-servers/vllm" "model-servers/llamacpp_python")
+            for model_server in "${model_server_set[@]}"; do
+              if [[ "$prevdir" == *"$model_server"* ]]; then
+                echo "Skipping $dir due to size constraints. Please perform a manual validation ..."
+                skip_model_server=true
+                break
+              fi
+            done
+            if [ "$skip_model_server" = true ]; then
               continue
             fi
-            if [ $dir == "models/detr-resnet-101" ]; then
-              echo "Skipping detr-resnet-101 due to size constraints. Please perform a manual validation ..."
-              continue
+            model_set=("models/detr-resnet-101" "models/granite-3.1-8b-instruct-gguf" "models/mistral-7b-instruct-v0.2")
+            if printf "%s\n" "${model_set[@]}" | grep -q -x "$dir"; then
+              echo "Skipping $dir due to size constraints. Please perform a manual validation ..."
+              continue
             fi
             cd $dir
             if [ ! -f config.env ] && ([ -f Dockerfile ] || [ -f Containerfile ]); then
```
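Unlike the exact-match model check, the model-server branch uses a glob substring match, since `prevdir` may be a nested path such as a versioned subdirectory under the server's directory. A sketch of just that check (the example path is illustrative):

```bash
#!/bin/bash
# Same substring test as the workflow's model_server_set loop.
prevdir="model-servers/llamacpp_python/0.3.8"   # illustrative nested path
model_server_set=("model-servers/vllm" "model-servers/llamacpp_python")

for model_server in "${model_server_set[@]}"; do
  # [[ ... == *"$model_server"* ]] is a glob substring match, so any
  # path containing a listed model-server directory triggers the skip.
  if [[ "$prevdir" == *"$model_server"* ]]; then
    echo "Skipping $prevdir; please validate manually."
    break
  fi
done
```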
Lines changed: 1 addition & 0 deletions
```diff
@@ -0,0 +1 @@
+config.env
```
Lines changed: 9 additions & 0 deletions
```diff
@@ -0,0 +1,9 @@
+FROM registry.access.redhat.com/ubi9/python-311:1-77.1726664316
+WORKDIR /locallm
+COPY src .
+USER root
+RUN dnf install -y gcc-toolset-13-gcc gcc-toolset-13-gcc-c++
+USER 1001
+RUN CC="/opt/rh/gcc-toolset-13/root/usr/bin/gcc" CXX="/opt/rh/gcc-toolset-13/root/usr/bin/g++" pip install --no-cache-dir --verbose -r ./requirements.txt
+EXPOSE 8001
+ENTRYPOINT [ "sh", "./run.sh" ]
```
Lines changed: 11 additions & 0 deletions
````diff
@@ -0,0 +1,11 @@
+# llama-cpp-python model server
+
+The llama-cpp-python model server is adapted from [ai-lab-recipes](https://github.com/containers/ai-lab-recipes/tree/main/model_servers/llamacpp_python).
+
+## Build Model Image
+
+To build the llama-cpp-python model server from this directory:
+
+```bash
+podman build -t quay.io/redhat-ai-dev/llamacpp_python:0.3.8 .
+```
````
Lines changed: 2 additions & 0 deletions
```diff
@@ -0,0 +1,2 @@
+IMAGE_NAME=quay.io/redhat-ai-dev/llamacpp_python
+IMAGE_TAG=0.3.8
```
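`config.env` is the marker the workflows above key off: directories without one are skipped. Assuming the upload job reads these values to tag and push the image (the consuming steps are outside this diff), a manual equivalent would look roughly like:

```bash
# Hypothetical manual build/push using the config.env values;
# the workflow's actual consuming steps are not part of this diff.
source config.env
podman build -t "${IMAGE_NAME}:${IMAGE_TAG}" .
podman push "${IMAGE_NAME}:${IMAGE_TAG}"
```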
Lines changed: 3 additions & 0 deletions
```diff
@@ -0,0 +1,3 @@
+llama-cpp-python[server]==0.3.8
+transformers==4.41.2
+pip==24.0
```
Lines changed: 27 additions & 0 deletions
```diff
@@ -0,0 +1,27 @@
+#!/bin/bash
+if [ ${CONFIG_PATH} ] || [[ ${MODEL_PATH} && ${CONFIG_PATH} ]]; then
+    python -m llama_cpp.server --config_file ${CONFIG_PATH}
+    exit 0
+fi
+
+if [ "${MODEL_HF_PRETRAINED_MODEL}" == "None" ]; then
+    MODEL_HF_PRETRAINED_MODEL=""
+fi
+
+if [ ${MODEL_PATH} ]; then
+    python -m llama_cpp.server \
+        --model ${MODEL_PATH} \
+        --host ${HOST:=0.0.0.0} \
+        --port ${PORT:=8001} \
+        --n_gpu_layers ${GPU_LAYERS:=0} \
+        --clip_model_path ${CLIP_MODEL_PATH:=None} \
+        --chat_format ${MODEL_CHAT_FORMAT:=llama-2} \
+        ${PRETRAINED_MODEL_PATH:=} \
+        ${MODEL_HF_PRETRAINED_MODEL:+--hf_pretrained_model_name_or_path ${MODEL_HF_PRETRAINED_MODEL}} \
+        --interrupt_requests ${INTERRUPT_REQUESTS:=False}
+    exit 0
+fi
+
+echo "Please set either a CONFIG_PATH or a MODEL_PATH"
+exit 1
+
```
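`run.sh` prefers `CONFIG_PATH` (a llama-cpp-python server config file), falls back to single-model mode when only `MODEL_PATH` is set, and exits non-zero if neither is provided; host, port, GPU layers, and chat format default to `0.0.0.0`, `8001`, `0`, and `llama-2`. A hedged single-model run of the image built above (mount path, model file, and chat format are illustrative):

```bash
# Serve one GGUF model on the default port 8001; paths are illustrative.
podman run -p 8001:8001 \
  -v ./models:/locallm/models:Z \
  -e MODEL_PATH=/locallm/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf \
  -e MODEL_CHAT_FORMAT=mistral-instruct \
  quay.io/redhat-ai-dev/llamacpp_python:0.3.8
```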
Lines changed: 153 additions & 0 deletions
```diff
@@ -0,0 +1,153 @@
+FROM registry.access.redhat.com/ubi9/python-311 as cuda-runtime
+
+###################################################################################################
+# CUDA 12.1 Layer, from https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1 #
+###################################################################################################
+
+# Base
+USER 0
+
+ENV NVARCH x86_64
+ENV NVIDIA_REQUIRE_CUDA "cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526"
+ENV NV_CUDA_CUDART_VERSION 12.1.105-1
+
+COPY cuda.repo-x86_64 /etc/yum.repos.d/cuda.repo
+
+RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
+    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel9/${NVARCH}/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
+    echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -
+
+ENV CUDA_VERSION 12.1.1
+
+# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
+RUN yum upgrade -y && yum install -y \
+    cuda-cudart-12-1-${NV_CUDA_CUDART_VERSION} \
+    cuda-compat-12-1 \
+    && ln -s cuda-12.1 /usr/local/cuda \
+    && yum -y clean all --enablerepo='*' && \
+    rm -rf /var/cache/dnf && \
+    find /var/log -type f -name "*.log" -exec rm -f {} \;
+
+# nvidia-docker 1.0
+RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
+    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
+
+ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
+ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
+
+COPY NGC-DL-CONTAINER-LICENSE /
+
+# nvidia-container-runtime
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+
+# Runtime
+ENV NV_CUDA_LIB_VERSION 12.1.1-1
+
+ENV NV_NVTX_VERSION 12.1.105-1
+ENV NV_LIBNPP_VERSION 12.1.0.40-1
+ENV NV_LIBNPP_PACKAGE libnpp-12-1-${NV_LIBNPP_VERSION}
+ENV NV_LIBCUBLAS_VERSION 12.1.3.1-1
+ENV NV_LIBNCCL_PACKAGE_NAME libnccl
+ENV NV_LIBNCCL_PACKAGE_VERSION 2.17.1-1
+ENV NV_LIBNCCL_VERSION 2.17.1
+ENV NCCL_VERSION 2.17.1
+ENV NV_LIBNCCL_PACKAGE ${NV_LIBNCCL_PACKAGE_NAME}-${NV_LIBNCCL_PACKAGE_VERSION}+cuda12.1
+
+RUN yum install -y \
+    cuda-libraries-12-1-${NV_CUDA_LIB_VERSION} \
+    cuda-nvtx-12-1-${NV_NVTX_VERSION} \
+    ${NV_LIBNPP_PACKAGE} \
+    libcublas-12-1-${NV_LIBCUBLAS_VERSION} \
+    ${NV_LIBNCCL_PACKAGE} \
+    && yum clean all \
+    && rm -rf /var/cache/yum/*
+
+# Set this flag so that libraries can find the location of CUDA
+ENV XLA_FLAGS=--xla_gpu_cuda_data_dir=/usr/local/cuda
+
+# CUDA Devel image
+FROM cuda-runtime as cuda-devel
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+
+ENV NV_CUDA_LIB_VERSION 12.1.1-1
+ENV NV_NVPROF_VERSION 12.1.105-1
+ENV NV_NVPROF_DEV_PACKAGE cuda-nvprof-12-1-${NV_NVPROF_VERSION}
+ENV NV_CUDA_CUDART_DEV_VERSION 12.1.105-1
+ENV NV_NVML_DEV_VERSION 12.1.105-1
+ENV NV_LIBCUBLAS_DEV_VERSION 12.1.3.1-1
+ENV NV_LIBNPP_DEV_VERSION 12.1.0.40-1
+ENV NV_LIBNPP_DEV_PACKAGE libnpp-devel-12-1-${NV_LIBNPP_DEV_VERSION}
+ENV NV_LIBNCCL_DEV_PACKAGE_NAME libnccl-devel
+ENV NV_LIBNCCL_DEV_PACKAGE_VERSION 2.17.1-1
+ENV NCCL_VERSION 2.17.1
+ENV NV_LIBNCCL_DEV_PACKAGE ${NV_LIBNCCL_DEV_PACKAGE_NAME}-${NV_LIBNCCL_DEV_PACKAGE_VERSION}+cuda12.1
+ENV NV_CUDA_NSIGHT_COMPUTE_VERSION 12.1.1-1
+ENV NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE cuda-nsight-compute-12-1-${NV_CUDA_NSIGHT_COMPUTE_VERSION}
+
+
+RUN yum install -y \
+    make \
+    findutils \
+    cuda-command-line-tools-12-1-${NV_CUDA_LIB_VERSION} \
+    cuda-libraries-devel-12-1-${NV_CUDA_LIB_VERSION} \
+    cuda-minimal-build-12-1-${NV_CUDA_LIB_VERSION} \
+    cuda-cudart-devel-12-1-${NV_CUDA_CUDART_DEV_VERSION} \
+    ${NV_NVPROF_DEV_PACKAGE} \
+    cuda-nvml-devel-12-1-${NV_NVML_DEV_VERSION} \
+    libcublas-devel-12-1-${NV_LIBCUBLAS_DEV_VERSION} \
+    ${NV_LIBNPP_DEV_PACKAGE} \
+    ${NV_LIBNCCL_DEV_PACKAGE} \
+    ${NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE} \
+    && yum clean all \
+    && rm -rf /var/cache/yum/*
+
+ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs
+
+#############################################
+#           End of CUDA 12.1 Layer          #
+#############################################
+
+###################################
+# vLLM install in build container #
+###################################
+
+FROM cuda-devel as vllm-install
+
+WORKDIR /opt/app-root/src
+
+USER 1001
+
+COPY --chown=1001:0 requirements.txt ./
+
+RUN pip install --no-cache-dir -r requirements.txt && \
+    rm -f requirements.txt && \
+    # Install flash-attn from PyPI \
+    pip install flash-attn==2.5.8 --no-build-isolation && \
+    # Correction for FIPS mode \
+    sed -i s/md5/sha1/g /opt/app-root/lib64/python3.11/site-packages/triton/runtime/jit.py && \
+    # Fix permissions to support pip in Openshift environments \
+    chmod -R g+w /opt/app-root/lib/python3.11/site-packages && \
+    fix-permissions /opt/app-root -P
+
+##################
+# vLLM container #
+##################
+
+FROM cuda-runtime as vllm-container
+
+WORKDIR /opt/app-root/src
+
+COPY --from=vllm-install --chown=1001:0 /opt/app-root/lib64/python3.11/site-packages /opt/app-root/lib64/python3.11/site-packages
+COPY --from=vllm-install --chown=1001:0 /opt/app-root/src/.config/vllm/nccl/cu12/libnccl.so.2* /usr/local/lib/libnccl.so.2
+
+# Fix VLLM_NCCL_SO_PATH
+ENV VLLM_NCCL_SO_PATH=/usr/local/lib/libnccl.so.2
+
+USER 1001
+
+EXPOSE 8000
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+
```
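Since the final stage's entrypoint is the bare OpenAI-compatible server module, all model selection happens through runtime arguments. A hedged launch example (the image name, model path, and CDI GPU syntax are assumptions; `--device nvidia.com/gpu=all` requires the NVIDIA container toolkit's CDI setup on the host):

```bash
# Launch the vLLM OpenAI-compatible API server on the exposed port 8000.
# Image name and model path are illustrative.
podman run --device nvidia.com/gpu=all -p 8000:8000 \
  -v ./models/granite-3.1-8b-instruct:/opt/app-root/src/model:Z \
  quay.io/redhat-ai-dev/vllm:latest \
  --model /opt/app-root/src/model --host 0.0.0.0 --port 8000
```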
