2 files changed: +43 −5 lines changed

New CPU test script (all lines added):
+# This script builds the CPU docker image and runs the offline inference inside the container.
+# It serves as a sanity check for compilation and basic model usage.
+set -ex
+
+# Try building the docker image
+docker build -t cpu-test -f Dockerfile.ppc64le .
+
+# Set up cleanup
+remove_docker_container() { docker rm -f cpu-test || true; }
+trap remove_docker_container EXIT
+remove_docker_container
+
+# Run the image. The commented-out variant additionally sets --shm-size=4g, which is needed for tensor parallelism.
+# docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test cpu-test
+docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --name cpu-test cpu-test
+
+# Run basic model test
+docker exec cpu-test bash -c "
+  pip install pytest matplotlib einops transformers_stream_generator
+  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B are not supported on CPU
+
+# Online inference
+docker exec cpu-test bash -c "
+  python3 -m vllm.entrypoints.openai.api_server --model facebook/opt-125m &
+  timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1
+  python3 benchmarks/benchmark_serving.py \
+    --backend vllm \
+    --dataset-name random \
+    --model facebook/opt-125m \
+    --num-prompts 20 \
+    --endpoint /v1/completions \
+    --tokenizer facebook/opt-125m"
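
The `timeout 600 bash -c 'until curl …'` line above is a readiness poll: it blocks until the freshly started API server answers on `/v1/models`, then lets the benchmark run, and fails the job if the server never comes up within ten minutes. A minimal standalone sketch of the same pattern; the URL, deadline, and wrapper function are illustrative assumptions, not part of this PR:

```bash
#!/usr/bin/env bash
# Readiness poll: wait for an HTTP endpoint to respond before proceeding.
# URL and DEADLINE are illustrative assumptions, not values from the PR.
URL="http://localhost:8000/v1/models"
DEADLINE=600  # seconds to wait before giving up

wait_for_server() {
  # --fail makes curl return non-zero on HTTP errors, so the loop keeps polling
  timeout "$DEADLINE" bash -c \
    "until curl --silent --fail $URL > /dev/null; do sleep 1; done"
}

if wait_for_server; then
  echo "Server is up; safe to start sending requests."
else
  echo "Server did not come up within ${DEADLINE}s." >&2
  exit 1
fi
```

Polling the endpoint instead of sleeping for a fixed interval keeps the CI run only as long as the model load actually takes, while the `timeout` bound stops a broken build from hanging the pipeline.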

Dockerfile.ppc64le:

@@ -2,21 +2,27 @@ FROM mambaorg/micromamba
 ARG MAMBA_DOCKERFILE_ACTIVATE=1
 USER root
 
-RUN apt-get update -y && apt-get install -y git wget vim numactl gcc-12 g++-12 protobuf-compiler libprotobuf-dev && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+ENV PATH="/usr/local/cargo/bin:$PATH:/opt/conda/bin/"
+
+RUN apt-get update -y && apt-get install -y git wget vim libnuma-dev libsndfile-dev libprotobuf-dev build-essential
 
 # Some packages in requirements-cpu are installed here
 # IBM provides optimized packages for ppc64le processors in the open-ce project for mamba
 # Currently these may not be available for venv or pip directly
-RUN micromamba install -y -n base -c https://ftp.osuosl.org/pub/open-ce/1.11.0-p10/ -c defaults python=3.10 pytorch-cpu=2.1.2 torchvision-cpu=0.16.2 && micromamba clean --all --yes
+RUN micromamba install -y -n base -c https://ftp.osuosl.org/pub/open-ce/1.11.0-p10/ -c defaults python=3.10 torchvision-cpu=0.16.2 rust && micromamba clean --all --yes
 
 COPY ./ /workspace/vllm
 
 WORKDIR /workspace/vllm
 
 # These packages will be in rocketce eventually
-RUN pip install -v -r requirements-cpu.txt --prefer-binary --extra-index-url https://repo.fury.io/mgiessing
+RUN pip install -v cmake torch==2.3.1 uvloop==0.20.0 -r requirements-cpu.txt --prefer-binary --extra-index-url https://repo.fury.io/mgiessing
 
 RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install
 
-WORKDIR /vllm-workspace
-ENTRYPOINT ["/opt/conda/bin/python3", "-m", "vllm.entrypoints.openai.api_server"]
+WORKDIR /workspace/
+
+RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+
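
With the new `ENTRYPOINT`, any arguments passed to `docker run` are forwarded to `vllm.entrypoints.openai.api_server`, and the symlinks make `tests/`, `examples/`, and `benchmarks/` reachable from the `/workspace/` working directory. A hedged usage sketch; the image tag, model, and networking flags below are illustrative assumptions, not part of the PR:

```bash
# Build the ppc64le CPU image and launch the OpenAI-compatible server it
# now starts by default. Tag and model here are illustrative assumptions.
docker build -t vllm-cpu-ppc64le -f Dockerfile.ppc64le .
docker run --rm --network host \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  vllm-cpu-ppc64le --model facebook/opt-125m

# From another shell, confirm the server lists the model (default port 8000):
curl http://localhost:8000/v1/models
```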