-
Notifications
You must be signed in to change notification settings - Fork 53
Expand file tree
/
Copy pathDockerfile.amd64
More file actions
102 lines (79 loc) · 2.97 KB
/
Dockerfile.amd64
File metadata and controls
102 lines (79 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# syntax=docker/dockerfile:1
# This is a reference dockerfile for SenDNN Inference support on an x86 host
ARG BASE_IMAGE_URL="quay.io/ibm-aiu/spyre-runtime"
ARG BASE_IMAGE_TAG="latest"
##############################################
# Base
##############################################
FROM ${BASE_IMAGE_URL}:${BASE_IMAGE_TAG} AS base
USER root
# Install prerequisites for building/running the project; clean dnf metadata
# in the same layer so it is not baked into the image (hadolint DL3040/DL3041).
RUN dnf install -y \
        git \
        python3-pip.noarch \
        python3-wheel.noarch \
    && dnf clean all
##############################################
# Build stage
##############################################
FROM base AS builder
# This stage builds SenDNN-Inference and installs its dependencies into a new venv.
# This is done with system site-packages access so that we can use the installs
# of torch_sendnn etc. from the driver image.

# Login shell (-l) so /etc/profile.d/umask.sh below is sourced in later RUN steps.
SHELL ["/usr/bin/bash", "-lc"]
# set umask to keep everything group writeable
RUN echo 'umask 002' >> /etc/profile.d/umask.sh \
    && chmod g+w /usr/src

# See https://docs.astral.sh/uv/guides/integration/docker/
# for info on using `uv` in docker builds
# UV_PYTHON_PREFERENCE=only-system is critical to ensure that we use the same
# python install from the base driver image.
ENV UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    UV_PYTHON_PREFERENCE=only-system \
    UV_PROJECT_ENVIRONMENT=/opt/vllm

# Create new venv to install everything in; cache mounts keep the pip/uv caches
# on the build host instead of in an image layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    pip install uv && \
    uv venv /opt/vllm --seed --system-site-packages

# Always install vllm with empty target device
ENV VLLM_TARGET_DEVICE=empty

# COPY, not ADD, for plain local files (hadolint DL3020)
COPY . /usr/src/sendnn-inference

# Use uv to build the sendnn-inference project, with dependencies locked
WORKDIR /usr/src/sendnn-inference
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --no-dev --no-editable --frozen

# clean up temp dir
RUN rm -rf /tmp/etc
##############################################
# Release stage
##############################################
FROM base AS release

# Copy over the vllm venv produced by the builder stage
COPY --from=builder /opt/vllm /opt/vllm

# Required Spyre environment configuration
ENV DISTRIBUTED_STRATEGY_IGNORE_MODULES=WordEmbedding,Embedding \
    DTLOG_LEVEL=error \
    DT_DEEPRT_VERBOSE=-1 \
    FLEX_COMPUTE=SENTIENT \
    FLEX_DEVICE=PF \
    TOKENIZERS_PARALLELISM=false \
    TORCH_SENDNN_LOG=WARNING

# Required configuration file (trailing slash: copy INTO the home directory)
COPY docker/.senlib.json /home/senuser/

# Common vLLM-specific env setup
ENV MASTER_ADDR=localhost \
    MASTER_PORT=12355 \
    VLLM_PLUGINS=sendnn_inference

# Group-writable home owned by senuser:0 — presumably to support
# arbitrary-UID container runtimes (e.g. OpenShift); verify with the runtime team.
RUN true \
    && mkdir -p /home/senuser/.cache \
    && chmod -R g+rwx /home/ \
    && chown -R senuser:0 /home/

ENV HOME=/home/senuser
WORKDIR ${HOME}
# Drop root for runtime; numeric UID so runAsNonRoot-style checks can verify it.
USER 2000
ENV PATH="/home/senuser/.local/bin:$PATH"

# Place executables in the environment at the front of the path.
# This will make the default `python` and `pip` resolve to the uv-managed venv
ENV PATH="/opt/vllm/bin:$PATH"

# Copy helper script to configure AIUs at runtime before booting vLLM
COPY docker/simple_vllm_serve.sh /home/senuser/
ENTRYPOINT ["/home/senuser/simple_vllm_serve.sh"]