Skip to content
This repository was archived by the owner on Dec 3, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 46 additions & 42 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,51 +13,55 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# DEPRECATED: Use uv.Dockerfile instead.
# Dockerfile using uv environment.

# Use NVIDIA PyTorch container as base image
FROM nvcr.io/nvidia/pytorch:25.04-py3
ARG TARGETPLATFORM
ARG BASE_IMAGE=nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04

FROM ${BASE_IMAGE}

# Set the DEBIAN_FRONTEND environment variable to avoid interactive prompts during apt operations.
ENV DEBIAN_FRONTEND=noninteractive

# Install packages
RUN --mount=type=cache,target=/var/cache/apt \
--mount=type=cache,target=/var/lib/apt \
apt-get update && \
apt-get install -y --no-install-recommends \
curl \
git \
libgl1 \
libglib2.0-0 \
tree \
wget

# Install pkgx: https://pkgx.sh/
RUN curl -sSf https://pkgx.sh | sh

# Install uv: https://docs.astral.sh/uv/getting-started/installation/
# https://github.com/astral-sh/uv-docker-example/blob/main/Dockerfile
COPY --from=ghcr.io/astral-sh/uv:0.8.12 /uv /uvx /usr/local/bin/
# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1
# Copy from the cache instead of linking since it's a mounted volume
ENV UV_LINK_MODE=copy
# Ensure installed tools can be executed out of the box
ENV UV_TOOL_BIN_DIR=/usr/local/bin

# Install just: https://just.systems/man/en/pre-built-binaries.html
RUN curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to /usr/local/bin --tag 1.42.4

# Install basic tools
RUN apt-get -y update && apt-get install -y git tree ffmpeg wget
RUN rm /bin/sh && ln -s /bin/bash /bin/sh
RUN if [[ ${TARGETPLATFORM} == 'linux/amd64' ]]; then ln -s /lib64/libcuda.so.1 /lib64/libcuda.so; fi
RUN apt-get install -y libglib2.0-0
RUN sed -i -e 's/h11==0.14.0/h11==0.16.0/g' /etc/pip/constraint.txt

# Install Flash Attention 3
RUN MAX_JOBS=$(( $(nproc) / 4 )) pip install git+https://github.com/Dao-AILab/flash-attention.git@27f501d#subdirectory=hopper
COPY cosmos_predict2/utils/flash_attn_3/flash_attn_interface.py /usr/local/lib/python3.12/dist-packages/flash_attn_3/flash_attn_interface.py
COPY cosmos_predict2/utils/flash_attn_3/te_attn.diff /tmp/te_attn.diff
RUN patch /usr/local/lib/python3.12/dist-packages/transformer_engine/pytorch/attention.py /tmp/te_attn.diff

# Installing decord from source on ARM
COPY Video_Codec_SDK_13.0.19.zip* /workspace/Video_Codec_SDK_13.0.19.zip
RUN if [[ ${TARGETPLATFORM} == 'linux/arm64' ]]; then export DEBIAN_FRONTEND=noninteractive && \
apt-get -y update && \
apt-get install -y build-essential python3-dev python3-setuptools make cmake \
ffmpeg libavcodec-dev libavfilter-dev libavformat-dev libavutil-dev git ssh unzip nano python3-pip && \
git clone --recursive https://github.com/dmlc/decord && \
cd decord && \
find . -type f -exec sed -i "s/AVInputFormat \*/const AVInputFormat \*/g" {} \; && \
sed -i "s/[[:space:]]AVCodec \*dec/const AVCodec \*dec/" src/video/video_reader.cc && \
sed -i "s/avcodec\.h>/avcodec\.h>\n#include <libavcodec\/bsf\.h>/" src/video/ffmpeg/ffmpeg_common.h && \
mkdir build && cd build && \
scp /workspace/Video_Codec_SDK_13.0.19.zip . && \
unzip Video_Codec_SDK_13.0.19.zip && \
cp Video_Codec_SDK_13.0.19/Lib/linux/stubs/aarch64/* /usr/local/cuda/lib64/ && \
cp Video_Codec_SDK_13.0.19/Interface/* /usr/local/cuda/include && \
cmake .. -DUSE_CUDA=ON -DCMAKE_BUILD_TYPE=Release && \
make -j 4 && \
cd ../python && python3 setup.py install; fi
RUN if [[ ${TARGETPLATFORM} == 'linux/arm64' ]]; then apt remove -y python3-blinker; fi

# Install the dependencies from requirements-docker.txt
COPY ./requirements-docker.txt /requirements.txt
ARG NATTEN_CUDA_ARCH="8.0;8.6;8.9;9.0;10.0;10.3;12.0"
RUN pip install --no-cache-dir -r /requirements.txt
RUN mkdir -p /workspace
WORKDIR /workspace

# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
--mount=type=bind,source=.python-version,target=.python-version \
uv sync --locked --no-install-project --extra cu126

# Place executables in the environment at the front of the path
ENV PATH="/workspace/.venv/bin:$PATH"

ENTRYPOINT ["/workspace/bin/entrypoint.sh"]
CMD ["/bin/bash"]
25 changes: 5 additions & 20 deletions documentations/setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

* NVIDIA GPUs with Ampere architecture (RTX 30 Series, A100) or newer
* NVIDIA driver compatible with CUDA 12.6
* Linux
* Linux x86-64
* glibc>=2.31 (e.g., Ubuntu >=22.04)
* Python 3.10

## Installation

Expand All @@ -15,19 +17,8 @@ git clone git@github.com:nvidia-cosmos/cosmos-predict2.git
cd cosmos-predict2
```

### ARM installation

When using an ARM platform, like GB200, special steps are required to install the `decord` package.
You need to make sure that the [NVIDIA Video Codec SDK](https://developer.nvidia.com/nvidia-video-codec-sdk/download) is downloaded to the root of the repository.
The installation will be handled by the Conda scripts or Dockerfile.

### Option 1: Virtual environment

System requirements:

* Linux x86-64
* glibc>=2.31 (e.g., Ubuntu >=22.04)

Install system dependencies:

[uv](https://docs.astral.sh/uv/getting-started/installation/)
Expand All @@ -54,16 +45,10 @@ uv sync --extra cu126 --active --inexact

Please make sure you have access to Docker on your machine and that the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) is installed.

For x86-64, build and run the container:

```bash
docker run --gpus all --rm -v .:/workspace -v /workspace/.venv -it $(docker build -f uv.Dockerfile -q .)
```

For ARM, pull and run a pre-built container:
Build and run the container:

```bash
docker run --gpus all --rm -v .:/workspace -it nvcr.io/nvidia/cosmos/cosmos-predict2-container:1.2
docker run --gpus all --rm -v .:/workspace -v /workspace/.venv -it $(docker build -q .)
```

## Downloading Checkpoints
Expand Down
13 changes: 2 additions & 11 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,7 @@ release-check:
release pypi_token='dry-run' *args:
./bin/release.sh {{pypi_token}} {{args}}

# Build the docker image
docker-build cuda_version='12.6.3' *args:
docker build --build-arg CUDA_VERSION="{{cuda_version}}" -t cosmos-predict2:{{cuda_version}} -f uv.Dockerfile . {{args}}

# Run the docker container
docker cuda_version='12.6.3' *args:
docker *args:
# https://github.com/astral-sh/uv-docker-example/blob/main/run.sh
just docker-build "{{cuda_version}}"
docker run --gpus all --rm -v .:/workspace -v /workspace/.venv -it cosmos-predict2:{{cuda_version}} {{args}}

# Run the arm docker container
docker-arm *args:
docker run --gpus all --rm -v .:/workspace -it nvcr.io/nvidia/cosmos/cosmos-predict2-container:1.2 {{args}}
docker run --gpus all --rm -v .:/workspace -v /workspace/.venv -it $(docker build -q .)
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ dependencies = [

[project.optional-dependencies]
cu126 = [
"apex==0.1.0",
"flash-attn==2.6.3",
"natten==0.21.0",
"torch==2.6.0",
Expand Down Expand Up @@ -122,6 +123,7 @@ environments = [
"python_version == '3.10' and sys_platform == 'linux' and platform_machine == 'x86_64'",
]
no-build-package = [
"apex",
"flash-attn",
"natten",
"transformer-engine",
Expand All @@ -131,6 +133,9 @@ required-environments = [
]

[tool.uv.sources]
apex = [
{ index = "cosmos-cu126", extra = "cu126" },
]
flash-attn = [
{ index = "cosmos-cu126", extra = "cu126" },
]
Expand Down
66 changes: 0 additions & 66 deletions uv.Dockerfile

This file was deleted.

19 changes: 16 additions & 3 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.