Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ ENV DEFAULT_CONDA_ENV=rapids

FROM ${PYTHON_PACKAGE_MANAGER}-base

ARG TARGETARCH

ARG CUDA
ENV CUDAARCHS="RAPIDS"
ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}"
Expand All @@ -32,7 +34,35 @@ ENV PYTHONSAFEPATH="1"
ENV PYTHONUNBUFFERED="1"
ENV PYTHONDONTWRITEBYTECODE="1"

ENV HISTFILE="/home/coder/.cache/._bash_history"

###
# sccache configuration
###
ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs"
ENV SCCACHE_REGION="us-east-2"
ENV SCCACHE_BUCKET="rapids-sccache-devs"
ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
ENV HISTFILE="/home/coder/.cache/._bash_history"
# 2hr (1 minute longer than sccache-dist request timeout)
ENV SCCACHE_IDLE_TIMEOUT=7200

###
# sccache-dist configuration
###
# Enable sccache-dist by default
ENV DEVCONTAINER_UTILS_ENABLE_SCCACHE_DIST=1
# Compile locally if max retries exceeded
ENV SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=true
# Retry transient errors 4 times (for a total of 5 attempts)
ENV SCCACHE_DIST_MAX_RETRIES=4
ENV SCCACHE_DIST_CONNECT_TIMEOUT=30
ENV SCCACHE_DIST_CONNECTION_POOL=false
# 1hr 59min (to accommodate debug builds)
ENV SCCACHE_DIST_REQUEST_TIMEOUT=7140
ENV SCCACHE_DIST_KEEPALIVE_ENABLED=true
ENV SCCACHE_DIST_KEEPALIVE_INTERVAL=20
ENV SCCACHE_DIST_KEEPALIVE_TIMEOUT=600
ENV SCCACHE_DIST_URL="https://${TARGETARCH}.linux.sccache.rapids.nvidia.com"

# Build as much in parallel as possible
ENV INFER_NUM_DEVICE_ARCHITECTURES=1
ENV MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=20
4 changes: 3 additions & 1 deletion .devcontainer/cuda12.9-conda/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
"runArgs": [
"--rm",
"--name",
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-conda"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-conda",
"--ulimit",
"nofile=500000"
],
"hostRequirements": {"gpu": "optional"},
"features": {
Expand Down
6 changes: 4 additions & 2 deletions .devcontainer/cuda12.9-pip/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
"args": {
"CUDA": "12.9",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:25.10-cpp-cuda12.9-ucx1.18.0-openmpi5.0.7"
"BASE": "rapidsai/devcontainers:25.10-cpp-cuda12.9-ucx1.19.0-openmpi5.0.7"
}
},
"runArgs": [
"--rm",
"--name",
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-pip"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-pip",
"--ulimit",
"nofile=500000"
],
"hostRequirements": {"gpu": "optional"},
"features": {
Expand Down
15 changes: 11 additions & 4 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -223,12 +223,19 @@ jobs:
needs: telemetry-setup
uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.10
with:
arch: '["amd64"]'
arch: '["amd64", "arm64"]'
cuda: '["12.9"]'
node_type: "cpu8"
rapids-aux-secret-1: GIST_REPO_READ_ORG_GITHUB_TOKEN
env: |
SCCACHE_DIST_MAX_RETRIES=inf
SCCACHE_SERVER_LOG=sccache=debug
SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false
SCCACHE_DIST_AUTH_TOKEN_VAR=RAPIDS_AUX_SECRET_1
build_command: |
sccache -z;
build-all --verbose;
sccache -s;
sccache --zero-stats;
build-all -j0 --verbose 2>&1 | tee telemetry-artifacts/build.log;
sccache --show-adv-stats | tee telemetry-artifacts/sccache-stats.txt;
telemetry-summarize:
# This job must use a self-hosted runner to record telemetry traces.
runs-on: linux-amd64-cpu4
Expand Down