Skip to content

Commit 98fb21e

Browse files
authored
Merge branch 'main' into chhwang/local-channel
2 parents e4aba07 + 4d9bb9f commit 98fb21e

File tree

230 files changed

+14851
-9018
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

230 files changed

+14851
-9018
lines changed

.azure-pipelines/templates/nccl-test.yaml

Lines changed: 85 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -79,24 +79,24 @@ steps:
7979
parallel-scp -t 0 -r -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION ${ROOT_DIR} ${DST_DIR}
8080
workingDirectory: '$(System.DefaultWorkingDirectory)'
8181

82-
- task: Bash@3
83-
name: GenerateExecutionFile
84-
displayName: Generate execution file
85-
inputs:
86-
targetType: 'inline'
87-
script: |
88-
set -e
89-
HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
90-
ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
91-
SSH_OPTION="StrictHostKeyChecking=no"
92-
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
93-
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
94-
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
95-
cd /root/mscclpp/msccl-users; \
96-
mkdir -p execution-files; \
97-
cd /root/mscclpp/msccl-users; \
98-
bash algos/mscclpp_a100/generate_execution_plan.sh"'
99-
workingDirectory: '$(System.DefaultWorkingDirectory)'
82+
# - task: Bash@3
83+
# name: GenerateExecutionFile
84+
# displayName: Generate execution file
85+
# inputs:
86+
# targetType: 'inline'
87+
# script: |
88+
# set -e
89+
# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
90+
# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
91+
# SSH_OPTION="StrictHostKeyChecking=no"
92+
# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
93+
# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
94+
# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
95+
# cd /root/mscclpp/msccl-users; \
96+
# mkdir -p execution-files; \
97+
# cd /root/mscclpp/msccl-users; \
98+
# bash algos/mscclpp_a100/generate_execution_plan.sh"'
99+
# workingDirectory: '$(System.DefaultWorkingDirectory)'
100100

101101
- task: Bash@3
102102
name: InstallNcclTests
@@ -116,56 +116,56 @@ steps:
116116
MPI=1 MPI_HOME=/usr/local/mpi make -j"'
117117
workingDirectory: '$(System.DefaultWorkingDirectory)'
118118

119-
- task: Bash@3
120-
name: RunNcclAllReduceTest
121-
displayName: Run NCCL AllReduce Test
122-
inputs:
123-
targetType: inline
124-
script: |
125-
set -e
126-
HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
127-
ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
128-
SSH_OPTION="StrictHostKeyChecking=no"
129-
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
130-
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
131-
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
132-
cd /root/mscclpp; \
133-
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
134-
workingDirectory: '$(System.DefaultWorkingDirectory)'
119+
# - task: Bash@3
120+
# name: RunNcclAllReduceTest
121+
# displayName: Run NCCL AllReduce Test
122+
# inputs:
123+
# targetType: inline
124+
# script: |
125+
# set -e
126+
# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
127+
# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
128+
# SSH_OPTION="StrictHostKeyChecking=no"
129+
# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
130+
# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
131+
# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
132+
# cd /root/mscclpp; \
133+
# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
134+
# workingDirectory: '$(System.DefaultWorkingDirectory)'
135135

136-
- task: Bash@3
137-
name: RunNcclAllGatherTest
138-
displayName: Run NCCL AllGather Test
139-
inputs:
140-
targetType: inline
141-
script: |
142-
set -e
143-
HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
144-
ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
145-
SSH_OPTION="StrictHostKeyChecking=no"
146-
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
147-
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
148-
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
149-
cd /root/mscclpp; \
150-
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
151-
workingDirectory: '$(System.DefaultWorkingDirectory)'
136+
# - task: Bash@3
137+
# name: RunNcclAllGatherTest
138+
# displayName: Run NCCL AllGather Test
139+
# inputs:
140+
# targetType: inline
141+
# script: |
142+
# set -e
143+
# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
144+
# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
145+
# SSH_OPTION="StrictHostKeyChecking=no"
146+
# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
147+
# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
148+
# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
149+
# cd /root/mscclpp; \
150+
# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
151+
# workingDirectory: '$(System.DefaultWorkingDirectory)'
152152

153-
- task: Bash@3
154-
name: RunNcclReduceScatterTest
155-
displayName: Run NCCL Reduce Scatter Test
156-
inputs:
157-
targetType: inline
158-
script: |
159-
set -e
160-
HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
161-
ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
162-
SSH_OPTION="StrictHostKeyChecking=no"
163-
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
164-
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
165-
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
166-
cd /root/mscclpp; \
167-
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
168-
workingDirectory: '$(System.DefaultWorkingDirectory)'
153+
# - task: Bash@3
154+
# name: RunNcclReduceScatterTest
155+
# displayName: Run NCCL Reduce Scatter Test
156+
# inputs:
157+
# targetType: inline
158+
# script: |
159+
# set -e
160+
# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
161+
# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
162+
# SSH_OPTION="StrictHostKeyChecking=no"
163+
# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
164+
# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
165+
# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
166+
# cd /root/mscclpp; \
167+
# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
168+
# workingDirectory: '$(System.DefaultWorkingDirectory)'
169169

170170
- task: Bash@3
171171
name: InstallNccl
@@ -245,25 +245,25 @@ steps:
245245
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
246246
workingDirectory: '$(System.DefaultWorkingDirectory)'
247247

248-
- task: Bash@3
249-
name: RunNcclReduceScatterFallbaclkToNcclTest
250-
displayName: Run NCCL ReduceScatter Test with or without Fallback to NCCL operation
251-
inputs:
252-
targetType: 'inline'
253-
script: |
254-
set -e
255-
HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
256-
ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
257-
SSH_OPTION="StrictHostKeyChecking=no"
258-
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
259-
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
260-
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
261-
cd /root/mscclpp; \
262-
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"reducescatter\" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
263-
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="reducescatter" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
264-
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"broadcast\" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
265-
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
266-
workingDirectory: '$(System.DefaultWorkingDirectory)'
248+
# - task: Bash@3
249+
# name: RunNcclReduceScatterFallbaclkToNcclTest
250+
# displayName: Run NCCL ReduceScatter Test with or without Fallback to NCCL operation
251+
# inputs:
252+
# targetType: 'inline'
253+
# script: |
254+
# set -e
255+
# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
256+
# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
257+
# SSH_OPTION="StrictHostKeyChecking=no"
258+
# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
259+
# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
260+
# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
261+
# cd /root/mscclpp; \
262+
# echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"reducescatter\" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
263+
# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="reducescatter" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
264+
# echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"broadcast\" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
265+
# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
266+
# workingDirectory: '$(System.DefaultWorkingDirectory)'
267267

268268
- task: AzureCLI@2
269269
name: StopVMSS

.devcontainer/Dockerfile

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,55 @@
11
ARG BASE_IMAGE
22
FROM ${BASE_IMAGE}
3-
ARG USERNAME=mscclpp
3+
ARG USERNAME=devuser
44
ARG USER_UID=1000
55
ARG USER_GID=$USER_UID
6+
ARG SSH_PORT=22345
67

7-
# Create the user
8-
RUN groupadd --gid $USER_GID $USERNAME && \
9-
useradd --uid $USER_UID --gid $USER_GID -m $USERNAME && \
10-
echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME && \
11-
chmod 0440 /etc/sudoers.d/$USERNAME
8+
# Create or modify the user
9+
RUN if getent group $USER_GID > /dev/null; then \
10+
EXISTING_GROUP=$(getent group $USER_GID | cut -d: -f1); \
11+
if [ "$EXISTING_GROUP" != "$USERNAME" ]; then \
12+
groupmod -n $USERNAME $EXISTING_GROUP; \
13+
fi; \
14+
else \
15+
groupadd --gid $USER_GID $USERNAME; \
16+
fi && \
17+
if id -u $USER_UID > /dev/null 2>&1; then \
18+
EXISTING_USER=$(getent passwd $USER_UID | cut -d: -f1); \
19+
if [ "$EXISTING_USER" != "$USERNAME" ]; then \
20+
usermod -l $USERNAME -d /home/$USERNAME -m $EXISTING_USER; \
21+
fi; \
22+
else \
23+
useradd --uid $USER_UID --gid $USER_GID -m $USERNAME; \
24+
fi && \
25+
usermod -g $USERNAME $USERNAME && \
26+
echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
27+
28+
RUN rm -rf /etc/apt/sources.list.d/cuda-* && \
29+
apt-get update && \
30+
apt install -y --no-install-recommends \
31+
clang-format \
32+
openssh-server \
33+
gdb \
34+
doxygen \
35+
graphviz \
36+
&& \
37+
apt-get autoremove -y && \
38+
apt-get clean && \
39+
rm -rf /var/lib/apt/lists/* /tmp/*
40+
41+
RUN python3 -m pip install --no-cache-dir \
42+
black \
43+
pytest \
44+
breathe \
45+
sphinx_rtd_theme \
46+
myst_parser \
47+
sphinxcontrib.mermaid
48+
49+
RUN sed -i "s/^Port 22/Port ${SSH_PORT}/" /etc/ssh/sshd_config && \
50+
mkdir -p /home/$USERNAME/.ssh && \
51+
ssh-keygen -t rsa -f /home/$USERNAME/.ssh/id_rsa -N "" -q && \
52+
cat /home/$USERNAME/.ssh/id_rsa.pub >> /home/$USERNAME/.ssh/authorized_keys && \
53+
chown -R $USERNAME:$USERNAME /home/$USERNAME/.ssh
1254

1355
USER $USERNAME

.devcontainer/devcontainer.json

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,17 @@
33
"build": {
44
"dockerfile": "Dockerfile",
55
"args": {
6-
"BASE_IMAGE": "ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.8"
6+
"BASE_IMAGE": "ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9",
7+
"USERNAME": "devuser",
8+
"SSH_PORT": "22345"
79
}
810
},
9-
"remoteUser": "mscclpp",
11+
"remoteUser": "devuser",
12+
"containerEnv": {
13+
"LC_ALL": "C",
14+
"LANG": "C",
15+
"LANGUAGE": "C"
16+
},
1017
"customizations": {
1118
"vscode": {
1219
"extensions": [
@@ -15,18 +22,30 @@
1522
"ms-python.vscode-pylance",
1623
// C++
1724
"ms-vscode.cpptools",
18-
"ms-vscode.cpptools-extension-pack",
1925
"ms-vscode.cmake-tools"
20-
]
26+
],
27+
"settings": {
28+
"terminal.integrated.defaultProfile.linux": "bash",
29+
"C_Cpp.default.includePath": [
30+
"${workspaceFolder}/**",
31+
"/usr/local/cuda/include",
32+
"/usr/include"
33+
],
34+
"C_Cpp.default.cStandard": "c17",
35+
"C_Cpp.default.cppStandard": "c++17"
36+
}
2137
}
2238
},
2339
"privileged": true,
2440
"runArgs": [
41+
"--cap-add=SYS_PTRACE",
2542
"--net=host",
2643
"--ipc=host",
27-
"--gpus=all",
28-
"--ulimit=memlock=-1:-1"
44+
"--ulimit=memlock=-1:-1",
45+
"--gpus=all"
2946
],
30-
"workspaceFolder": "/home/mscclpp/mscclpp",
31-
"workspaceMount": "source=${localWorkspaceFolder},target=/home/mscclpp/mscclpp,type=bind,consistency=cached"
47+
"workspaceFolder": "/home/devuser/mscclpp",
48+
"workspaceMount": "source=${localWorkspaceFolder},target=/home/devuser/mscclpp,type=bind,consistency=cached",
49+
"postStartCommand": "sudo service ssh start",
50+
"postCreateCommand": "bash /home/devuser/mscclpp/tools/install.sh nvidia /usr"
3251
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{
2+
"name": "MSCCL++ Dev Container",
3+
"build": {
4+
"dockerfile": "Dockerfile",
5+
"args": {
6+
"BASE_IMAGE": "ghcr.io/microsoft/mscclpp/mscclpp:base-dev-rocm6.2",
7+
"USERNAME": "devuser",
8+
"SSH_PORT": "22345"
9+
}
10+
},
11+
"remoteUser": "devuser",
12+
"containerEnv": {
13+
"LC_ALL": "C",
14+
"LANG": "C",
15+
"LANGUAGE": "C"
16+
},
17+
"customizations": {
18+
"vscode": {
19+
"extensions": [
20+
// Python
21+
"ms-python.python",
22+
"ms-python.vscode-pylance",
23+
// C++
24+
"ms-vscode.cpptools",
25+
"ms-vscode.cmake-tools"
26+
],
27+
"settings": {
28+
"terminal.integrated.defaultProfile.linux": "bash",
29+
"C_Cpp.default.includePath": [
30+
"${workspaceFolder}/**",
31+
"/opt/rocm/include",
32+
"/usr/include"
33+
],
34+
"C_Cpp.default.cStandard": "c17",
35+
"C_Cpp.default.cppStandard": "c++17"
36+
}
37+
}
38+
},
39+
"privileged": true,
40+
"runArgs": [
41+
"--cap-add=SYS_PTRACE",
42+
"--net=host",
43+
"--ipc=host",
44+
"--ulimit=memlock=-1:-1",
45+
"--security-opt=seccomp=unconfined",
46+
"--group-add=video",
47+
"--device=/dev/kfd",
48+
"--device=/dev/dri"
49+
],
50+
"workspaceFolder": "/home/devuser/mscclpp",
51+
"workspaceMount": "source=${localWorkspaceFolder},target=/home/devuser/mscclpp,type=bind,consistency=cached",
52+
"postStartCommand": "sudo service ssh start",
53+
"postCreateCommand": "bash /home/devuser/mscclpp/tools/install.sh amd /usr"
54+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
---
2+
applyTo: 'python/mscclpp/language/*.py'
3+
---
4+
5+
# Instructions for DSL API Documentation
6+
7+
## Overview
8+
The MSCCL++ DSL (Domain Specific Language) provides a Python API for defining distributed GPU communication patterns. All API functions should have comprehensive Google-style docstrings.
9+
10+
## Documentation Requirements
11+
- Add Google-style docstrings to the DSL API functions in the `mscclpp.language` package.
12+
- Ensure that each function's docstring includes:
13+
- A brief description of what the function does.
14+
- Parameters with their types and descriptions.
15+
- Return type and description.
16+
- Any exceptions raised by the function, if applicable.
17+
- Usage examples where appropriate.
18+
19+
## Implementation Steps
20+
1. Open each Python file in the `python/mscclpp/language` folder, excluding `__init__.py` and internal folders.
21+
2. For each function in the file, add a Google-style docstring that follows the documentation requirements outlined above.
22+
3. Ensure that the docstrings are clear, concise, and accurately describe the function's behavior.
23+
4. Review the docstrings for consistency in style and formatting.

0 commit comments

Comments
 (0)