@@ -79,24 +79,24 @@ steps:
79 79 parallel-scp -t 0 -r -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION ${ROOT_DIR} ${DST_DIR}
80 80 workingDirectory: '$(System.DefaultWorkingDirectory)'
81 81
82- - task: Bash@3
83- name: GenerateExecutionFile
84- displayName: Generate execution file
85- inputs:
86- targetType: 'inline'
87- script: |
88- set -e
89- HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
90- ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
91- SSH_OPTION="StrictHostKeyChecking=no"
92- KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
93- parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
94- -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
95- cd /root/mscclpp/msccl-users; \
96- mkdir -p execution-files; \
97- cd /root/mscclpp/msccl-users; \
98- bash algos/mscclpp_a100/generate_execution_plan.sh"'
99- workingDirectory: '$(System.DefaultWorkingDirectory)'
82+ # - task: Bash@3
83+ # name: GenerateExecutionFile
84+ # displayName: Generate execution file
85+ # inputs:
86+ # targetType: 'inline'
87+ # script: |
88+ # set -e
89+ # HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
90+ # ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
91+ # SSH_OPTION="StrictHostKeyChecking=no"
92+ # KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
93+ # parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
94+ # -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
95+ # cd /root/mscclpp/msccl-users; \
96+ # mkdir -p execution-files; \
97+ # cd /root/mscclpp/msccl-users; \
98+ # bash algos/mscclpp_a100/generate_execution_plan.sh"'
99+ # workingDirectory: '$(System.DefaultWorkingDirectory)'
100 100
101 101 - task: Bash@3
102 102 name: InstallNcclTests
@@ -116,56 +116,56 @@ steps:
116 116 MPI=1 MPI_HOME=/usr/local/mpi make -j"'
117 117 workingDirectory: '$(System.DefaultWorkingDirectory)'
118 118
119- - task: Bash@3
120- name: RunNcclAllReduceTest
121- displayName: Run NCCL AllReduce Test
122- inputs:
123- targetType: inline
124- script: |
125- set -e
126- HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
127- ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
128- SSH_OPTION="StrictHostKeyChecking=no"
129- KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
130- parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
131- -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
132- cd /root/mscclpp; \
133- mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
134- workingDirectory: '$(System.DefaultWorkingDirectory)'
119+ # - task: Bash@3
120+ # name: RunNcclAllReduceTest
121+ # displayName: Run NCCL AllReduce Test
122+ # inputs:
123+ # targetType: inline
124+ # script: |
125+ # set -e
126+ # HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
127+ # ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
128+ # SSH_OPTION="StrictHostKeyChecking=no"
129+ # KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
130+ # parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
131+ # -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
132+ # cd /root/mscclpp; \
133+ # mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
134+ # workingDirectory: '$(System.DefaultWorkingDirectory)'
135 135
136- - task: Bash@3
137- name: RunNcclAllGatherTest
138- displayName: Run NCCL AllGather Test
139- inputs:
140- targetType: inline
141- script: |
142- set -e
143- HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
144- ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
145- SSH_OPTION="StrictHostKeyChecking=no"
146- KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
147- parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
148- -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
149- cd /root/mscclpp; \
150- mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
151- workingDirectory: '$(System.DefaultWorkingDirectory)'
136+ # - task: Bash@3
137+ # name: RunNcclAllGatherTest
138+ # displayName: Run NCCL AllGather Test
139+ # inputs:
140+ # targetType: inline
141+ # script: |
142+ # set -e
143+ # HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
144+ # ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
145+ # SSH_OPTION="StrictHostKeyChecking=no"
146+ # KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
147+ # parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
148+ # -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
149+ # cd /root/mscclpp; \
150+ # mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
151+ # workingDirectory: '$(System.DefaultWorkingDirectory)'
152 152
153- - task: Bash@3
154- name: RunNcclReduceScatterTest
155- displayName: Run NCCL Reduce Scatter Test
156- inputs:
157- targetType: inline
158- script: |
159- set -e
160- HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
161- ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
162- SSH_OPTION="StrictHostKeyChecking=no"
163- KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
164- parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
165- -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
166- cd /root/mscclpp; \
167- mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
168- workingDirectory: '$(System.DefaultWorkingDirectory)'
153+ # - task: Bash@3
154+ # name: RunNcclReduceScatterTest
155+ # displayName: Run NCCL Reduce Scatter Test
156+ # inputs:
157+ # targetType: inline
158+ # script: |
159+ # set -e
160+ # HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
161+ # ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
162+ # SSH_OPTION="StrictHostKeyChecking=no"
163+ # KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
164+ # parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
165+ # -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
166+ # cd /root/mscclpp; \
167+ # mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
168+ # workingDirectory: '$(System.DefaultWorkingDirectory)'
169 169
170 170 - task: Bash@3
171 171 name: InstallNccl
@@ -245,25 +245,25 @@ steps:
245 245 mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
246 246 workingDirectory: '$(System.DefaultWorkingDirectory)'
247 247
248- - task: Bash@3
249- name: RunNcclReduceScatterFallbaclkToNcclTest
250- displayName: Run NCCL ReduceScatter Test with or without Fallback to NCCL operation
251- inputs:
252- targetType: 'inline'
253- script: |
254- set -e
255- HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
256- ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
257- SSH_OPTION="StrictHostKeyChecking=no"
258- KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
259- parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
260- -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
261- cd /root/mscclpp; \
262- echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"reducescatter\" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
263- mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="reducescatter" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
264- echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"broadcast\" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
265- mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
266- workingDirectory: '$(System.DefaultWorkingDirectory)'
248+ # - task: Bash@3
249+ # name: RunNcclReduceScatterFallbaclkToNcclTest
250+ # displayName: Run NCCL ReduceScatter Test with or without Fallback to NCCL operation
251+ # inputs:
252+ # targetType: 'inline'
253+ # script: |
254+ # set -e
255+ # HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
256+ # ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
257+ # SSH_OPTION="StrictHostKeyChecking=no"
258+ # KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
259+ # parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
260+ # -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
261+ # cd /root/mscclpp; \
262+ # echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"reducescatter\" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
263+ # mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="reducescatter" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
264+ # echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"broadcast\" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
265+ # mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
266+ # workingDirectory: '$(System.DefaultWorkingDirectory)'
267 267
268 268 - task: AzureCLI@2
269 269 name: StopVMSS
0 commit comments