DataStates
diff --git a/.github/ISSUE_TEMPLATE/deepspeed_chat_bug_report.md b/.github/ISSUE_TEMPLATE/deepspeed_chat_bug_report.md
Lines changed: 1 addition & 1 deletion
diff --git a/.github/ISSUE_TEMPLATE/inference_bug_report.md b/.github/ISSUE_TEMPLATE/inference_bug_report.md
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/nv-a6000.yml b/.github/workflows/nv-a6000.yml
Lines changed: 4 additions & 4 deletions
diff --git a/.github/workflows/nv-ds-chat.yml b/.github/workflows/nv-ds-chat.yml
Lines changed: 3 additions & 2 deletions
diff --git a/.github/workflows/nv-flash-attn.yml b/.github/workflows/nv-flash-attn.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/nv-human-eval.yml b/.github/workflows/nv-human-eval.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/nv-mii.yml b/.github/workflows/nv-mii.yml
Lines changed: 1 addition & 1 deletion
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
Lines changed: 4 additions & 4 deletions
@@ -32,7 +32,7 @@ If applicable, add screenshots to help explain your problem.
 **System info (please complete the following information):**
  - OS: [e.g. Ubuntu 18.04]
  - GPU count and types [e.g. two machines with x8 A100s each]
- - (if applicable) what [DeepSpeed-MII](https://github.com/microsoft/deepspeed-mii) version are you using
+ - (if applicable) what [DeepSpeed-MII](https://github.com/deepspeedai/deepspeed-mii) version are you using
  - (if applicable) Hugging Face Transformers/Accelerate/etc. versions
  - Python version
  - Any other relevant info about your setup
 
@@ -29,7 +29,7 @@ If applicable, add screenshots to help explain your problem.
 **System info (please complete the following information):**
  - OS: [e.g. Ubuntu 18.04]
  - GPU count and types [e.g. two machines with x8 A100s each]
- - (if applicable) what [DeepSpeed-MII](https://github.com/microsoft/deepspeed-mii) version are you using
+ - (if applicable) what [DeepSpeed-MII](https://github.com/deepspeedai/deepspeed-mii) version are you using
  - (if applicable) Hugging Face Transformers/Accelerate/etc. versions
  - Python version
  - Any other relevant info about your setup
 
@@ -23,7 +23,7 @@ jobs:
   unit-tests:
     runs-on: [self-hosted, nvidia, a6000]
     container:
-      image: nvcr.io/nvidia/pytorch:24.03-py3
+      image: nvcr.io/nvidia/pytorch:24.09-py3
       ports:
         - 80
       options: --gpus all --shm-size "8G"
@@ -57,16 +57,16 @@ jobs:
         run: |
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          python -m pytest --color=yes --durations=0 --verbose -rF -m 'inference_v2' unit/ --torch_ver="2.3" --cuda_ver="12"
-          python -m pytest --color=yes --durations=0 --verbose -rF -m 'inference_v2_ops' unit/ --torch_ver="2.3" --cuda_ver="12"
+          python -m pytest --color=yes --durations=0 --verbose -rF -m 'inference_v2' unit/ --torch_ver="2.5" --cuda_ver="12"
+          python -m pytest --color=yes --durations=0 --verbose -rF -m 'inference_v2_ops' unit/ --torch_ver="2.5" --cuda_ver="12"
       - name: MII unit tests
         run: |
           BRANCH="main"
           if [[ ! -z "${{ github.event.inputs.mii_branch }}" ]]; then
               BRANCH="${{ github.event.inputs.mii_branch }}"
           fi
           echo "Cloning DeepSpeed-MII branch: $BRANCH"
-          git clone -b $BRANCH --depth=1 https://github.com/microsoft/DeepSpeed-MII.git
+          git clone -b $BRANCH --depth=1 https://github.com/deepspeedai/DeepSpeed-MII.git
           cd DeepSpeed-MII
           pip install .[dev]
           cd tests
 
@@ -37,7 +37,7 @@ jobs:
 
       - name: Install pytorch
         run: |
-          pip3 install -U --cache-dir $TORCH_CACHE torch --index-url https://download.pytorch.org/whl/cu121
+          pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
 
@@ -54,7 +54,7 @@ jobs:
               BRANCH="${{ github.event.inputs.dse_branch }}"
           fi
           echo "DeepSpeedExamples Branch: $BRANCH"
-          git clone -b $BRANCH https://github.com/microsoft/DeepSpeedExamples.git
+          git clone -b $BRANCH https://github.com/deepspeedai/DeepSpeedExamples.git
           cd DeepSpeedExamples/applications/DeepSpeed-Chat
           pip install -r requirements.txt
           pip install -e .
@@ -67,6 +67,7 @@ jobs:
         run: |
           cd DeepSpeedExamples/applications/DeepSpeed-Chat
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
+          unset NCCL_DEBUG
           cd tests
           pytest $PYTEST_OPTS ./
 
 
@@ -18,7 +18,7 @@ jobs:
   unit-tests:
     runs-on: [self-hosted, nvidia, a6000]
     container:
-      image: nvcr.io/nvidia/pytorch:24.03-py3
+      image: nvcr.io/nvidia/pytorch:24.09-py3
       ports:
         - 80
       options: --gpus all --shm-size "8G"
@@ -53,7 +53,7 @@ jobs:
         run: |
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          python -m pytest --color=yes --durations=0 --verbose -rF unit/sequence_parallelism/test_ulysses.py --torch_ver="2.3" --cuda_ver="12"
+          python -m pytest --color=yes --durations=0 --verbose -rF unit/sequence_parallelism/test_ulysses.py --torch_ver="2.5" --cuda_ver="12"
       - name: Open GitHub issue if nightly CI fails
         if: ${{ failure() && (github.event_name == 'schedule') }}
         uses: JasonEtco/create-an-issue@v2
 
@@ -11,7 +11,7 @@ jobs:
   unit-tests:
     runs-on: [self-hosted, nvidia, a6000]
     container:
-      image: nvcr.io/nvidia/pytorch:24.03-py3
+      image: nvcr.io/nvidia/pytorch:24.09-py3
       ports:
         - 80
       options: --gpus all --shm-size "8G"
@@ -50,4 +50,4 @@ jobs:
         run: |
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          python -m pytest --color=yes --durations=0 --verbose -rF -m 'evaluation' -k "test_human_eval" unit/ --torch_ver="2.3" --cuda_ver="12"
+          python -m pytest --color=yes --durations=0 --verbose -rF -m 'evaluation' -k "test_human_eval" unit/ --torch_ver="2.5" --cuda_ver="12"
@@ -66,7 +66,7 @@ jobs:
               BRANCH="${{ github.event.inputs.mii_branch }}"
           fi
           echo "Cloning DeepSpeed-MII branch: $BRANCH"
-          git clone -b $BRANCH --depth=1 https://github.com/microsoft/DeepSpeed-MII.git
+          git clone -b $BRANCH --depth=1 https://github.com/deepspeedai/DeepSpeed-MII.git
           cd DeepSpeed-MII
           pip install .[dev]
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
 
@@ -23,7 +23,7 @@ and then repeat the previous `git commit` command.
 ## Testing
 DeepSpeed tracks two types of tests: unit tests and more costly model convergence tests.
 The model convergence tests train
-[DeepSpeedExamples](https://github.com/microsoft/DeepSpeedExamples/) and measure
+[DeepSpeedExamples](https://github.com/deepspeedai/DeepSpeedExamples/) and measure
 end-to-end convergence and related metrics. Unit tests are found in `tests/unit/` and
 the model convergence tests are found in `tests/model/`.
 
@@ -40,7 +40,7 @@ tests. Note that [pytest-forked](https://github.com/pytest-dev/pytest-forked) an
 
 ### Model Tests
 To execute model tests, first [install DeepSpeed](#installation). The
-[DeepSpeedExamples](https://github.com/microsoft/DeepSpeedExamples/) repository is cloned
+[DeepSpeedExamples](https://github.com/deepspeedai/DeepSpeedExamples/) repository is cloned
 as part of this process. Next, execute the model test driver:
 ```bash
 cd tests/model/
@@ -85,8 +85,8 @@ Based on the issue we shall discuss the merit of the new feature and decide whet
 ### Step 2: implementation and verification
 Contributor will go ahead and implement the feature, and the DeepSpeed team will provide guidance/helps as needed. The required deliverables include:
 
-* A PR to [microsoft/DeepSpeed](https://github.com/microsoft/DeepSpeed) including (1) the feature implementation (2) unit tests (3) documentation (4) tutorial
-* A PR to [microsoft/DeepSpeedExamples](https://github.com/microsoft/DeepSpeedExamples) or [microsoft/Megatron-DeepSpeed](https://github.com/microsoft/Megatron-DeepSpeed) including the examples of how to use the feature (this is related to the planned testing experiments in proposal)
+* A PR to [deepspeedai/DeepSpeed](https://github.com/deepspeedai/DeepSpeed) including (1) the feature implementation (2) unit tests (3) documentation (4) tutorial
+* A PR to [deepspeedai/DeepSpeedExamples](https://github.com/deepspeedai/DeepSpeedExamples) or [deepspeedai/Megatron-DeepSpeed](https://github.com/deepspeedai/Megatron-DeepSpeed) including the examples of how to use the feature (this is related to the planned testing experiments in proposal)
 * In the implementation (code, documentation, tutorial), we require the feature author to record their GitHub username as a contact method for future questions/maintenance.
 
 After receiving the PRs, we will review them and merge them after necessary tests/fixes.