From 3fce28c30a96aa91c9023a95e0d4592b3ef35fad Mon Sep 17 00:00:00 2001 From: Yichen Zhang Date: Mon, 18 Aug 2025 19:57:01 +0800 Subject: [PATCH 1/2] fix the case in 'get_local_slices' func and add deepseek v3 to ci --- ci/auto_parallel/ci_auto_parallel.sh | 7 +++++++ python/paddle/distributed/auto_parallel/moe_utils.py | 7 +++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/ci/auto_parallel/ci_auto_parallel.sh b/ci/auto_parallel/ci_auto_parallel.sh index add54a39619084..438d6a0252ed24 100644 --- a/ci/auto_parallel/ci_auto_parallel.sh +++ b/ci/auto_parallel/ci_auto_parallel.sh @@ -77,6 +77,7 @@ get_diff_TO_case(){ case_list[${#case_list[*]}]=llama_auto case_list[${#case_list[*]}]=gpt-3_auto case_list[${#case_list[*]}]=gpt-3_dygraph + case_list[${#case_list[*]}]=deepseek_auto } print_info(){ @@ -258,6 +259,12 @@ if [[ ${#case_list[*]} -ne 0 ]];then execute_func_list $cmd gpt-3_dygraph let case_num++ clean_file ${work_dir}/../PaddleNLP/llm + elif [[ ${case} == "deepseek_auto" ]];then + cmd=${work_dir}/../PaddleNLP/scripts/distribute/ci_case_audo.sh + timeout 5m bash $cmd prepare_case deepseek_case_list_auto $FLAGS_install_deps $FLAGS_download_data + execute_func_list $cmd deepseek_auto + let case_num++ + clean_file ${work_dir}/../PaddleNLP/llm/auto_parallel/deepseek-v3 else echo -e "\033[31m ---- no ${case} \033" let case_num++ diff --git a/python/paddle/distributed/auto_parallel/moe_utils.py b/python/paddle/distributed/auto_parallel/moe_utils.py index 2c050a45dffe28..11b823a804ada1 100644 --- a/python/paddle/distributed/auto_parallel/moe_utils.py +++ b/python/paddle/distributed/auto_parallel/moe_utils.py @@ -393,10 +393,13 @@ def get_rank2tensor_indices(sub_mesh_indices_info, sub_mesh_partial_info): def get_local_slices(tensor, mesh, placements): - if len(mesh.shape) != len(placements): + if len(mesh.shape) < len(placements): raise ValueError( - f"placements nums ({len(placements)}) must equal mesh_shape({len(mesh.shape)})" + f"placements length 
({len(placements)}) must be less than or equal to mesh_shape({len(mesh.shape)})" ) + if len(placements) < len(mesh.shape): + for _ in range(len(mesh.shape) - len(placements)): + placements.append(dist.Replicate()) sub_mesh_indices_info = {mesh: [(0, s) for s in tensor.shape]} sub_mesh_partial_info = {} From a112acdbaee879b53586fdfbd172edabf8324a6e Mon Sep 17 00:00:00 2001 From: Yichen Zhang Date: Tue, 19 Aug 2025 13:48:09 +0800 Subject: [PATCH 2/2] add unit test for get_local_slices --- ci/auto_parallel/ci_auto_parallel.sh | 4 +++- .../semi_auto_parallel_moe_utils.py | 22 ++++++++++++++++++- test/auto_parallel/test_moe_utils.py | 4 ++-- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/ci/auto_parallel/ci_auto_parallel.sh b/ci/auto_parallel/ci_auto_parallel.sh index 438d6a0252ed24..0d42f8b08a814e 100644 --- a/ci/auto_parallel/ci_auto_parallel.sh +++ b/ci/auto_parallel/ci_auto_parallel.sh @@ -260,9 +260,11 @@ if [[ ${#case_list[*]} -ne 0 ]];then let case_num++ clean_file ${work_dir}/../PaddleNLP/llm elif [[ ${case} == "deepseek_auto" ]];then - cmd=${work_dir}/../PaddleNLP/scripts/distribute/ci_case_audo.sh + cmd=${work_dir}/../PaddleNLP/scripts/distribute/ci_case_auto.sh timeout 5m bash $cmd prepare_case deepseek_case_list_auto $FLAGS_install_deps $FLAGS_download_data execute_func_list $cmd deepseek_auto + export FLAGS_install_deps=1 + export FLAGS_download_data="deepseek ""$FLAGS_download_data" let case_num++ clean_file ${work_dir}/../PaddleNLP/llm/auto_parallel/deepseek-v3 else diff --git a/test/auto_parallel/semi_auto_parallel_moe_utils.py b/test/auto_parallel/semi_auto_parallel_moe_utils.py index 861f261bada767..646b5f82b8637d 100644 --- a/test/auto_parallel/semi_auto_parallel_moe_utils.py +++ b/test/auto_parallel/semi_auto_parallel_moe_utils.py @@ -13,6 +13,7 @@ # limitations under the License.
import os +import unittest import numpy as np @@ -26,7 +27,7 @@ ) -class TestMoEUtils: +class TestMoEUtils(unittest.TestCase): def __init__(self): self._dtype = os.getenv("dtype") self._seeds = eval(os.getenv("seeds")) @@ -160,6 +161,25 @@ def test_get_local_slices(self): dist_x.placements[1].reduce_type(), ) + y = paddle.arange(0, h * w).reshape(src_shape) + y_placements = [dist.Shard(0)] + dist_y = dist.shard_tensor(y, self._mesh0, y_placements) + dist_y_local_slices = get_local_slices( + dist_y, self._mesh0, y_placements + ) + np.testing.assert_equal( + dist_y_local_slices[0]['slice'], [(0, 2), (0, 4)] + ) + np.testing.assert_equal( + dist_y_local_slices[1]['slice'], [(2, 4), (0, 4)] + ) + + with self.assertRaises(ValueError): + tmp_placements = [dist.Shard(0), dist.Shard(1), dist.Replicate()] + dist_y_local_slices = get_local_slices( + dist_y, self._mesh0, tmp_placements + ) + # python -m paddle.distributed.launch --devices=0,1 semi_auto_parallel_moe_utils.py def test_reshard_general_case(self): """Test reshard when _only_reshard_mesh_shape returns False.""" diff --git a/test/auto_parallel/test_moe_utils.py b/test/auto_parallel/test_moe_utils.py index f40cfee3a678ed..dbc8e224f8f6fc 100644 --- a/test/auto_parallel/test_moe_utils.py +++ b/test/auto_parallel/test_moe_utils.py @@ -23,14 +23,14 @@ def setUp(self): num_of_devices=2, timeout=30, ) - self._default_envs = {"dtype": "float32", "seed": "2024"} + self._default_envs = {"dtype": "float32", "seeds": "2024"} self._changeable_envs = {"backend": ["gpu"]} def test_moe_utils(self): envs_list = test_base.gen_product_envs_list( { "dtype": "float32", - "seed": "2024", + "seeds": "2024", "FLAGS_enable_moe_utils": "true", }, {"backend": ["gpu"]},