Skip to content

Commit 5462101

Browse files
committed
Add interface to set nccl communicator configs
Signed-off-by: Sangkug Lym <[email protected]>
1 parent 1a16383 commit 5462101

16 files changed: +24 −0 lines changed

scripts/performance/argument_parser.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,13 @@ def parse_cli_args():
294294
required=False,
295295
default=None,
296296
)
297+
parser.add_argument(
298+
"--nccl_communicator_config_path",
299+
type=str,
300+
help="Path to NCCL communicator config yaml file",
301+
required=False,
302+
default=None,
303+
)
297304

298305
def list_of_strings(arg):
299306
return arg.split(',')

scripts/performance/llm/finetune_deepseek_v3.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ def override_recipe_configs(
101101
enable_cuda_graphs=enable_cuda_graphs,
102102
compute_dtype=args.compute_dtype,
103103
fp8_recipe=args.fp8_recipe,
104+
nccl_communicator_config_path=args.nccl_communicator_config_path,
104105
)
105106

106107
# disable HF ckpt loading

scripts/performance/llm/finetune_llama31_405b.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def override_recipe_configs(
8989
activation_offload_layers=activation_offload_layers,
9090
compute_dtype=args.compute_dtype,
9191
fp8_recipe=args.fp8_recipe,
92+
nccl_communicator_config_path=args.nccl_communicator_config_path,
9293
)
9394
recipe = set_exp_logging_configs(
9495
recipe,

scripts/performance/llm/finetune_llama3_70b.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def override_recipe_configs(
9696
activation_offload_layers=activation_offload_layers,
9797
compute_dtype=args.compute_dtype,
9898
fp8_recipe=args.fp8_recipe,
99+
nccl_communicator_config_path=args.nccl_communicator_config_path,
99100
)
100101
recipe = set_exp_logging_configs(
101102
recipe,

scripts/performance/llm/finetune_llama3_8b.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def override_recipe_configs(
8282
enable_cuda_graphs=enable_cuda_graphs,
8383
compute_dtype=args.compute_dtype,
8484
fp8_recipe=args.fp8_recipe,
85+
nccl_communicator_config_path=args.nccl_communicator_config_path,
8586
)
8687
recipe = set_exp_logging_configs(
8788
recipe,

scripts/performance/llm/pretrain_gpt3_175b.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def override_recipe_configs(
7979
activation_offload_layers=activation_offload_layers,
8080
compute_dtype=args.compute_dtype,
8181
fp8_recipe=args.fp8_recipe,
82+
nccl_communicator_config_path=args.nccl_communicator_config_path,
8283
)
8384
recipe = set_exp_logging_configs(
8485
recipe, "pre_train", "llm", "gpt3", args.tensorboard, args.wandb, args.wandb_prj_name, args.wandb_job_name

scripts/performance/llm/pretrain_llama31_405b.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def override_recipe_configs(
7979
activation_offload_layers=activation_offload_layers,
8080
compute_dtype=args.compute_dtype,
8181
fp8_recipe=args.fp8_recipe,
82+
nccl_communicator_config_path=args.nccl_communicator_config_path,
8283
)
8384
recipe = set_exp_logging_configs(
8485
recipe, "pre_train", "llm", "llama3", args.tensorboard, args.wandb, args.wandb_prj_name, args.wandb_job_name

scripts/performance/llm/pretrain_llama3_70b.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def override_recipe_configs(
7979
activation_offload_layers=activation_offload_layers,
8080
compute_dtype=args.compute_dtype,
8181
fp8_recipe=args.fp8_recipe,
82+
nccl_communicator_config_path=args.nccl_communicator_config_path,
8283
)
8384
recipe = set_exp_logging_configs(
8485
recipe, "pre_train", "llm", "llama3", args.tensorboard, args.wandb, args.wandb_prj_name, args.wandb_job_name

scripts/performance/llm/pretrain_llama3_8b.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def override_recipe_configs(
6565
enable_cuda_graphs=enable_cuda_graphs,
6666
compute_dtype=args.compute_dtype,
6767
fp8_recipe=args.fp8_recipe,
68+
nccl_communicator_config_path=args.nccl_communicator_config_path,
6869
)
6970
recipe = set_exp_logging_configs(
7071
recipe, "pre_train", "llm", "llama3", args.tensorboard, args.wandb, args.wandb_prj_name, args.wandb_job_name

scripts/performance/llm/pretrain_mixtral_8x22b.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def override_recipe_configs(
7373
activation_offload_layers,
7474
compute_dtype=args.compute_dtype,
7575
fp8_recipe=args.fp8_recipe,
76+
nccl_communicator_config_path=args.nccl_communicator_config_path,
7677
)
7778
recipe = set_exp_logging_configs(
7879
recipe, "pre_train", "llm", "mixtral", args.tensorboard, args.wandb, args.wandb_prj_name, args.wandb_job_name

0 commit comments

Comments (0)