Skip to content

Commit a24662f

Browse files
committed
Worked on ml- prefixed tests
1 parent 439e2bf commit a24662f

File tree

10 files changed

+30
-26
lines changed

10 files changed

+30
-26
lines changed

tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm-cluster.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,8 @@
1717
# region, zone, nfs_ip, remote_mount_homefs, must be defined in build file
1818
# with --extra-vars flag!
1919
test_name: a3h-cluster
20-
deployment_name: a3hc-{{ build }}
21-
slurm_cluster_name: "a3hc{{ build[0:4] }}"
20+
deployment_name: g{{ build[0:3] }}{{ build[-3:] }}-a3hc
21+
slurm_cluster_name: "g{{ build[0:3] }}{{ build[-3:] }}a3h"
2222
workspace: /workspace
2323
blueprint_yaml: "{{ workspace }}/examples/machine-learning/a3-highgpu-8g/ml-slurm-a3-2-cluster.yaml"
2424
login_node: "{{ slurm_cluster_name }}-login-*"

tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm-image.yml

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,16 +15,20 @@
1515
---
1616

1717
test_name: a3h-image
18-
deployment_name: a3himg{{ build }}
18+
deployment_name: f{{ build[0:3] }}{{ build[-3:] }}a3himg
19+
slurm_cluster_name: "i{{ build[0:3] }}{{ build[-3:] }}a3h"
1920
workspace: /workspace
2021
blueprint_yaml: "{{ workspace }}/examples/machine-learning/a3-highgpu-8g/ml-slurm-a3-1-image.yaml"
2122
packer_group_name: slurm-build
23+
network: "{{ deployment_name }}-net-0"
24+
sub_network: "{{ deployment_name }}-sub-net-0"
2225
packer_module_id: slurm-image
2326
delete_image: false
2427
cli_deployment_vars:
25-
network_name_system: default
26-
subnetwork_name_system: default
28+
network_name_system: "{{ network }}"
29+
subnetwork_name_system: "{{ sub_network }}"
2730
region: us-west1
2831
zone: us-west1-a
2932
source_image_project_id: deeplearning-platform
3033
source_image: dlvm-tcpd-cu120-718492384-ubuntu-2004-py310
34+
slurm_cluster_name: "{{ slurm_cluster_name }}"

tools/cloud-build/daily-tests/tests/ml-a3-megagpu-slurm-ubuntu.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@
1616

1717
# region, zone must be defined in build file with --extra-vars flag!
1818
test_name: a3m-slurm
19-
deployment_name: a3m-slurm-{{ build }}
20-
slurm_cluster_name: "a3m{{ build[0:4] }}"
19+
deployment_name: e{{ build[0:3] }}{{ build[-3:] }}-a3m-slurm
20+
slurm_cluster_name: "e{{ build[0:3] }}{{ build[-3:] }}a3m"
2121
workspace: /workspace
2222
blueprint_yaml: "{{ workspace }}/examples/machine-learning/a3-megagpu-8g/a3mega-slurm-blueprint.yaml"
2323
login_node: "{{ slurm_cluster_name }}-login-*"

tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-jbvms.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,13 +16,13 @@
1616

1717
# region, zone must be defined in build file with --extra-vars flag!
1818
test_name: a3u-jbvms
19-
deployment_name: a3u-jbvms-{{ build }}
19+
deployment_name: "a{{ build[0:3] }}{{ build[-3:] }}-a3u-jbvms"
2020
hostname_prefix: "{{ deployment_name }}-beowulf"
2121
workspace: /workspace
2222
blueprint_yaml: "{{ workspace }}/examples/machine-learning/a3-ultragpu-8g/a3ultra-vm.yaml"
2323
region: europe-west1
2424
zone: europe-west1-b
25-
network: "{{ test_name }}-net-0"
25+
network: "{{ deployment_name }}-net-0"
2626
remote_node: "{{ hostname_prefix }}-0"
2727
post_deploy_tests:
2828
- test-validation/test-mounts.yml
@@ -36,4 +36,4 @@ cli_deployment_vars:
3636
disk_size_gb: 200
3737
a3u_reservation_name: hpc-exr-2
3838
a3u_provisioning_model: RESERVATION_BOUND
39-
base_network_name: "{{ test_name }}"
39+
base_network_name: "{{ deployment_name }}"

tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-slurm.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,15 +16,15 @@
1616

1717
# region, zone must be defined in build file with --extra-vars flag!
1818
test_name: a3u-slurm
19-
deployment_name: a3u-slurm-{{ build }}
20-
slurm_cluster_name: "a3u{{ build[0:4] }}"
19+
deployment_name: b{{ build[0:3] }}{{ build[-3:] }}-a3u-slurm
20+
slurm_cluster_name: "b{{ build[0:3] }}{{ build[-3:] }}a3u"
2121
workspace: /workspace
2222
blueprint_yaml: "{{ workspace }}/examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-blueprint.yaml"
2323
login_node: "{{ slurm_cluster_name }}-slurm-login-*"
2424
controller_node: "{{ slurm_cluster_name }}-controller"
2525
region: europe-west1
2626
zone: europe-west1-b
27-
network: "{{ test_name }}-net-0"
27+
network: "{{ deployment_name }}-net-0"
2828
post_deploy_tests:
2929
- test-validation/test-mounts.yml
3030
- test-validation/test-partitions.yml
@@ -49,4 +49,4 @@ cli_deployment_vars:
4949
disk_size_gb: 200
5050
a3u_cluster_size: 2
5151
a3u_reservation_name: hpc-exr-2
52-
base_network_name: "{{ test_name }}"
52+
base_network_name: "{{ deployment_name }}"

tools/cloud-build/daily-tests/tests/ml-a4-highgpu-slurm-flex.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,15 +16,15 @@
1616

1717
# region, zone must be defined in build file with --extra-vars flag!
1818
test_name: a4h-slurm-flex
19-
deployment_name: a4h-slurm-flex-{{ build }}
20-
slurm_cluster_name: "a4hf{{ build[0:4] }}"
19+
deployment_name: c{{ build[0:3] }}{{ build[-3:] }}-a4h-slurm-flex
20+
slurm_cluster_name: "c{{ build[0:3] }}{{ build[-3:] }}a4h"
2121
workspace: /workspace
2222
blueprint_yaml: "{{ workspace }}/examples/machine-learning/a4-highgpu-8g/a4high-slurm-blueprint.yaml"
2323
login_node: "{{ slurm_cluster_name }}-slurm-login-*"
2424
controller_node: "{{ slurm_cluster_name }}-controller"
2525
region: us-central1
2626
zone: us-central1-b
27-
network: "{{ test_name }}-net-0"
27+
network: "{{ deployment_name }}-net-0"
2828
post_deploy_tests:
2929
- test-validation/test-mounts.yml
3030
- test-validation/test-partitions.yml
@@ -47,5 +47,5 @@ cli_deployment_vars:
4747
slurm_cluster_name: "{{ slurm_cluster_name }}"
4848
disk_size_gb: 200
4949
a4h_cluster_size: 2
50-
base_network_name: "{{ test_name }}"
50+
base_network_name: "{{ deployment_name }}"
5151
a4h_dws_flex_enabled: true

tools/cloud-build/daily-tests/tests/ml-a4-highgpu-slurm.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,15 +16,15 @@
1616

1717
# region, zone must be defined in build file with --extra-vars flag!
1818
test_name: a4h-slurm
19-
deployment_name: a4h-slurm-{{ build }}
20-
slurm_cluster_name: "a4h{{ build[0:4] }}"
19+
deployment_name: d{{ build[0:3] }}{{ build[-3:] }}-a4h-slurm
20+
slurm_cluster_name: "d{{ build[0:3] }}{{ build[-3:] }}a4h"
2121
workspace: /workspace
2222
blueprint_yaml: "{{ workspace }}/examples/machine-learning/a4-highgpu-8g/a4high-slurm-blueprint.yaml"
2323
login_node: "{{ slurm_cluster_name }}-slurm-login-*"
2424
controller_node: "{{ slurm_cluster_name }}-controller"
2525
region: us-central1
2626
zone: us-central1-b
27-
network: "{{ test_name }}-net-0"
27+
network: "{{ deployment_name }}-net-0"
2828
post_deploy_tests:
2929
- test-validation/test-mounts.yml
3030
- test-validation/test-partitions.yml
@@ -49,4 +49,4 @@ cli_deployment_vars:
4949
disk_size_gb: 200
5050
a4h_cluster_size: 2
5151
a4h_reservation_name: nvidia-b200-db38b25a-c93d-4a7e-a3c5-ba4135be357e
52-
base_network_name: "{{ test_name }}"
52+
base_network_name: "{{ deployment_name }}"

tools/cloud-build/daily-tests/tests/ml-gke-e2e.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
---
1515
test_name: ml-gke-e2e
16-
deployment_name: ml-gke-e2e-{{ build }}
16+
deployment_name: g{{ build[0:3] }}{{ build[-3:] }}-ml-gke-e2e
1717
region: asia-southeast1
1818
zone: asia-southeast1-b # for remote node
1919
workspace: /workspace

tools/cloud-build/daily-tests/tests/ml-gke.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
---
1515
test_name: ml-gke
16-
deployment_name: ml-gke-{{ build }}
16+
deployment_name: t{{ build[0:3] }}{{ build[-3:] }}-ml-gke
1717
region: asia-southeast1
1818
zone: asia-southeast1-b # for remote node
1919
workspace: /workspace

tools/cloud-build/daily-tests/tests/ml-slurm.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@
1515
---
1616

1717
test_name: ml-slurm-v6
18-
deployment_name: ml-slurm-v6-{{ build }}
19-
network: "{{ test_name }}-net"
18+
deployment_name: d{{ build[0:3] }}{{ build[-3:] }}-ml-slurm-v6
19+
network: "{{ deployment_name }}-net"
2020
workspace: /workspace
2121
blueprint_yaml: "{{ workspace }}/examples/ml-slurm.yaml"
2222
packer_group_name: packer

0 commit comments

Comments
 (0)