From ad3767e70446e9c5c63ee864e0bcffbcdb1f9198 Mon Sep 17 00:00:00 2001 From: geomin12 Date: Tue, 18 Nov 2025 17:12:12 -0800 Subject: [PATCH 01/13] Adding CI nightly to run all machines and tests --- .github/workflows/ci_nightly.yml | 2 ++ .../github_actions/amdgpu_family_matrix.py | 20 +++---------------- build_tools/github_actions/configure_ci.py | 15 ++++++++++---- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/.github/workflows/ci_nightly.yml b/.github/workflows/ci_nightly.yml index 0e4553620..e15f5e887 100644 --- a/.github/workflows/ci_nightly.yml +++ b/.github/workflows/ci_nightly.yml @@ -53,6 +53,8 @@ concurrency: jobs: setup: uses: ./.github/workflows/setup.yml + with: + build_variant: "release" linux_build_and_test: name: Linux::${{ matrix.variant.family }}::${{ matrix.variant.build_variant_label }} diff --git a/build_tools/github_actions/amdgpu_family_matrix.py b/build_tools/github_actions/amdgpu_family_matrix.py index ae81357a5..9c277f44c 100644 --- a/build_tools/github_actions/amdgpu_family_matrix.py +++ b/build_tools/github_actions/amdgpu_family_matrix.py @@ -49,10 +49,11 @@ "sanity_check_only_for_family": True, }, "windows": { - "test-runs-on": "", + "test-runs-on": "windows-gfx110X-gpu-rocm", "family": "gfx110X-dgpu", "bypass_tests_for_releases": True, "build_variants": ["release"], + "sanity_check_only_for_family": True, }, }, "gfx1151": { @@ -148,19 +149,6 @@ "expect_pytorch_failure": True, }, }, - "gfx110x": { - "linux": { - "test-runs-on": "linux-gfx1101-gpu-rocm", - "family": "gfx110X-dgpu", - "build_variants": ["release"], - "sanity_check_only_for_family": True, - }, - "windows": { - "test-runs-on": "windows-gfx110X-gpu-rocm", - "family": "gfx110X-dgpu", - "build_variants": ["release"], - }, - }, "gfx1150": { "linux": { "test-runs-on": "", @@ -191,8 +179,6 @@ def get_all_families_for_trigger_types(trigger_types): for trigger_type in trigger_types: if trigger_type in matrix_map: for family_name, family_config in matrix_map[trigger_type].items(): - # Only add if not already present (first occurrence wins) - if family_name not in result: - result[family_name] = family_config + result[family_name] = family_config return result diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py index 15ec89ed0..d7473163d 100755 --- a/build_tools/github_actions/configure_ci.py +++ b/build_tools/github_actions/configure_ci.py @@ -288,7 +288,7 @@ def matrix_generator( if is_push and base_args.get("branch_name") == "main": active_trigger_types.extend(["presubmit", "postsubmit"]) if is_schedule: - active_trigger_types.append("nightly") + active_trigger_types.extend(["presubmit", "postsubmit", "nightly"]) # Get the appropriate family matrix based on active triggers # For workflow_dispatch and PR labels, we need to check all matrices @@ -382,9 +382,15 @@ def matrix_generator( if is_schedule: print(f"[SCHEDULE] Generating build matrix with {str(base_args)}") - # Add _only_ nightly targets. - for key in amdgpu_family_info_matrix_nightly: + # For nightly runs, we run all builds and full tests + amdgpu_family_info_matrix_all = (amdgpu_family_info_matrix_presubmit | amdgpu_family_info_matrix_postsubmit | amdgpu_family_info_matrix_nightly) + for key in amdgpu_family_info_matrix_all: selected_target_names.append(key) + + # For nightly runs, we want to run full tests regardless of limited machines, so we delete the sanity_check_only_for_family option + for key in lookup_matrix: + if "sanity_check_only_for_family" in lookup_matrix[key]: + del lookup_matrix[key]["sanity_check_only_for_family"] # Ensure the lists are unique unique_target_names = list(set(selected_target_names)) @@ -492,9 +498,10 @@ def main(base_args, linux_families, windows_families): test_type = "smoke" - # In the case of a scheduled run, we always want to build + # In the case of a scheduled run, we always want to build and we want to run full tests if is_schedule: enable_build_jobs = True + test_type = "full" else: modified_paths = get_modified_paths(base_ref) print("modified_paths (max 200):", modified_paths[:200]) From 233bf80d714eeb7cc3c47cf0fa0cac38333b8b60 Mon Sep 17 00:00:00 2001 From: geomin12 Date: Tue, 18 Nov 2025 17:16:50 -0800 Subject: [PATCH 02/13] testing --- build_tools/github_actions/configure_ci.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py index d7473163d..d7f2c96b3 100755 --- a/build_tools/github_actions/configure_ci.py +++ b/build_tools/github_actions/configure_ci.py @@ -379,7 +379,9 @@ def matrix_generator( ): selected_target_names.append(target) - if is_schedule: + # FOR TESTING ! PLS REMOVE ! + if is_schedule or is_workflow_dispatch: + # if is_schedule: print(f"[SCHEDULE] Generating build matrix with {str(base_args)}") # For nightly runs, we run all builds and full tests From 15908b2b73471d4ecb1d9ebdafd601a704b04c1c Mon Sep 17 00:00:00 2001 From: geomin12 Date: Tue, 18 Nov 2025 17:23:47 -0800 Subject: [PATCH 03/13] Adding test scenario --- build_tools/github_actions/configure_ci.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py index d7f2c96b3..36f5c83f6 100755 --- a/build_tools/github_actions/configure_ci.py +++ b/build_tools/github_actions/configure_ci.py @@ -501,7 +501,8 @@ def main(base_args, linux_families, windows_families): test_type = "smoke" # In the case of a scheduled run, we always want to build and we want to run full tests - if is_schedule: + # FOR TESTING !!!!!! + if is_schedule or is_workflow_dispatch: enable_build_jobs = True test_type = "full" else: From e931e286b84661f740b17b5b237149406391ade6 Mon Sep 17 00:00:00 2001 From: geomin12 Date: Tue, 18 Nov 2025 17:25:13 -0800 Subject: [PATCH 04/13] Adding lint --- build_tools/github_actions/configure_ci.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py index 36f5c83f6..9116384ef 100755 --- a/build_tools/github_actions/configure_ci.py +++ b/build_tools/github_actions/configure_ci.py @@ -379,16 +379,18 @@ def matrix_generator( ): selected_target_names.append(target) - # FOR TESTING ! PLS REMOVE ! - if is_schedule or is_workflow_dispatch: - # if is_schedule: + if is_schedule: print(f"[SCHEDULE] Generating build matrix with {str(base_args)}") # For nightly runs, we run all builds and full tests - amdgpu_family_info_matrix_all = (amdgpu_family_info_matrix_presubmit | amdgpu_family_info_matrix_postsubmit | amdgpu_family_info_matrix_nightly) + amdgpu_family_info_matrix_all = ( + amdgpu_family_info_matrix_presubmit + | amdgpu_family_info_matrix_postsubmit + | amdgpu_family_info_matrix_nightly + ) for key in amdgpu_family_info_matrix_all: selected_target_names.append(key) - + # For nightly runs, we want to run full tests regardless of limited machines, so we delete the sanity_check_only_for_family option for key in lookup_matrix: if "sanity_check_only_for_family" in lookup_matrix[key]: @@ -501,8 +503,7 @@ def main(base_args, linux_families, windows_families): test_type = "smoke" # In the case of a scheduled run, we always want to build and we want to run full tests - # FOR TESTING !!!!!! - if is_schedule or is_workflow_dispatch: + if is_schedule: enable_build_jobs = True test_type = "full" else: From 4d71fb2cee08c7464c27da4729148b2f9f0cdc2d Mon Sep 17 00:00:00 2001 From: geomin12 Date: Tue, 18 Nov 2025 17:35:58 -0800 Subject: [PATCH 05/13] Adding logic update --- build_tools/github_actions/configure_ci.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py index 9116384ef..e5c8d397e 100755 --- a/build_tools/github_actions/configure_ci.py +++ b/build_tools/github_actions/configure_ci.py @@ -379,7 +379,8 @@ def matrix_generator( ): selected_target_names.append(target) - if is_schedule: + # FOR TESTING!!!! + if is_schedule or is_workflow_dispatch: print(f"[SCHEDULE] Generating build matrix with {str(base_args)}") # For nightly runs, we run all builds and full tests @@ -392,9 +393,9 @@ def matrix_generator( selected_target_names.append(key) # For nightly runs, we want to run full tests regardless of limited machines, so we delete the sanity_check_only_for_family option - for key in lookup_matrix: - if "sanity_check_only_for_family" in lookup_matrix[key]: - del lookup_matrix[key]["sanity_check_only_for_family"] + for key in matrix_row: + if "sanity_check_only_for_family" in matrix_row[key][platform]: + del matrix_row[platform]["sanity_check_only_for_family"] # Ensure the lists are unique unique_target_names = list(set(selected_target_names)) @@ -503,7 +504,7 @@ def main(base_args, linux_families, windows_families): test_type = "smoke" # In the case of a scheduled run, we always want to build and we want to run full tests - if is_schedule: + if is_schedule or is_workflow_dispatch: enable_build_jobs = True test_type = "full" else: From 91cf65144570e766fb372484c27f1211be6d34ba Mon Sep 17 00:00:00 2001 From: geomin12 Date: Tue, 18 Nov 2025 17:37:55 -0800 Subject: [PATCH 06/13] Adding fix --- build_tools/github_actions/configure_ci.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py index e5c8d397e..228f334d0 100755 --- a/build_tools/github_actions/configure_ci.py +++ b/build_tools/github_actions/configure_ci.py @@ -393,9 +393,9 @@ def matrix_generator( selected_target_names.append(key) # For nightly runs, we want to run full tests regardless of limited machines, so we delete the sanity_check_only_for_family option - for key in matrix_row: - if "sanity_check_only_for_family" in matrix_row[key][platform]: - del matrix_row[platform]["sanity_check_only_for_family"] + for key in lookup_matrix: + if "sanity_check_only_for_family" in lookup_matrix[key][platform]: + del lookup_matrix[platform]["sanity_check_only_for_family"] # Ensure the lists are unique unique_target_names = list(set(selected_target_names)) From b29b9113c53a37ebad1e4c0181eb8132206aa06e Mon Sep 17 00:00:00 2001 From: geomin12 Date: Tue, 18 Nov 2025 17:39:01 -0800 Subject: [PATCH 07/13] Adding fix --- build_tools/github_actions/configure_ci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py index 228f334d0..4f8d8d6ed 100755 --- a/build_tools/github_actions/configure_ci.py +++ b/build_tools/github_actions/configure_ci.py @@ -395,7 +395,7 @@ def matrix_generator( # For nightly runs, we want to run full tests regardless of limited machines, so we delete the sanity_check_only_for_family option for key in lookup_matrix: if "sanity_check_only_for_family" in lookup_matrix[key][platform]: - del lookup_matrix[platform]["sanity_check_only_for_family"] + del lookup_matrix[key][platform]["sanity_check_only_for_family"] # Ensure the lists are unique unique_target_names = list(set(selected_target_names)) From 10a1c80b5e6eec84c5cf7d38c72e1bcd71a829b4 Mon Sep 17 00:00:00 2001 From: geomin12 Date: Tue, 18 Nov 2025 17:43:16 -0800 Subject: [PATCH 08/13] Adding fix --- build_tools/github_actions/configure_ci.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py index 4f8d8d6ed..64ea709ab 100755 --- a/build_tools/github_actions/configure_ci.py +++ b/build_tools/github_actions/configure_ci.py @@ -394,7 +394,10 @@ def matrix_generator( # For nightly runs, we want to run full tests regardless of limited machines, so we delete the sanity_check_only_for_family option for key in lookup_matrix: - if "sanity_check_only_for_family" in lookup_matrix[key][platform]: + if ( + platform in lookup_matrix[key] + and "sanity_check_only_for_family" in lookup_matrix[key][platform] + ): del lookup_matrix[key][platform]["sanity_check_only_for_family"] # Ensure the lists are unique From 279f93938889864092c09742ff8a2bdabb45d64c Mon Sep 17 00:00:00 2001 From: geomin12 Date: Tue, 18 Nov 2025 17:44:07 -0800 Subject: [PATCH 09/13] Removing test --- build_tools/github_actions/configure_ci.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py index 64ea709ab..75e559b0f 100755 --- a/build_tools/github_actions/configure_ci.py +++ b/build_tools/github_actions/configure_ci.py @@ -379,8 +379,7 @@ def matrix_generator( ): selected_target_names.append(target) - # FOR TESTING!!!! - if is_schedule or is_workflow_dispatch: + if is_schedule: print(f"[SCHEDULE] Generating build matrix with {str(base_args)}") # For nightly runs, we run all builds and full tests @@ -507,7 +506,7 @@ def main(base_args, linux_families, windows_families): test_type = "smoke" # In the case of a scheduled run, we always want to build and we want to run full tests - if is_schedule or is_workflow_dispatch: + if is_schedule: enable_build_jobs = True test_type = "full" else: From c1de4018ba67f54f0d38ec9b5d48ba067038ff2f Mon Sep 17 00:00:00 2001 From: geomin12 Date: Wed, 19 Nov 2025 11:03:39 -0800 Subject: [PATCH 10/13] Adjust to new machine --- build_tools/github_actions/amdgpu_family_matrix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/github_actions/amdgpu_family_matrix.py b/build_tools/github_actions/amdgpu_family_matrix.py index 9c277f44c..8e59e0dc6 100644 --- a/build_tools/github_actions/amdgpu_family_matrix.py +++ b/build_tools/github_actions/amdgpu_family_matrix.py @@ -42,7 +42,7 @@ }, "gfx110x": { "linux": { - "test-runs-on": "linux-gfx110X-gpu-rocm", + "test-runs-on": "linux-gfx1101-gpu-rocm", "family": "gfx110X-dgpu", "bypass_tests_for_releases": True, "build_variants": ["release"], From 2877ba55f503e4ab7dec22c34bfc085eb457a845 Mon Sep 17 00:00:00 2001 From: geomin12 Date: Wed, 19 Nov 2025 15:51:57 -0800 Subject: [PATCH 11/13] PR comments --- build_tools/github_actions/configure_ci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py index 75e559b0f..7a2e0b3c0 100755 --- a/build_tools/github_actions/configure_ci.py +++ b/build_tools/github_actions/configure_ci.py @@ -391,12 +391,12 @@ def matrix_generator( for key in amdgpu_family_info_matrix_all: selected_target_names.append(key) - # For nightly runs, we want to run full tests regardless of limited machines, so we delete the sanity_check_only_for_family option for key in lookup_matrix: if ( platform in lookup_matrix[key] and "sanity_check_only_for_family" in lookup_matrix[key][platform] ): + # For nightly runs, we want to run full tests regardless of limited machines, so we delete the sanity_check_only_for_family option del lookup_matrix[key][platform]["sanity_check_only_for_family"] # Ensure the lists are unique From 575b6965d28b26cc2cfed3c36e00e838ba8b3dd2 Mon Sep 17 00:00:00 2001 From: geomin12 Date: Thu, 20 Nov 2025 15:16:21 -0800 Subject: [PATCH 12/13] Adding clarification --- build_tools/github_actions/amdgpu_family_matrix.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/build_tools/github_actions/amdgpu_family_matrix.py b/build_tools/github_actions/amdgpu_family_matrix.py index 8e59e0dc6..0bf1d0a6c 100644 --- a/build_tools/github_actions/amdgpu_family_matrix.py +++ b/build_tools/github_actions/amdgpu_family_matrix.py @@ -3,6 +3,14 @@ * Each entry determines which families and test runners are available to use * Each group determines which entries run by default on workflow triggers + +For presubmit, postsubmit and nightly family selection: + +- presubmit runs the targets from presubmit dictionary on pull requests +- postsubmit runs the targets from presubmit and postsubmit dictionaries on pushes to main branch +- nightly runs targets from presubmit, postsubmit and nightly dictionaries + +TODO(#2200): clarify AMD GPU family selection """ all_build_variants = { From c8ed6845d871d0dff1a07876e83e7df40b24b18f Mon Sep 17 00:00:00 2001 From: geomin12 Date: Thu, 20 Nov 2025 17:27:49 -0800 Subject: [PATCH 13/13] Revert back to all --- build_tools/github_actions/amdgpu_family_matrix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/github_actions/amdgpu_family_matrix.py b/build_tools/github_actions/amdgpu_family_matrix.py index a6c666175..58cad7062 100644 --- a/build_tools/github_actions/amdgpu_family_matrix.py +++ b/build_tools/github_actions/amdgpu_family_matrix.py @@ -58,7 +58,7 @@ }, "windows": { "test-runs-on": "windows-gfx110X-gpu-rocm", - "family": "gfx110X-dgpu", + "family": "gfx110X-all", "bypass_tests_for_releases": True, "build_variants": ["release"], "sanity_check_only_for_family": True,