22 commits
ca278d8
Use dict.items
Flamefire Jun 26, 2025
3ff46c0
Allow rerun and skipped tests
Flamefire Jun 26, 2025
a609de2
Fix trimming test case name
Flamefire Jun 26, 2025
f3699b4
Add test for PyTorch log parsing
Flamefire Jun 26, 2025
420d850
Gracefully handle empty test result files
Flamefire Jun 26, 2025
b472ced
Also clean error-tags
Flamefire Jul 22, 2025
2210890
Ignore error on formatting empty XML
Flamefire Jul 22, 2025
d27da69
Relax condition on reported number of tests
Flamefire Jul 22, 2025
8e2033c
Find PyTorch test suite variants
Flamefire Jul 28, 2025
804a048
Isolate against more user env variables
Flamefire Sep 19, 2025
9949ca8
Avoid PyTorch configure warnings/issues by explicitly setting (more) …
Flamefire Sep 19, 2025
906d8cf
Symlink NCCL library when added as a build dependency
Flamefire Sep 19, 2025
a986775
Revert "Symlink NCCL library when added as a build dependency"
Flamefire Sep 19, 2025
4feb3a9
Use raise-from for better error reporting
Flamefire Oct 20, 2025
c98ab49
Don't fail for incomplete testcase tags
Flamefire Oct 20, 2025
6de55c4
Handle skip-and-fail mismatch
Flamefire Oct 20, 2025
bacadb8
Update for 2.8+
Flamefire Oct 22, 2025
de6a86f
Add exception for "suitename" of "-c"
Flamefire Nov 7, 2025
174b7bb
Add CLI arg to sort suites by custom attribute
Flamefire Nov 10, 2025
79fa0c5
Show number of failed tests in list of failed suites
Flamefire Nov 10, 2025
f10aa17
Merge branch 'develop' into pytorch-log-parse-test
Flamefire Nov 18, 2025
bcac200
Merge branch 'develop' into pytorch-log-parse-test
Flamefire Dec 1, 2025
158 changes: 108 additions & 50 deletions easybuild/easyblocks/p/pytorch.py

Large diffs are not rendered by default.
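Since the easyblock diff is collapsed above, here is a rough, non-authoritative sketch of the parsing interface the tests below exercise. It is inferred purely from how the test code uses it; the names come from the tests, but every body is an illustrative stub, not the actual implementation in `easybuild/easyblocks/p/pytorch.py`.

```python
from enum import Enum
from typing import Dict, List


class TestState(Enum):
    """Outcome of a single test case; .value is what the test report prints."""
    SUCCESS = 'success'
    FAILURE = 'failure'
    ERROR = 'error'
    SKIPPED = 'skipped'


class TestCase:
    """A single parsed test, e.g. 'TestTorchrun.test_cpu_info'."""
    name: str
    state: TestState


class TestSuite:
    """One parsed suite; attribute and method names taken from the tests below."""
    name: str       # e.g. 'backends/xeon/test_launch'
    errors: int
    failures: int
    num_tests: int
    skipped: int
    summary: str    # e.g. '2 failed, 2 passed, 3 skipped, 1 errors'

    def __getitem__(self, name: str) -> TestCase:
        """Look up a single test by 'ClassName.test_name'."""

    def get_tests(self) -> List[TestCase]:
        """All tests in this suite."""

    def get_failed_tests(self) -> List[str]:
        """Names of failed tests."""

    def get_errored_tests(self) -> List[str]:
        """Names of errored tests."""


def get_test_results(log_dir) -> Dict[str, TestSuite]:
    """Map suite name to TestSuite, parsed from the XML reports under log_dir.
    Raises ValueError for malformed reports."""
```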

166 changes: 164 additions & 2 deletions test/easyblocks/easyblock_specific.py
@@ -35,13 +35,15 @@
import tempfile
import textwrap
from io import StringIO
from pathlib import Path
from unittest import TestLoader, TextTestRunner
from test.easyblocks.module import cleanup

import easybuild.tools.options as eboptions
import easybuild.easyblocks.generic.pythonpackage as pythonpackage
import easybuild.easyblocks.l.lammps as lammps
import easybuild.easyblocks.p.python as python
import easybuild.easyblocks.p.pytorch as pytorch
from easybuild.base.testing import TestCase
from easybuild.easyblocks.generic.cmakemake import det_cmake_version
from easybuild.easyblocks.generic.toolchain import Toolchain
@@ -495,8 +497,8 @@ def test_translate_lammps_version(self):
'29Aug2024_update2': '2024.08.29.2',
'28Oct2024': '2024.10.28',
}
for key in lammps_versions:
self.assertEqual(lammps.translate_lammps_version(key), lammps_versions[key])
for key, expected_version in lammps_versions.items():
self.assertEqual(lammps.translate_lammps_version(key), expected_version)

version_file = os.path.join(self.tmpdir, 'src', 'version.h')
version_txt = '\n'.join([
@@ -517,6 +519,166 @@ def test_translate_lammps_version(self):

self.assertEqual(lammps.translate_lammps_version('d3adb33f', path=self.tmpdir), '2025.04.02.3')

def test_pytorch_test_log_parsing(self):
"""Verify parsing of XML files produced by PyTorch tests."""
TestState = pytorch.TestState

test_log_dir = Path(__file__).parent.parent / 'pytorch_test_logs'

results = pytorch.get_test_results(test_log_dir / 'test-reports')
results2 = pytorch.get_test_results(test_log_dir)
self.assertEqual(results.keys(), results2.keys())
for name, suite in results.items():
self.assertEqual((name, suite.summary), (name, results2[name].summary))
del results2

self.assertEqual(len(results), 15)

# Two small test suites used as a smoke test covering most features
self.assertIn('backends/xeon/test_launch', results)
suite = results['backends/xeon/test_launch']
self.assertEqual((suite.errors, suite.failures, suite.num_tests, suite.skipped), (1, 2, 8, 3))
# Failure in one file, success in the other --> Success
self.assertEqual(suite['TestTorchrun.test_cpu_info'].state, TestState.SUCCESS)
# New in 2nd file
self.assertEqual(suite['TestTorchrun.test_multi_threads'].state, TestState.SUCCESS)
self.assertEqual(suite['TestTorchrun.test_reshape_cpu_float64'].state, TestState.FAILURE)
self.assertEqual(suite['TestTorchrun.test_foo'].state, TestState.SKIPPED)
self.assertEqual(suite['TestTorchrun.test_bar'].state, TestState.ERROR)
self.assertEqual(suite.get_errored_tests(), ['TestTorchrun.test_bar'])
self.assertEqual(suite.get_failed_tests(), ['TestTorchrun.test_reshape_cpu_float64', 'TestTorchrun.test_baz'])
self.assertIn('test_autoload', results)
suite = results['test_autoload']
self.assertEqual((suite.errors, suite.failures, suite.num_tests, suite.skipped), (0, 0, 2, 1))
self.assertEqual(suite['TestBackendAutoload.test_autoload'].state, TestState.SUCCESS)
self.assertEqual(suite['TestBackendAutoload.test_unload'].state, TestState.SKIPPED)

# Verify summaries, which should be enough to catch most issues
report = '\n'.join(sorted(f'{suite.name}: {suite.summary}' for suite in results.values()))
self.assertEqual(report, textwrap.dedent("""
backends/xeon/test_launch: 2 failed, 2 passed, 3 skipped, 1 errors
dist-gloo-init-env/distr/algorithms/quantization/test_quantization: 0 failed, 1 passed, 0 skipped, 0 errors
dist-gloo-init-file/distr/algorithms/quantization/test_quantization: 0 failed, 1 passed, 0 skipped, 0 errors
dist-nccl-init-env/distr/algorithms/quantization/test_quantization: 0 failed, 1 passed, 0 skipped, 0 errors
dist-nccl-init-file/distr/algorithms/quantization/test_quantization: 0 failed, 1 passed, 0 skipped, 0 errors
dist/foo/bar: 0 failed, 4 passed, 0 skipped, 0 errors
distributed/tensor/test_dtensor_ops: 0 failed, 2 passed, 2 skipped, 0 errors
dynamo/test_dynamic_shapes: 3 failed, 14 passed, 0 skipped, 0 errors
dynamo/test_misc: 1 failed, 9 passed, 0 skipped, 0 errors
inductor/test_aot_inductor_arrayref: 2 failed, 0 passed, 0 skipped, 0 errors
inductor/test_cudagraph_trees: 1 failed, 0 passed, 0 skipped, 0 errors
jit/test_builtins: 0 failed, 1 passed, 0 skipped, 0 errors
test_autoload: 0 failed, 1 passed, 1 skipped, 0 errors
test_nestedtensor: 3 failed, 2 passed, 3 skipped, 1 errors
test_quantization: 0 failed, 12 passed, 5 skipped, 0 errors
""").strip())
tests = '\n'.join(sorted(f'{test.name}: {test.state.value}'
for suite in results.values()
for test in suite.get_tests()))
self.assertEqual(tests, textwrap.dedent("""
AOTInductorTestABICompatibleCpuWithStackAllocation.test_fail_and_skip: failure
AOTInductorTestABICompatibleCpuWithStackAllocation.test_skip_and_fail: failure
CudaGraphTreeTests.test_workspace_allocation_error: failure
DistQuantizationTests.test_all_gather_fp16: success
DistQuantizationTests.test_all_gather_fp16: success
DistQuantizationTests.test_all_gather_fp16: success
DistQuantizationTests.test_all_gather_fp16: success
DynamicShapesCtxManagerTests.test_autograd_profiler_dynamic_shapes: success
DynamicShapesCtxManagerTests.test_generic_context_manager_with_graph_break_dynamic_shapes: success
DynamicShapesCtxManagerTests.test_generic_ctx_manager_with_graph_break_dynamic_shapes: success
DynamicShapesMiscTests.test_outside_linear_module_free_dynamic_shapes: failure
DynamicShapesMiscTests.test_packaging_version_parse_dynamic_shapes: success
DynamicShapesMiscTests.test_pair_dynamic_shapes: success
DynamicShapesMiscTests.test_param_shape_binops_dynamic_shapes: success
DynamicShapesMiscTests.test_parameter_free_dynamic_shapes: failure
DynamicShapesMiscTests.test_patched_builtin_functions_dynamic_shapes: success
DynamicShapesMiscTests.test_proxy_frozen_dataclass_dynamic_shapes: success
DynamicShapesMiscTests.test_pt2_compliant_ops_are_allowed_dynamic_shapes: success
DynamicShapesMiscTests.test_pt2_compliant_overload_dynamic_shapes: success
DynamicShapesMiscTests.test_pure_python_accumulate_dynamic_shapes: success
DynamicShapesMiscTests.test_py_guards_mark_dynamic_dynamic_shapes: success
DynamicShapesMiscTests.test_python_slice_dynamic_shapes: success
DynamicShapesMiscTests.test_pytree_tree_flatten_unflatten_dynamic_shapes: success
DynamicShapesMiscTests.test_pytree_tree_leaves_dynamic_shapes: failure
MiscTests.test_packaging_version_parse: success
MiscTests.test_pair: success
MiscTests.test_param_shape_binops: success
MiscTests.test_parameter_free: failure
MiscTests.test_pytree_tree_map: success
MiscTests.test_shape_env_no_recording: success
MiscTests.test_shape_env_recorded_function_fallback: success
MiscTests.test_yield_from_in_a_loop: success
TestBackendAutoload.test_autoload: success
TestBackendAutoload.test_unload: skipped
TestBuiltins.test_name: success
TestCustomFunction.test_autograd_function_with_matmul_folding_at_output: success
TestDTensorOpsCPU.test_dtensor_op_db_H_cpu_float16: success
TestDTensorOpsCPU.test_dtensor_op_db_H_cpu_float32: success
TestDTensorOpsCPU.test_dtensor_op_db_H_cpu_float64: skipped
TestDTensorOpsCPU.test_dtensor_op_db_H_cpu_int8: skipped
TestDynamicQuantizedOps.test_qrnncell: success
TestFakeQuantizeOps.test_backward_per_channel: skipped
TestFakeQuantizeOps.test_backward_per_channel_cachemask_cpu: success
TestFakeQuantizeOps.test_backward_per_channel_cachemask_cuda: success
TestName.test_bar: success
TestNestedTensor.test_bmm_cuda_gpu_float16: failure
TestNestedTensor.test_bmm_cuda_gpu_float32: failure
TestNestedTensor.test_bmm_cuda_gpu_float64: error
TestNestedTensor.test_cat: success
TestNestedTensor.test_copy_: success
TestNestedTensor.test_reshape_cpu_float16: skipped
TestNestedTensor.test_reshape_cpu_float32: skipped
TestNestedTensor.test_reshape_cpu_float64: failure
TestNestedTensorSubclassCPU.test_linear_backward_memory_usage_cpu_float32: skipped
TestNumericDebugger.test_quantize_pt2e_preserve_handle: success
TestNumericDebugger.test_re_export_preserve_handle: success
TestPadding.test_reflection_pad1d: success
TestQuantizedConv.test_conv_reorder_issue_onednn: success
TestQuantizedConv.test_conv_transpose_reorder_issue_onednn: success
TestQuantizedFunctionalOps.test_relu_api: success
TestQuantizedLinear.test_qlinear_cudnn: skipped
TestQuantizedLinear.test_qlinear_gelu_pt2e: success
TestQuantizedOps.test_adaptive_avg_pool2d_nhwc: success
TestQuantizedOps.test_adaptive_avg_pool: skipped
TestQuantizedOps.test_qadd_relu_cudnn: skipped
TestQuantizedOps.test_qadd_relu_cudnn_nhwc: skipped
TestQuantizedOps.test_qadd_relu_different_qparams: success
TestTorchrun.test_bar: error
TestTorchrun.test_baz: failure
TestTorchrun.test_cpu_info: success
TestTorchrun.test_foo2: skipped
TestTorchrun.test_foo3: skipped
TestTorchrun.test_foo: skipped
TestTorchrun.test_multi_threads: success
TestTorchrun.test_reshape_cpu_float64: failure
TestTracer.test_jit_save: success
bar.test_2.test_func3: success
bar.test_foo.TestBar.test_func2: success
bar.test_foo.TestName.test_func1: success
""").strip())

# Some error cases
error_log_dir = test_log_dir / 'faulty-reports'

self.assertErrorRegex(ValueError, "<testsuites> or <testsuite>",
pytorch.get_test_results, error_log_dir / 'root')
self.assertErrorRegex(ValueError, "Failed to parse",
pytorch.get_test_results, error_log_dir / 'invalid_xml')
self.assertErrorRegex(ValueError, "multiple reported files",
pytorch.get_test_results, error_log_dir / 'multi_file')
self.assertErrorRegex(ValueError, "Path from folder and filename should be equal",
pytorch.get_test_results, error_log_dir / 'different_file_name')
self.assertErrorRegex(ValueError, "Unexpected file attribute",
pytorch.get_test_results, error_log_dir / 'file_attribute')
self.assertErrorRegex(ValueError, "Invalid state",
pytorch.get_test_results, error_log_dir / 'skip_and_failed')
self.assertErrorRegex(ValueError, "no test",
pytorch.get_test_results, error_log_dir / 'no_tests')
self.assertErrorRegex(ValueError, "Invalid test count",
pytorch.get_test_results, error_log_dir / 'consistency')
self.assertErrorRegex(ValueError, "Duplicate test",
pytorch.get_test_results, error_log_dir / 'duplicate')


def suite(loader):
"""Return all easyblock-specific tests."""
5 changes: 5 additions & 0 deletions test/pytorch_test_logs/README.md
@@ -0,0 +1,5 @@
# PyTorch test result files

This folder contains files as written by the PyTorch test step (via `unittest-xml-reporting`), used to test the log parsing in the PyTorch easyblock.

Most files are simplified or constructed in a way to reproduce a specific corner case of the parser or format.
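For illustration, a minimal simplified report has roughly this shape (names and counts are placeholders):

```xml
<?xml version="1.0"?>
<testsuites>
  <testsuite name="pytest" errors="0" failures="1" skipped="1" tests="3" time="4.2">
    <testcase classname="TestName" name="test_ok" time="4.2" file="test_name.py"/>
    <testcase classname="TestName" name="test_skipped" time="4.2" file="test_name.py">
      <skipped message="..."/>
    </testcase>
    <testcase classname="TestName" name="test_failed" time="4.2" file="test_name.py">
      <failure message="...">[snip]</failure>
    </testcase>
  </testsuite>
</testsuites>
```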
127 changes: 127 additions & 0 deletions test/pytorch_test_logs/cleanup_files.py
@@ -0,0 +1,127 @@
#!/usr/bin/env python3

"""This script strips content and filenames of PyTorch test result XML files in a deterministic way and formats them.
The intent is to keep the general structure of the files but still make them shorter and easier to read.

Usage: Pass the target directory as the single argument or
run this script without arguments to format the XML files in the "test-reports" directory next to the script.
"""

import re
import subprocess
import sys
from hashlib import md5
from pathlib import Path


def shorten_filename(path: Path) -> Path:
"""Shorten the file name by truncating random part of .e.g. test_quantization-d1303cbc2b57cf06.xml"""
match = re.search(r'-(?P<hash>[a-z0-9]{6,})\.xml$', path.name)
if match:
fixed_part: str = path.name[:match.start()]
short_hash = match['hash'][:5]
new_name: Path = path.with_name(f"{fixed_part}-{short_hash}.xml")
path.rename(new_name)
return new_name
return path


def shorten_content(path: Path):
"""Shorten attribute values and tag content (stdout, stderr, etc.) in the XML file."""
content: str = path.read_text(encoding='utf-8')

# Shorten messages in tags: <skipped message="...">
content = re.sub(r'message="[^"]+"', 'message="..."', content)
# Shorten time
content = re.sub(r'time="[^"]+"', 'time="4.2"', content)
# Ignore timestamp & hostname
content = re.sub(r'timestamp="[^"]+"', '', content)
content = re.sub(r'hostname="[^"]+"', '', content)
# Remove type attribute from <skipped> tags
content = re.sub(r'(<skipped)\s+type="[^"]+"', r'\1', content)

# Remove stdout/stderr from about half of the files.
# For the other half just shorten it.
remove_output: bool = int(md5(str(path.name).encode('utf-8')).hexdigest(), 16) % 2 == 0

# Shorten output shown between various tags
for tag in ["error", "failure", "skipped", "system-out", "system-err", "rerun"]:
# Beware of multiline content in tags and empty tags (<tag/> or <tag key="value"/>)
pattern = re.compile(
rf'(<{tag}([^>/]*?)>)(.*?)</{tag}>',
re.DOTALL
)
if remove_output and tag in ["system-out", "system-err"]:
content = pattern.sub('', content)
else:
content = pattern.sub(rf'\1[snip]</{tag}>', content)

# Remove empty lines
content = re.sub(r'\n\s*\n', '\n', content)
# Combine empty tags
content = re.sub(r'(<(\w+) [^>]*)>\s*</\2>', r'\1/>', content)

path.write_text(content, encoding='utf-8')


def format_xml(path: Path) -> bool:
"""Format the XML file in place using xmllint. Return False if formatting failed."""
try:
subprocess.check_output(
["xmllint", "--format", str(path), "-o", str(path)],
encoding='utf-8',
stderr=subprocess.STDOUT,
)
except subprocess.CalledProcessError as e:
# Ignore error "Start tag expected" for empty files
if '<!-- Empty' not in path.read_text(encoding='utf-8'):
print(f'\nError formatting {path}: {e.output}', file=sys.stderr)
return False
return True


def remove_if_empty(path: Path) -> bool:
"""Delete the file if it contains no non-empty <testsuite> tag and no comment. Return True if it was removed."""
content = path.read_text(encoding='utf-8')
if not re.search(r'<testsuite[^>]*[^/]>', content) and '<!--' not in content:
path.unlink()
return True
return False


def main():
"""Shorten, format, and prune all XML result files in the target directory."""
default_directory = Path(__file__).resolve().parent / "test-reports"
if '--help' in sys.argv or '-h' in sys.argv:
print("Usage: python cleanup_files.py [target_directory]")
print(f"Default target directory {default_directory}.")
sys.exit(1)
target_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else default_directory
xml_files = list(target_dir.rglob("*.xml"))
num_files = len(xml_files)

reply = input(f"Process {num_files} XML files in {target_dir}? [y/n] ").strip()
if not re.match(r'^[Yy]$', reply):
print("Aborting.")
sys.exit(1)

print(f"Processing file 0/{num_files}...", end='', flush=True)

for i, path in enumerate(xml_files, 1):
print(f"\rProcessing file {i}/{num_files}...", end='', flush=True)

if remove_if_empty(path):
continue

path = shorten_filename(path)
shorten_content(path)
if not format_xml(path):
sys.exit(1)

# Delete empty directories
for d in sorted(target_dir.rglob("*"), key=lambda p: -len(str(p))):
if d.is_dir() and not any(d.iterdir()):
d.rmdir()

print(" done.")


if __name__ == "__main__":
main()
@@ -0,0 +1,18 @@
<?xml version="1.0"?>
<testsuites>
<!-- 'tests' attribute is too low -->
<testsuite name="pytest" errors="2" failures="1" skipped="1" tests="2" time="4.2">
<testcase classname="TestName" name="test1" time="4.2" file="test_name.py">
<skipped message=""/>
</testcase>
<testcase classname="TestName" name="test2" time="4.2" file="test_name.py">
<failure message="...">[snip]</failure>
</testcase>
<testcase classname="TestName" name="test3" time="4.2" file="test_name.py">
<error message="...">[snip]</error>
</testcase>
<testcase classname="TestName" name="test4" time="4.2" file="test_name.py">
<error message="...">[snip]</error>
</testcase>
</testsuite>
</testsuites>
@@ -0,0 +1,7 @@
<?xml version="1.0"?>
<testsuites>
<testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
<!-- Testcase is correct, but the file or folder name is wrong -->
<testcase classname="TestName" name="test_foo" time="4.2" file="sync/skip.py"/>
</testsuite>
</testsuites>
@@ -0,0 +1,7 @@
<?xml version="1.0"?>
<testsuites>
<testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
<testcase classname="TestName" name="test2" time="4.2" file="test_name.py"/>
<testcase classname="TestName" name="test2" time="4.2" file="test_name.py"/>
</testsuite>
</testsuites>
@@ -0,0 +1,7 @@
<?xml version="1.0"?>
<testsuites>
<testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
<!-- Expected from filename: foo/test_name.py -->
<testcase classname="TestName" name="test2" time="4.2" file="foo/test_foo.py"/>
</testsuite>
</testsuites>
@@ -0,0 +1,5 @@
<testsuite name="pytest" errors="1" failures="0" skipped="0" tests="1" time="4.2">
<testcase classname="TestName" name="test1" time="4.2" file="test_name.py">
<!-- Wrong closing tag, so the XML fails to parse -->
</wrong>
</testsuite>
@@ -0,0 +1,8 @@
<?xml version="1.0"?>
<testsuites>
<testsuite name="pytest" errors="0" failures="0" skipped="0" tests="2" time="4.2">
<testcase classname="TestName" name="test1" time="4.2" file="test_foo.py"/>
<!-- Different filename -->
<testcase classname="TestName" name="test2" time="4.2" file="test_bar.py"/>
</testsuite>
</testsuites>
@@ -0,0 +1,6 @@
<?xml version="1.0"?>
<testsuites>
<testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
<!-- Empty -->
</testsuite>
</testsuites>
@@ -0,0 +1,3 @@
<?xml version="1.0"?>
<!-- Missing testsuite/testsuites at root -->
<testcase classname="MissingRootTag" name="test1" time="3.14" file="test_name.py"/>