Fix test_no_weak_ref_output_decorator

sarckk · sarckk · commit b30cc6376df2 · 2025-11-11T23:20:52.000-08:00
Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
diff --git a/tests/compile/test_decorator.py b/tests/compile/test_decorator.py
@@ -289,18 +289,40 @@ def test_conditional_compile_enable_if(use_inductor_graph_partition, monkeypatch
         run_model(vllm_config, mod_A, cudagraph_runtime_mode)
 
 
-def test_no_weak_ref_output_decorator():
+@pytest.mark.parametrize("use_inductor_graph_partition", [True, False])
+def test_no_weak_ref_output_decorator(use_inductor_graph_partition, monkeypatch):
+    # disable compile cache so that we can count the number of compilations
+    # appropriately
+    monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", "1")
+
+    if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"):
+        pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
+
     # piecewise
     vllm_config = VllmConfig(
         compilation_config=CompilationConfig(
             mode=CompilationMode.VLLM_COMPILE,
             use_cudagraph=True,
-            splitting_ops=["silly.attention"],
+            splitting_ops=["silly::attention"],
             cudagraph_capture_sizes=[1, 2],
+            use_inductor_graph_partition=use_inductor_graph_partition,
         )
     )
     cudagraph_runtime_mode = CUDAGraphMode.PIECEWISE
 
+    expected_num_graphs_seen = 1
+    expected_num_cudagraph_captured = (
+        4  # num_cudagraph_sizes * num cudagraphs to capture
+    )
+    if use_inductor_graph_partition:
+        expected_num_piecewise_graphs_seen = 1
+        expected_num_piecewise_capturable_graphs_seen = 1
+        expected_num_backend_compilations = 1
+    else:
+        expected_num_piecewise_graphs_seen = 3
+        expected_num_piecewise_capturable_graphs_seen = 2
+        expected_num_backend_compilations = 2
+
     @support_torch_compile(no_weak_ref_output=False)
     class A(nn.Module):
         def __init__(
@@ -333,12 +355,11 @@ class C(B): ...
 
     # A has support_torch_compile
     with compilation_counter.expect(
-        num_graphs_seen=1,
-        num_piecewise_graphs_seen=3,
-        num_piecewise_capturable_graphs_seen=2,
-        num_backend_compilations=2,
-        num_cudagraph_captured=4,
-        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        num_graphs_seen=expected_num_graphs_seen,
+        num_piecewise_graphs_seen=expected_num_piecewise_graphs_seen,
+        num_piecewise_capturable_graphs_seen=expected_num_piecewise_capturable_graphs_seen,
+        num_backend_compilations=expected_num_backend_compilations,
+        num_cudagraph_captured=expected_num_cudagraph_captured,
     ):
         run_model(vllm_config, mod_A, cudagraph_runtime_mode)
 
@@ -349,11 +370,11 @@ class C(B): ...
 
     # B also has support_torch_compile
     with compilation_counter.expect(
-        num_graphs_seen=1,
-        num_piecewise_graphs_seen=3,
-        num_piecewise_capturable_graphs_seen=2,
-        num_backend_compilations=2,
-        num_cudagraph_captured=4,
+        num_graphs_seen=expected_num_graphs_seen,
+        num_piecewise_graphs_seen=expected_num_piecewise_graphs_seen,
+        num_piecewise_capturable_graphs_seen=expected_num_piecewise_capturable_graphs_seen,
+        num_backend_compilations=expected_num_backend_compilations,
+        num_cudagraph_captured=expected_num_cudagraph_captured,
     ):
         run_model(vllm_config, mod_B, cudagraph_runtime_mode)
 
@@ -364,10 +385,10 @@ class C(B): ...
 
     # C has support_torch_compile
     with compilation_counter.expect(
-        num_graphs_seen=1,
-        num_piecewise_graphs_seen=3,
-        num_piecewise_capturable_graphs_seen=2,
-        num_backend_compilations=2,
-        num_cudagraph_captured=4,
+        num_graphs_seen=expected_num_graphs_seen,
+        num_piecewise_graphs_seen=expected_num_piecewise_graphs_seen,
+        num_piecewise_capturable_graphs_seen=expected_num_piecewise_capturable_graphs_seen,
+        num_backend_compilations=expected_num_backend_compilations,
+        num_cudagraph_captured=expected_num_cudagraph_captured,
     ):
         run_model(vllm_config, mod_C, cudagraph_runtime_mode)
diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py
@@ -286,11 +286,6 @@ def _support_torch_compile(
     """
     A decorator to add support for compiling the forward method of a class.
     """
-    setattr(cls, IGNORE_COMPILE_KEY, False)
-
-    # setting as attribute on cls ensures child class will override parent class
-    setattr(cls, LAST_PIECEWISE_GRAPH_WEAKREF_KEY, no_weak_ref_output)
-
     if TorchCompileWrapperWithCustomDispatcher in cls.__bases__:
         # support decorating multiple times
         return cls