[torch.compile] add a flag to disable custom op (vllm-project#8488)

youkaichao · garg-amit · commit b6dbb0001256 · 2024-10-28T06:04:00.000Z
Signed-off-by: Amit Garg <mitgarg17495@gmail.com>
diff --git a/tests/compile/test_full_graph.py b/tests/compile/test_full_graph.py
@@ -6,7 +6,8 @@
 @pytest.mark.parametrize("model", ["meta-llama/Meta-Llama-3-8B"])
 def test_full_graph(model):
     # make sure these models can be captured in full graph mode
-    os.environ["VLLM_TEST_DYNAMO_GRAPH_CAPTURE"] = "1"
+    if "VLLM_TEST_DYNAMO_GRAPH_CAPTURE" not in os.environ:
+        os.environ["VLLM_TEST_DYNAMO_GRAPH_CAPTURE"] = "1"
 
     from vllm import LLM, SamplingParams
     prompts = [
diff --git a/vllm/envs.py b/vllm/envs.py
@@ -202,6 +202,11 @@ def get_default_config_root():
     (os.environ.get("VLLM_DYNAMO_USE_CUSTOM_DISPATCHER", "True").lower() in
      ("true", "1")),
 
+    # Internal flag: when set to a non-zero value, custom ops are bypassed
+    # and the native PyTorch implementation is used instead (default: 0)
+    "VLLM_TEST_COMPILE_NO_CUSTOM_OPS":
+    lambda: int(os.environ.get("VLLM_TEST_COMPILE_NO_CUSTOM_OPS", "0")),
+
     # Internal flag to enable Dynamo fullgraph capture
     "VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE":
     lambda: bool(
diff --git a/vllm/model_executor/custom_op.py b/vllm/model_executor/custom_op.py
@@ -1,5 +1,6 @@
 import torch.nn as nn
 
+import vllm.envs as envs
 from vllm.platforms import current_platform
 from vllm.utils import is_cpu, is_hip, is_xpu
 
@@ -53,6 +54,10 @@ def forward_gaudi(self, *args, **kwargs):
     def dispatch_forward(self):
         # NOTE(woosuk): Here we assume that vLLM was built for only one
         # specific backend. Currently, we do not support dynamic dispatching.
+
+        if envs.VLLM_TEST_COMPILE_NO_CUSTOM_OPS:
+            return self.forward_native
+
         if is_hip():
             return self.forward_hip
         elif is_cpu():