|
18 | 18 | VLLM_MULTI_NODE = os.getenv("VLLM_MULTI_NODE", "0") == "1" |
19 | 19 |
|
20 | 20 |
|
21 | | -@pytest.mark.parametrize(("TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, " |
22 | | - "MODEL_NAME, DIST_BACKEND"), |
23 | | - [ |
24 | | - (2, 2, 0, 1, "meta-llama/Meta-Llama-3-8B", "mp"), |
25 | | - (2, 2, 1, 0, "meta-llama/Meta-Llama-3-8B", "mp"), |
26 | | - (1, 3, 0, 0, "meta-llama/Meta-Llama-3-8B", "mp"), |
27 | | - (1, 4, 0, 1, "meta-llama/Meta-Llama-3-8B", "mp"), |
28 | | - (1, 4, 1, 0, "meta-llama/Meta-Llama-3-8B", "mp"), |
29 | | - (1, 3, 0, 0, "meta-llama/Meta-Llama-3-8B", "ray"), |
30 | | - (1, 4, 0, 1, "meta-llama/Meta-Llama-3-8B", "ray"), |
31 | | - (1, 4, 1, 0, "meta-llama/Meta-Llama-3-8B", "ray"), |
32 | | - (2, 2, 1, 0, "meta-llama/Meta-Llama-3-8B", "ray"), |
33 | | - (2, 2, 0, 1, "meta-llama/Meta-Llama-3-8B", "ray"), |
34 | | - ]) |
| 21 | +@pytest.mark.parametrize( |
| 22 | + ("TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, TRUST_REMOTE_CODE, " |
| 23 | + "MODEL_NAME, DIST_BACKEND"), |
| 24 | + [ |
| 25 | + (2, 2, 0, 1, 0, "meta-llama/Meta-Llama-3-8B", "mp"), |
| 26 | + (2, 2, 1, 0, 0, "meta-llama/Meta-Llama-3-8B", "mp"), |
| 27 | + (1, 3, 0, 0, 0, "meta-llama/Meta-Llama-3-8B", "mp"), |
| 28 | + (1, 4, 0, 1, 0, "meta-llama/Meta-Llama-3-8B", "mp"), |
| 29 | + (1, 4, 1, 0, 0, "meta-llama/Meta-Llama-3-8B", "mp"), |
| 30 | + (1, 3, 0, 0, 0, "meta-llama/Meta-Llama-3-8B", "ray"), |
| 31 | + (1, 4, 0, 1, 0, "meta-llama/Meta-Llama-3-8B", "ray"), |
| 32 | + (1, 4, 1, 0, 0, "meta-llama/Meta-Llama-3-8B", "ray"), |
| 33 | + (2, 2, 1, 0, 0, "meta-llama/Meta-Llama-3-8B", "ray"), |
| 34 | + (2, 2, 0, 1, 0, "meta-llama/Meta-Llama-3-8B", "ray"), |
| 35 | + (2, 2, 1, 1, 1, "internlm/internlm2_5-7b-chat", "ray"), |
| 36 | + ], |
| 37 | +) |
35 | 38 | @fork_new_process_for_each_test |
36 | | -def test_compare_tp(TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, MODEL_NAME, |
37 | | - DIST_BACKEND): |
| 39 | +def test_compare_tp(TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, |
| 40 | + TRUST_REMOTE_CODE, MODEL_NAME, DIST_BACKEND): |
38 | 41 | if VLLM_MULTI_NODE and DIST_BACKEND == "mp": |
39 | 42 | pytest.skip("Skipping multi-node pipeline parallel test for " |
40 | 43 | "multiprocessing distributed backend") |
@@ -71,6 +74,9 @@ def test_compare_tp(TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, MODEL_NAME, |
71 | 74 | if EAGER_MODE: |
72 | 75 | pp_args.append("--enforce-eager") |
73 | 76 | tp_args.append("--enforce-eager") |
| 77 | + if TRUST_REMOTE_CODE: |
| 78 | + pp_args.append("--trust-remote-code") |
| 79 | + tp_args.append("--trust-remote-code") |
74 | 80 | pp_env = None |
75 | 81 | if (DIST_BACKEND == "ray" and TP_SIZE == 2 and PP_SIZE == 2 |
76 | 82 | and CHUNKED_PREFILL): |
|
0 commit comments