Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Other Tests

!!! note
-Unless otherwise specified, all the continuous batching tests are running with `max_model_len=256`
+Unless otherwise specified, all the continuous batching tests are running with `max_model_len=512`

::: tests.e2e.test_spyre_cb
options:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Output Tests

!!! note
-Unless otherwise specified, all the continuous batching tests are running with `max_model_len=256`
+Unless otherwise specified, all the continuous batching tests are running with `max_model_len=512`

::: tests.e2e.test_spyre_basic
options:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Scheduler Steps Tests

!!! note
-Unless otherwise specified, all the continuous batching tests are running with `max_model_len=256`
+Unless otherwise specified, all the continuous batching tests are running with `max_model_len=512`

::: tests.e2e.test_spyre_cb_scheduler_steps
3 changes: 1 addition & 2 deletions tests/aftu/test_compare_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def mock_get_mask_dtype(mocker: MockerFixture):
@pytest.mark.spyre
@pytest.mark.cb
def test_compare_graphs_cb(model: ModelInfo, max_num_seqs: int,
-monkeypatch: pytest.MonkeyPatch,
+max_model_len: int, monkeypatch: pytest.MonkeyPatch,
mocker: MockerFixture):
"""Test that the spyre worker correctly outputs
continuous batches of requests by comparing to HF"""
Expand All @@ -45,7 +45,6 @@ def test_compare_graphs_cb(model: ModelInfo, max_num_seqs: int,
if script_dir is None:
pytest.skip("aiu-fms-testing-utils is required "
"and is not installed to run this test")
-max_model_len = 256

model_path = get_model_path(model)

Expand Down
5 changes: 3 additions & 2 deletions tests/e2e/test_spyre_async_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ async def generate(
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
@pytest.mark.asyncio
async def test_abort(model: ModelInfo, backend: str, cb: int,
+max_model_len: int, max_num_seqs: int,
warmup_shapes: DecodeWarmupShapes,
output_kind: RequestOutputKind,
monkeypatch: pytest.MonkeyPatch):
Expand All @@ -72,8 +73,8 @@ async def test_abort(model: ModelInfo, backend: str, cb: int,
engine = AsyncLLM.from_engine_args(
AsyncEngineArgs(model=model.name,
tokenizer=model.name,
-max_model_len=256,
-max_num_seqs=4,
+max_model_len=max_model_len,
+max_num_seqs=max_num_seqs,
revision=model.revision))
has_unfinished_requests = \
engine.output_processor.has_unfinished_requests
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/test_spyre_static_batching_limits.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_max_prompt_len_and_new_tokens(model: ModelInfo,

llm = get_cached_llm(
model=model,
-max_model_len=256,
+max_model_len=256, # unused
tensor_parallel_size=1,
backend=backend,
monkeypatch=monkeypatch,
Expand Down