We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e28533a commit 7151f92Copy full SHA for 7151f92
examples/offline_inference/spec_decode.py
@@ -79,9 +79,7 @@ def main():
79
trust_remote_code=True,
80
tensor_parallel_size=args.tp,
81
enable_chunked_prefill=args.enable_chunked_prefill,
82
- max_num_batched_tokens=args.max_num_batched_tokens,
83
enforce_eager=args.enforce_eager,
84
- max_num_seqs=args.max_num_seqs,
85
gpu_memory_utilization=0.8,
86
speculative_config=speculative_config,
87
disable_log_stats=False,
0 commit comments