# SPDX-License-Identifier: Apache-2.0
import os

from vllm import LLM, SamplingParams

# vLLM does not guarantee reproducible results by default, for the sake of
# performance. To get reproducible results, you need to do the following:
# 1. Turn off multiprocessing to make the scheduling deterministic.
#    NOTE(woosuk): This is not needed and will be ignored for V0.
os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
# 2. Fix the global seed for reproducibility. The default seed is None, which
#    does not give reproducible results.
SEED = 42

# NOTE(woosuk): Even with the above two settings, vLLM only provides
# reproducibility when it runs on the same hardware and the same vLLM version.
# Also, the online serving API (`vllm serve`) does not support reproducibility
# because it is almost impossible to make the scheduling deterministic in the
# online serving setting.

llm = LLM(model="facebook/opt-125m", seed=SEED)

prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

outputs = llm.generate(prompts, sampling_params)
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
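
# A minimal sketch of one way to check the reproducibility described above:
# write the generated texts to a file and diff it against the file produced
# by a previous run of this script on the same hardware and vLLM version.
# The file name below is an arbitrary choice for illustration, not part of
# the vLLM API.
with open("reproducibility_outputs.txt", "w") as f:
    for output in outputs:
        f.write(output.outputs[0].text + "\n")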