Commit f97e0ae

added example
1 parent b74a125 commit f97e0ae

1 file changed: +36 −0 lines changed

@@ -0,0 +1,36 @@
'''
Example of setting up an LLM with multi-step scheduling enabled.

In actuality, the async engine would be a more sensible choice
from a performance perspective. However, this example is useful
for demonstration and debugging of the multi-step code.
'''

from vllm import LLM, SamplingParams

# Sample prompts.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
# Create a sampling params object.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Create an LLM with multi-step scheduling enabled (num_scheduler_steps=8).
llm = LLM(
    model="JackFram/llama-160m",
    swap_space=16,
    tensor_parallel_size=1,
    gpu_memory_utilization=0.9,
    num_scheduler_steps=8,
    use_v2_block_manager=True,
)
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
outputs = llm.generate(prompts, sampling_params)
# Print the outputs.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
