1 file changed: +36 −0

'''
Example of setting up an LLM with multi-step scheduling enabled.
In practice the async engine would be a more sensible choice
from a performance perspective; however, this example is useful
for demonstrating and debugging the multi-step code path.
'''

from vllm import LLM, SamplingParams

# Sample prompts.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
# Create a sampling params object.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Create an LLM with multi-step scheduling enabled.
llm = LLM(
    model="JackFram/llama-160m",
    swap_space=16,  # CPU swap space per GPU, in GiB
    tensor_parallel_size=1,
    gpu_memory_utilization=0.9,
    num_scheduler_steps=8,  # run 8 consecutive decode steps per scheduler invocation
    use_v2_block_manager=True,  # multi-step requires the v2 block manager
)
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
outputs = llm.generate(prompts, sampling_params)
# Print the outputs.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
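The docstring notes that the async engine is the more sensible choice for performance. Below is a minimal sketch of that variant, assuming the AsyncLLMEngine / AsyncEngineArgs API from the same vLLM generation as this example; the exact generate() signature has varied across releases, so treat it as an assumption rather than a drop-in replacement.

import asyncio

from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams

# Build the async engine with the same multi-step settings as the
# synchronous example above.
engine = AsyncLLMEngine.from_engine_args(
    AsyncEngineArgs(
        model="JackFram/llama-160m",
        num_scheduler_steps=8,
        use_v2_block_manager=True,
    )
)

async def run(prompt: str, request_id: str) -> None:
    # engine.generate() is an async generator that streams RequestOutput
    # objects; the last one yielded holds the final generated text.
    final = None
    async for output in engine.generate(
        prompt, SamplingParams(temperature=0.8, top_p=0.95), request_id
    ):
        final = output
    print(f"Prompt: {prompt!r}, Generated text: {final.outputs[0].text!r}")

asyncio.run(run("Hello, my name is", "req-0"))

The same multi-step behavior is also exposed on the OpenAI-compatible server through the corresponding engine flags (--num-scheduler-steps and --use-v2-block-manager).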