Skip to content

Commit 642d31b

Browse files
committed
first working attempt at logprobs
1 parent f969241 commit 642d31b

File tree

97 files changed

+468
-278
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

97 files changed

+468
-278
lines changed

examples/offline_inference_multi_step.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
gpu_memory_utilization=0.9,
2626
num_scheduler_steps=8,
2727
use_v2_block_manager=True,
28+
enforce_eager=True,
2829
)
2930
# Generate texts from the prompts. The output is a list of RequestOutput objects
3031
# that contain the prompt, generated text, and other information.

tests/spec_decode/test_multi_step_worker.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
import pytest
66
import torch
77

8+
from vllm.model_executor.layers.sampler import SamplerOutput
89
from vllm.model_executor.utils import set_random_seed
9-
from vllm.sequence import ExecuteModelRequest, Logprob, SamplerOutput
10+
from vllm.sequence import ExecuteModelRequest, Logprob
1011
from vllm.spec_decode.draft_model_runner import TP1DraftModelRunner
1112
from vllm.spec_decode.multi_step_worker import MultiStepWorker
1213
from vllm.spec_decode.top1_proposer import Top1Proposer

tests/spec_decode/test_spec_decode_worker.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
import pytest
88
import torch
99

10+
from vllm.model_executor.layers.sampler import SamplerOutput
1011
from vllm.model_executor.utils import set_random_seed
11-
from vllm.sequence import ExecuteModelRequest, SamplerOutput, SequenceOutput
12+
from vllm.sequence import ExecuteModelRequest, SequenceOutput
1213
from vllm.spec_decode.interfaces import SpeculativeProposals
1314
from vllm.spec_decode.metrics import (AsyncMetricsCollector,
1415
SpecDecodeWorkerMetrics)

tests/spec_decode/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
import torch
99

1010
from vllm.engine.arg_utils import EngineArgs
11+
from vllm.model_executor.layers.sampler import SamplerOutput
1112
from vllm.model_executor.utils import set_random_seed
1213
from vllm.sampling_params import SamplingParams
1314
from vllm.sequence import (VLLM_TOKEN_ID_ARRAY_TYPE,
1415
CompletionSequenceGroupOutput, Logprob,
15-
SamplerOutput, SequenceData, SequenceGroupMetadata,
16-
SequenceOutput)
16+
SequenceData, SequenceGroupMetadata, SequenceOutput)
1717
from vllm.utils import get_distributed_init_method, get_ip, get_open_port
1818
from vllm.worker.cache_engine import CacheEngine
1919
from vllm.worker.model_runner import ModelRunner

tests/test_sequence.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
import pytest
44

5+
from vllm.model_executor.layers.sampler import SamplerOutput
56
from vllm.sequence import (VLLM_TOKEN_ID_ARRAY_TYPE,
6-
CompletionSequenceGroupOutput, SamplerOutput,
7-
SequenceData, SequenceOutput)
7+
CompletionSequenceGroupOutput, SequenceData,
8+
SequenceOutput)
89

910
from .core.utils import create_dummy_prompt
1011

vllm/engine/async_llm_engine.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@
2525
from vllm.inputs.parse import is_explicit_encoder_decoder_prompt
2626
from vllm.logger import init_logger
2727
from vllm.lora.request import LoRARequest
28+
from vllm.model_executor.layers.sampler import SamplerOutput
2829
from vllm.outputs import EmbeddingRequestOutput, RequestOutput
2930
from vllm.pooling_params import PoolingParams
3031
from vllm.prompt_adapter.request import PromptAdapterRequest
3132
from vllm.sampling_params import SamplingParams
32-
from vllm.sequence import (ExecuteModelRequest, SamplerOutput,
33-
SequenceGroupMetadata)
33+
from vllm.sequence import ExecuteModelRequest, SequenceGroupMetadata
3434
from vllm.usage.usage_lib import UsageContext
3535
from vllm.utils import print_warning_once
3636

vllm/engine/llm_engine.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,16 @@
2929
from vllm.inputs.parse import is_explicit_encoder_decoder_prompt
3030
from vllm.logger import init_logger
3131
from vllm.lora.request import LoRARequest
32+
from vllm.model_executor.layers.sampler import SamplerOutput
3233
from vllm.multimodal import MultiModalDataDict
3334
from vllm.outputs import (EmbeddingRequestOutput, RequestOutput,
3435
RequestOutputFactory)
3536
from vllm.pooling_params import PoolingParams
3637
from vllm.prompt_adapter.request import PromptAdapterRequest
3738
from vllm.sampling_params import SamplingParams
3839
from vllm.sequence import (EmbeddingSequenceGroupOutput, ExecuteModelRequest,
39-
PoolerOutput, SamplerOutput, Sequence,
40-
SequenceGroup, SequenceGroupMetadata,
41-
SequenceStatus)
40+
PoolerOutput, Sequence, SequenceGroup,
41+
SequenceGroupMetadata, SequenceStatus)
4242
from vllm.tracing import (SpanAttributes, SpanKind, extract_trace_context,
4343
init_tracer)
4444
from vllm.transformers_utils.config import try_get_generation_config

vllm/engine/output_processor/util.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
from typing import Sequence as GenericSequence
33
from typing import Union
44

5-
from vllm.sequence import PoolerOutput, SamplerOutput, SequenceGroupOutput
5+
from vllm.model_executor.layers.sampler import SamplerOutput
6+
from vllm.sequence import PoolerOutput, SequenceGroupOutput
67

78

89
def create_output_by_sequence_group(

vllm/engine/protocol.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
from vllm.core.scheduler import SchedulerOutputs
88
from vllm.inputs.data import PromptInputs
99
from vllm.lora.request import LoRARequest
10+
from vllm.model_executor.layers.sampler import SamplerOutput
1011
from vllm.outputs import EmbeddingRequestOutput, RequestOutput
1112
from vllm.pooling_params import PoolingParams
1213
from vllm.prompt_adapter.request import PromptAdapterRequest
1314
from vllm.sampling_params import SamplingParams
14-
from vllm.sequence import SamplerOutput
1515

1616

1717
@runtime_checkable

vllm/executor/cpu_executor.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@
1111
ResultHandler, WorkerMonitor)
1212
from vllm.logger import init_logger
1313
from vllm.lora.request import LoRARequest
14+
from vllm.model_executor.layers.sampler import SamplerOutput
1415
from vllm.prompt_adapter.request import PromptAdapterRequest
15-
from vllm.sequence import ExecuteModelRequest, SamplerOutput
16+
from vllm.sequence import ExecuteModelRequest
1617
from vllm.utils import (GiB_bytes, get_distributed_init_method, get_open_port,
1718
get_vllm_instance_id, make_async)
1819
from vllm.worker.worker_base import WorkerWrapperBase

0 commit comments

Comments
 (0)