Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ async def create_chat_completion(
lora_request = self._maybe_get_adapters(
request, supports_default_mm_loras=True)

- model_name = self._get_model_name(request.model, lora_request)
+ model_name = self.models.model_name(lora_request)

tokenizer = await self.engine_client.get_tokenizer(lora_request)

Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/serving_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ async def create_classify(
request: ClassificationRequest,
raw_request: Request,
) -> Union[ClassificationResponse, ErrorResponse]:
- model_name = self._get_model_name(request.model)
+ model_name = self.models.model_name()
request_id = (f"{self.request_id_prefix}-"
f"{self._base_request_id(raw_request)}")

Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/serving_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ async def create_completion(

result_generator = merge_async_iterators(*generators)

- model_name = self._get_model_name(request.model, lora_request)
+ model_name = self.models.model_name(lora_request)
num_prompts = len(engine_prompts)

# Similar to the OpenAI API, when n != best_of, we do not stream the
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/serving_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ async def create_embedding(
See https://platform.openai.com/docs/api-reference/embeddings/create
for the API specification. This API mimics the OpenAI Embedding API.
"""
- model_name = self._get_model_name(request.model)
+ model_name = self.models.model_name()
request_id = (
f"{self.request_id_prefix}-"
f"{self._base_request_id(raw_request, request.request_id)}")
Expand Down
11 changes: 0 additions & 11 deletions vllm/entrypoints/openai/serving_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,17 +980,6 @@ def _is_model_supported(self, model_name: Optional[str]) -> bool:
return True
return self.models.is_base_model(model_name)

- def _get_model_name(
-         self,
-         model_name: Optional[str] = None,
-         lora_request: Optional[LoRARequest] = None,
- ) -> str:
-     if lora_request:
-         return lora_request.lora_name
-     if not model_name:
-         return self.models.base_model_paths[0].name
-     return model_name
-

def clamp_prompt_logprobs(
prompt_logprobs: Union[PromptLogprobs,
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/serving_pooling.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ async def create_pooling(
if error_check_ret is not None:
return error_check_ret

- model_name = self._get_model_name(request.model)
+ model_name = self.models.model_name()

request_id = f"pool-{self._base_request_id(raw_request)}"
created_time = int(time.time())
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/serving_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ async def create_responses(

try:
lora_request = self._maybe_get_adapters(request)
- model_name = self._get_model_name(request.model, lora_request)
+ model_name = self.models.model_name(lora_request)
tokenizer = await self.engine_client.get_tokenizer(lora_request)

if self.use_harmony:
Expand Down
4 changes: 2 additions & 2 deletions vllm/entrypoints/openai/serving_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ async def create_score(
final_res_batch,
request_id,
created_time,
- self._get_model_name(request.model),
+ self.models.model_name(),
)
except asyncio.CancelledError:
return self.create_error_response("Client disconnected")
Expand Down Expand Up @@ -399,7 +399,7 @@ async def do_rerank(
return self.request_output_to_rerank_response(
final_res_batch,
request_id,
- self._get_model_name(request.model),
+ self.models.model_name(),
documents,
top_n,
)
Expand Down