Skip to content

Commit c79fb40

Browse files
committed
Reduce max_tokens to 2048 and switch log calls to lazy %-style formatting
Signed-off-by: amy-why-3459 <[email protected]>
1 parent 7bfd3c0 commit c79fb40

3 files changed

Lines changed: 17 additions & 7 deletions

File tree

vllm_omni/distributed/omni_connectors/adapter.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def get_through_connector(connector, target_stage_id, stage_id, req_id, connecto
245245
if payload_data:
246246
connector.request_prompt_token_ids[req_id] = payload_data.get("thinker_input_ids", [])
247247
connector.get_requests[req_id] += 1
248-
logger.debug(f"[Stage-{stage_id}] Received one chunk for request {connector_get_key}")
248+
logger.debug("[Stage-%d] Received one chunk for request %s", stage_id, connector_get_key)
249249
break
250250
time.sleep(0.01)
251251
return payload_data
@@ -325,7 +325,7 @@ def put_chunk(
325325
logger.error(f"Failed to use custom_process_input_func for payload extraction: {e}")
326326

327327
if not payload_data:
328-
logger.warning(f"[Stage-{stage_id}] No payload data to send for request {request_id}")
328+
logger.warning("[Stage-%d] No payload data to send for request %s", stage_id, request_id)
329329
return
330330

331331
if stage_id == 0 and chunk_id == 0:
@@ -341,7 +341,7 @@ def put_chunk(
341341
payload_data["thinker_hidden_states"] = torch.cat(
342342
(save_payload.get("thinker_hidden_states"), payload_data.get("thinker_hidden_states")), dim=0
343343
)
344-
logger.info(f"[Stage-{stage_id}] Merged embeddings and hidden states for request {request_id}")
344+
logger.debug("[Stage-%d] Merged embeddings and hidden states for request %s", stage_id, request_id)
345345

346346
if stage_id == 1:
347347
# TODO: Make parameters configurable and optimize algorithms
@@ -367,7 +367,7 @@ def put_chunk(
367367

368368
if success:
369369
connector.put_requests[request_id] += 1
370-
logger.info(f"[Stage-{stage_id}] Sent {connector_put_key}")
370+
logger.debug("[Stage-%d] Sent %s", stage_id, connector_put_key)
371371

372372

373373
def compute_talker_prompt_ids_length(prompt_ids: list[int]) -> int:

vllm_omni/distributed/omni_connectors/connectors/shm_connector.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4+
import fcntl
45
import time
56
from collections import defaultdict
67
from typing import Any
@@ -53,7 +54,12 @@ def put(self, from_stage: str, to_stage: str, put_key: str, data: Any) -> tuple[
5354
# if size > self.threshold:
5455
if True: # TODO: correct put & get logic
5556
# Use Shared Memory
56-
meta = shm_write_bytes(payload, name=put_key)
57+
lock_file = f"/dev/shm/shm_{put_key}_lockfile.lock"
58+
with open(lock_file, "w") as lockf:
59+
fcntl.flock(lockf, fcntl.LOCK_EX)
60+
meta = shm_write_bytes(payload, name=put_key)
61+
fcntl.flock(lockf, fcntl.LOCK_UN)
62+
5763
# meta contains {'name': ..., 'size': ...}
5864
metadata[put_key] = {"shm": meta, "size": size}
5965
self._metrics["shm_writes"] += 1
@@ -97,7 +103,11 @@ def get(self, from_stage: str, to_stage: str, get_key: str, metadata=None) -> tu
97103
return None, 0
98104

99105
try:
100-
data_bytes = shm_read_bytes({"name": get_key, "size": shm.size})
106+
lock_file = f"/dev/shm/shm_{get_key}_lockfile.lock"
107+
with open(lock_file) as lockf:
108+
fcntl.flock(lockf, fcntl.LOCK_SH)
109+
data_bytes = shm_read_bytes({"name": get_key, "size": shm.size})
110+
fcntl.flock(lockf, fcntl.LOCK_UN)
101111
obj = self.deserialize_obj(data_bytes)
102112
return obj, shm.size
103113
finally:

vllm_omni/model_executor/stage_configs/qwen3_omni_moe_async_chunk.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ stage_args:
6363
default_sampling_params:
6464
temperature: 0.9
6565
top_k: 50
66-
max_tokens: 4096
66+
max_tokens: 2048 # TODO: The max_tokens of the async_chunk feature cannot exceed 2048.
6767
seed: 42
6868
detokenize: False
6969
repetition_penalty: 1.05

0 commit comments

Comments (0)