Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/agent/voice_agent/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ async def main() -> None:
name="Friday",
sys_prompt="You are a helpful assistant",
model=OpenAIChatModel(
model_name="qwen-omni-turbo",
model_name="qwen3-omni-flash",
client_args={
"base_url": "https://dashscope.aliyuncs.com/"
"compatible-mode/v1",
Expand Down
29 changes: 29 additions & 0 deletions src/agentscope/model/_openai_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,31 @@
AsyncStream = "openai.types.chat.AsyncStream"


def _format_audio_data_for_qwen_omni(messages: list[dict]) -> None:
"""Qwen-omni uses OpenAI-compatible API but requires different audio
data format than OpenAI with "data:;base64," prefix.
Refer to `Qwen-omni documentation
<https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2867839>`_
for more details.

Args:
messages (`list[dict]`):
The list of message dictionaries from OpenAI formatter.
"""
for msg in messages:
if isinstance(msg.get("content"), list):
for block in msg["content"]:
if (
isinstance(block, dict)
and "input_audio" in block
and isinstance(block["input_audio"].get("data"), str)
):
if not block["input_audio"]["data"].startswith("http"):
block["input_audio"] = (
"data:;base64," + block["input_audio"]["data"]
)


class OpenAIChatModel(ChatModelBase):
"""The OpenAI chat model class."""

Expand Down Expand Up @@ -154,6 +179,10 @@ async def __call__(
"and 'content' key for OpenAI API.",
)

# Qwen-omni requires different base64 audio format from openai
if "omni" in self.model_name.lower():
_format_audio_data_for_qwen_omni(messages)

kwargs = {
"model": self.model_name,
"messages": messages,
Expand Down
Loading