Changes from 2 commits
2 changes: 2 additions & 0 deletions docs/en/configuration/config-files.md
@@ -79,6 +79,7 @@ tool_call_timeout_ms = 60000
| `api_key` | `string` | Yes | API key |
| `env` | `table` | No | Environment variables to set before creating provider instance |
| `custom_headers` | `table` | No | Custom HTTP headers to attach to requests |
| `default_query` | `table` | No | Default query parameters to attach to requests |

Example:

@@ -88,6 +89,7 @@ type = "kimi"
base_url = "https://api.moonshot.cn/v1"
api_key = "sk-xxx"
custom_headers = { "X-Custom-Header" = "value" }
default_query = { "foo" = "bar" }
```

### `models`
27 changes: 27 additions & 0 deletions docs/en/configuration/providers.md
@@ -57,6 +57,33 @@ base_url = "https://api.openai.com/v1"
api_key = "sk-xxx"
```

#### Azure OpenAI (Chat Completions)

Azure OpenAI uses a slightly different request shape: it requires an `api-version` query parameter
and authenticates with an `api-key` header. You can configure it with `openai_legacy`:

```toml
[providers.azure-openai]
type = "openai_legacy"
base_url = "https://<resource>.cognitiveservices.azure.com/openai/deployments/<deployment>"
api_key = "" # recommended: set via env var AZURE_OPENAI_API_KEY instead of saving to disk
default_query = { "api-version" = "2024-05-01-preview" }
```

Recommended env vars (Azure):

```bash
export AZURE_OPENAI_API_KEY="..."
export AZURE_COGNITIVE_SERVICES_API_VERSION="2024-05-01-preview"
```

Notes on the Moonshot AI models sold directly by Azure:

- `Kimi-K2.5`: chat-completions with `reasoning_content`, supports text + image input and tool calling.
- `Kimi-K2-Thinking`: chat-completions with `reasoning_content`, text-only input, tool calling.
- Both are documented with a 262,144-token input limit and a 262,144-token output limit. See Microsoft Learn for details:
`https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure?view=foundry-classic&pivots=azure-direct-others&tabs=global-standard-aoai%2Cglobal-standard#moonshot-ai-models-sold-directly-by-azure`

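A matching `models` entry for such a deployment might look like the sketch below. The entry name and capability list are illustrative, and `model` must match whatever you named the Azure deployment:

```toml
[models.kimi-k2-thinking-azure]  # illustrative entry name
provider = "azure-openai"
model = "Kimi-K2-Thinking"       # your Azure deployment name
max_context_size = 262144
capabilities = ["thinking"]      # assumption: text-only thinking model
```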
### `openai_responses`

For OpenAI Responses API (newer API format).
2 changes: 2 additions & 0 deletions docs/zh/configuration/config-files.md
@@ -79,6 +79,7 @@ tool_call_timeout_ms = 60000
| `api_key` | `string` | Yes | API key |
| `env` | `table` | No | Environment variables to set before creating the provider instance |
| `custom_headers` | `table` | No | Custom HTTP headers to attach to requests |
| `default_query` | `table` | No | Default query parameters to attach to requests |

Example:

@@ -88,6 +89,7 @@ type = "kimi"
base_url = "https://api.moonshot.cn/v1"
api_key = "sk-xxx"
custom_headers = { "X-Custom-Header" = "value" }
default_query = { "foo" = "bar" }
```

### `models`
28 changes: 27 additions & 1 deletion docs/zh/configuration/providers.md
@@ -57,6 +57,33 @@ base_url = "https://api.openai.com/v1"
api_key = "sk-xxx"
```

#### Azure OpenAI (Chat Completions)

Azure OpenAI uses a slightly different request shape: it requires an `api-version` query parameter
and authenticates with an `api-key` header. You can configure it with `openai_legacy`:

```toml
[providers.azure-openai]
type = "openai_legacy"
base_url = "https://<resource>.cognitiveservices.azure.com/openai/deployments/<deployment>"
api_key = "" # recommended: set via the AZURE_OPENAI_API_KEY env var instead of writing it to disk
default_query = { "api-version" = "2024-05-01-preview" }
```

Recommended environment variables (Azure):

```bash
export AZURE_OPENAI_API_KEY="..."
export AZURE_COGNITIVE_SERVICES_API_VERSION="2024-05-01-preview"
```

Notes on the Moonshot AI "Direct from Azure" models (see Microsoft Learn):

- `Kimi-K2.5`: chat-completions with `reasoning_content`; supports text + image input and tool calling.
- `Kimi-K2-Thinking`: chat-completions with `reasoning_content`; text-only input; supports tool calling.
- Both are documented with a 262,144-token input limit and a 262,144-token output limit:
  `https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure?view=foundry-classic&pivots=azure-direct-others&tabs=global-standard-aoai%2Cglobal-standard#moonshot-ai-models-sold-directly-by-azure`

### `openai_responses`

For the OpenAI Responses API (the newer API format).
@@ -149,4 +176,3 @@ capabilities = ["thinking", "image_in"]
| `moonshot_fetch` | `FetchURL` | Falls back to local fetching |

On other platforms, the `FetchURL` tool remains available but falls back to local fetching.

2 changes: 2 additions & 0 deletions src/kimi_cli/config.py
@@ -44,6 +44,8 @@ class LLMProvider(BaseModel):
"""Environment variables to set before creating the provider instance"""
custom_headers: dict[str, str] | None = None
"""Custom headers to include in API requests"""
default_query: dict[str, object] | None = None
"""Default query parameters to include in API requests"""
oauth: OAuthRef | None = None
"""OAuth credential reference (do not store tokens here)."""

54 changes: 52 additions & 2 deletions src/kimi_cli/llm.py
@@ -4,7 +4,7 @@
import os
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Literal, cast, get_args
from typing import TYPE_CHECKING, Any, Literal, cast, get_args

from kosong.chat_provider import ChatProvider
from pydantic import SecretStr
@@ -86,7 +86,8 @@ def augment_provider_with_env_vars(provider: LLMProvider, model: LLMModel) -> di
case "openai_legacy" | "openai_responses":
if base_url := os.getenv("OPENAI_BASE_URL"):
provider.base_url = base_url
if api_key := os.getenv("OPENAI_API_KEY"):
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("AZURE_OPENAI_API_KEY")
if api_key:
provider.api_key = SecretStr(api_key)
case _:
pass
@@ -103,6 +104,53 @@ def _kimi_default_headers(provider: LLMProvider, oauth: OAuthManager | None) ->
return headers


def _is_azure_openai_base_url(base_url: str) -> bool:
url = base_url.lower()
return (
".openai.azure.com" in url
or ".cognitiveservices.azure.com" in url
or "/openai/deployments/" in url
or url.rstrip("/").endswith("/openai")
)


def _openai_client_kwargs(provider: LLMProvider, *, resolved_api_key: str) -> dict[str, Any]:
"""Return kwargs forwarded into `openai.AsyncOpenAI(...)` (via Kosong providers).

This is used by `openai_legacy` and `openai_responses`.

Azure OpenAI requires:
- `api-version` query parameter
- `api-key` header auth
"""
client_kwargs: dict[str, Any] = {}

default_headers: dict[str, str] = {}
if provider.custom_headers:
default_headers.update(provider.custom_headers)

default_query: dict[str, object] = {}
if provider.default_query:
default_query.update(provider.default_query)

if provider.base_url and _is_azure_openai_base_url(provider.base_url):
if "api-version" not in default_query and (
api_version := (
os.getenv("AZURE_COGNITIVE_SERVICES_API_VERSION")
or os.getenv("AZURE_OPENAI_API_VERSION")
)
):
default_query["api-version"] = api_version
default_headers.setdefault("api-key", resolved_api_key)

if default_headers:
client_kwargs["default_headers"] = default_headers
if default_query:
client_kwargs["default_query"] = default_query

return client_kwargs


def create_llm(
provider: LLMProvider,
model: LLMModel,
@@ -152,6 +200,7 @@ def create_llm(
model=model.model,
base_url=provider.base_url,
api_key=resolved_api_key,
**_openai_client_kwargs(provider, resolved_api_key=resolved_api_key),
)
case "openai_responses":
from kosong.contrib.chat_provider.openai_responses import OpenAIResponses
@@ -160,6 +209,7 @@
model=model.model,
base_url=provider.base_url,
api_key=resolved_api_key,
**_openai_client_kwargs(provider, resolved_api_key=resolved_api_key),
)
case "anthropic":
from kosong.contrib.chat_provider.anthropic import Anthropic
24 changes: 24 additions & 0 deletions tests/core/test_create_llm.py
@@ -3,6 +3,7 @@
from inline_snapshot import snapshot
from kosong.chat_provider.echo import EchoChatProvider
from kosong.chat_provider.kimi import Kimi
from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy
from pydantic import SecretStr

from kimi_cli.config import LLMModel, LLMProvider
@@ -93,3 +94,26 @@ def test_create_llm_requires_base_url_for_kimi():
model = LLMModel(provider="kimi", model="kimi-base", max_context_size=4096)

assert create_llm(provider, model) is None


def test_create_llm_openai_legacy_azure_adds_api_key_header_and_api_version(monkeypatch):
provider = LLMProvider(
type="openai_legacy",
base_url="https://example.cognitiveservices.azure.com/openai/deployments/test-deployment",
api_key=SecretStr("test-key"),
)
model = LLMModel(
provider="azure-openai",
model="test-deployment",
max_context_size=4096,
capabilities=None,
)

monkeypatch.setenv("AZURE_COGNITIVE_SERVICES_API_VERSION", "2024-05-01-preview")

llm = create_llm(provider, model)
assert llm is not None
assert isinstance(llm.chat_provider, OpenAILegacy)

assert llm.chat_provider.client.default_query == snapshot({"api-version": "2024-05-01-preview"})
assert llm.chat_provider.client.default_headers.get("api-key") == snapshot("test-key")