Merged

Commits (showing changes from 16 of 25 commits)
f3fcb9e
test(core): standardize provider tests with from_provider() parameter…
claude Nov 6, 2025
6fd61f5
feat(tests): consolidate providers into core test suite
claude Nov 6, 2025
74e2e56
docs(tests): add workflow update instructions for maintainers
claude Nov 6, 2025
c44fffa
fix(tests): update models to claude-haiku-4-5-latest and gemini-2.5-f…
claude Nov 6, 2025
c796c1c
fix(tests): complete model updates in util.py and README
claude Nov 6, 2025
7c1fc73
docs(tests): add comprehensive parameterization and provider-specific…
claude Nov 6, 2025
3866388
docs(tests): answer key questions about parameterization and provider…
claude Nov 6, 2025
c7cd45e
feat(tests): add unified multimodal tests to core suite
claude Nov 6, 2025
7f26778
refactor(tests): massive cleanup - delete all duplicate tests
claude Nov 6, 2025
2fbf2fc
Refactor: Update instructor modes for Fireworks and Perplexity
cursoragent Nov 6, 2025
eaf5a05
feat(tests): add unified multimodal tests to core suite
claude Nov 6, 2025
e6c6cf3
docs(tests): remove temporary analysis markdown files
claude Nov 6, 2025
04c8017
Refactor: Separate core provider tests and update test matrix
cursoragent Nov 6, 2025
afe8c14
refactor(tests): delete more duplicate test files
claude Nov 6, 2025
4f15c89
feat(xai): enhance tool handling and add capability definitions for p…
jxnl Nov 6, 2025
e5ce61a
fix(tests): stabilize core provider response modes
jxnl Nov 6, 2025
a3d0fc0
fix(ci): fix ruff linting errors and type check issues
jxnl Nov 6, 2025
515ac81
fix(types): add type ignores for xAI SDK method calls
jxnl Nov 6, 2025
8209d5a
fix(anthropic): respect strict JSON control character handling
jxnl Nov 6, 2025
de36d2b
Merge remote-tracking branch 'origin/main' into claude/standardize-fr…
jxnl Nov 12, 2025
5a6b0b2
refactor(tests): remove provider-specific tests and utility configura…
jxnl Nov 12, 2025
9ff3df0
fix(tests): update test commands to use asyncio mode
jxnl Nov 12, 2025
ad165b5
feat(tests): expand core provider tests for OpenAI, Anthropic, Google…
jxnl Nov 12, 2025
6da9110
fix(tests): skip unsupported provider capabilities for Google Gemini
jxnl Nov 12, 2025
ef2af12
docs(google): add known limitations as of Nov 12, 2024
jxnl Nov 12, 2025
52 changes: 44 additions & 8 deletions .github/workflows/test.yml
@@ -22,7 +22,11 @@ jobs:
- name: Install the project
run: uv sync --all-extras
- name: Run core tests
run: uv run pytest tests/ -k 'not llm and not openai and not gemini and not anthropic and not cohere and not vertexai and not mistral and not xai and not docs'
run: >-
uv run pytest tests/
-k 'not test_core_providers and not test_openai and not test_anthropic
and not test_gemini and not test_genai and not test_writer and not
test_vertexai and not docs'
env:
INSTRUCTOR_ENV: CI
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -31,15 +35,46 @@
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}

core-provider-tests:
name: Core Provider Tests
runs-on: ubuntu-latest
needs: core-tests

steps:
- uses: actions/checkout@v2
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
enable-cache: true
- name: Set up Python
run: uv python install 3.11
- name: Install the project
run: uv sync --all-extras
- name: Run core provider tests
run: uv run pytest tests/llm/test_core_providers -v
env:
INSTRUCTOR_ENV: CI
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
WRITER_API_KEY: ${{ secrets.WRITER_API_KEY }}
PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}

# Provider tests run in parallel
provider-tests:
name: ${{ matrix.provider.name }} Tests
runs-on: ubuntu-latest
needs: core-provider-tests
strategy:
fail-fast: false
matrix:
provider:
- name: Openai
- name: OpenAI
env_key: OPENAI_API_KEY
test_path: tests/llm/test_openai
- name: Anthropic
@@ -51,12 +86,12 @@
- name: Google GenAI
env_key: GOOGLE_API_KEY
test_path: tests/llm/test_genai
- name: Cohere
env_key: COHERE_API_KEY
test_path: tests/llm/test_cohere
- name: XAI
env_key: XAI_API_KEY
test_path: tests/llm/test_xai
- name: Vertex AI
env_key: GOOGLE_API_KEY
test_path: tests/llm/test_vertexai
- name: Writer
env_key: WRITER_API_KEY
test_path: tests/llm/test_writer

steps:
- uses: actions/checkout@v2
@@ -78,6 +113,7 @@
auto-client-test:
name: Auto Client Tests
runs-on: ubuntu-latest
needs: provider-tests

steps:
- uses: actions/checkout@v2
2 changes: 1 addition & 1 deletion CLAUDE.md
@@ -6,7 +6,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co

## Commands
- Install deps: `uv pip install -e ".[dev,anthropic]"` or `poetry install --with dev,anthropic`
- Run tests: `uv run pytest tests/`
- Run tests: `uv run pytest tests/ -n auto`
- Run specific test: `uv run pytest tests/path_to_test.py::test_name`
- Skip LLM tests: `uv run pytest tests/ -k 'not llm and not openai'`
- Type check: `uv run ty check`
60 changes: 58 additions & 2 deletions instructor/providers/xai/client.py
@@ -157,7 +157,7 @@ async def acreate(
parameters=_get_model_schema(response_model),
)
chat.proto.tools.append(tool)
chat.proto.tool_choice.mode = xchat.chat_pb2.ToolMode.TOOL_MODE_AUTO
chat.proto.tool_choice.CopyFrom(xchat.required_tool(tool.function.name))
if is_stream:
args = (
resp.tool_calls[0].function.arguments
@@ -175,6 +175,34 @@
)
else:
resp = await chat.sample()
if not resp.tool_calls:
# If no tool calls, try to extract from text content
from ...processing.function_calls import _validate_model_from_json
from ...utils import extract_json_from_codeblock

# Try to extract JSON from text content
text_content = ""
if hasattr(resp, "text") and resp.text:
text_content = resp.text
elif hasattr(resp, "content") and resp.content:
if isinstance(resp.content, str):
text_content = resp.content
elif isinstance(resp.content, list) and resp.content:
text_content = str(resp.content[0])

if text_content:
json_str = extract_json_from_codeblock(text_content)
parsed = _validate_model_from_json(
response_model, json_str, None, strict
)
parsed._raw_response = resp
return parsed

raise ValueError(
f"No tool calls returned from xAI and no text content available. "
f"Response: {resp}"
)

args = resp.tool_calls[0].function.arguments
from ...processing.function_calls import _validate_model_from_json

@@ -239,7 +267,7 @@ def create(
parameters=_get_model_schema(response_model),
)
chat.proto.tools.append(tool)
chat.proto.tool_choice.mode = xchat.chat_pb2.ToolMode.TOOL_MODE_AUTO
chat.proto.tool_choice.CopyFrom(xchat.required_tool(tool.function.name))
if is_stream:
for resp, _ in chat.stream():
# For xAI, tool_calls are returned at the end of the response.
@@ -258,6 +286,34 @@
)
else:
resp = chat.sample()
if not resp.tool_calls:
# If no tool calls, try to extract from text content
from ...processing.function_calls import _validate_model_from_json
from ...utils import extract_json_from_codeblock

# Try to extract JSON from text content
text_content = ""
if hasattr(resp, "text") and resp.text:
text_content = resp.text
elif hasattr(resp, "content") and resp.content:
if isinstance(resp.content, str):
text_content = resp.content
elif isinstance(resp.content, list) and resp.content:
text_content = str(resp.content[0])

if text_content:
json_str = extract_json_from_codeblock(text_content)
parsed = _validate_model_from_json(
response_model, json_str, None, strict
)
parsed._raw_response = resp
return parsed

raise ValueError(
f"No tool calls returned from xAI and no text content available. "
f"Response: {resp}"
)

args = resp.tool_calls[0].function.arguments
from ...processing.function_calls import _validate_model_from_json

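The fallback added in both branches above recovers a structured response when xAI returns text instead of tool calls: pull the JSON out of the text with extract_json_from_codeblock, then validate it against the response model. A minimal sketch of that path in isolation — the User model and sample text are illustrative, and plain pydantic validation stands in for the private _validate_model_from_json helper:

from pydantic import BaseModel
from instructor.utils import extract_json_from_codeblock

class User(BaseModel):
    name: str
    age: int

# Simulated assistant text: JSON wrapped in a markdown code block.
text = 'Sure:\n```json\n{"name": "Ada", "age": 36}\n```'
json_str = extract_json_from_codeblock(text)  # strips the fence, returns the raw JSON
user = User.model_validate_json(json_str)
assert user == User(name="Ada", age=36)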
183 changes: 183 additions & 0 deletions tests/llm/shared_config.py
@@ -0,0 +1,183 @@
"""
Shared configuration for multi-provider tests.

This module provides common test configuration for running the same tests
across multiple providers (OpenAI, Anthropic, Google, Cohere, xAI, Mistral,
Cerebras, Fireworks, Writer, Perplexity).
"""

import os

import instructor
import pytest


# Provider configurations: (model_string, mode, required_env_var, required_package)
PROVIDER_CONFIGS = [
(
"openai/gpt-5-nano",
instructor.Mode.TOOLS,
"OPENAI_API_KEY",
"openai",
),
(
"anthropic/claude-3-5-haiku-latest",
instructor.Mode.ANTHROPIC_TOOLS,
"ANTHROPIC_API_KEY",
"anthropic",
),
(
"google/gemini-2.5-flash",
instructor.Mode.GENAI_TOOLS,
"GOOGLE_API_KEY",
"google.genai",
),
(
"cohere/command-a-03-2025",
instructor.Mode.COHERE_TOOLS,
"COHERE_API_KEY",
"cohere",
),
(
"xai/grok-3-mini",
instructor.Mode.XAI_TOOLS,
"XAI_API_KEY",
"xai_sdk",
),
(
"mistral/ministral-8b-latest",
instructor.Mode.MISTRAL_TOOLS,
"MISTRAL_API_KEY",
"mistralai",
),
(
"cerebras/llama3.1-70b",
instructor.Mode.CEREBRAS_TOOLS,
"CEREBRAS_API_KEY",
"cerebras",
),
(
"fireworks/llama-v3p1-70b-instruct",
instructor.Mode.FIREWORKS_TOOLS,
"FIREWORKS_API_KEY",
"fireworks",
Comment on lines +59 to +63
P1: Use Fireworks-specific mode when building provider configs

The shared provider matrix assigns instructor.Mode.TOOLS to the Fireworks model, but from_fireworks() only accepts FIREWORKS_TOOLS or FIREWORKS_JSON. When a Fireworks API key and package are present, from_provider() will raise a ModeError before any test runs. This makes the whole Fireworks test suite unusable. The mode should be switched to one of the supported Fireworks modes so that tests can actually execute.

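# Hedged restatement of the P1 above: from_fireworks() accepts only
# Mode.FIREWORKS_TOOLS or Mode.FIREWORKS_JSON, so a generic
# instructor.Mode.TOOLS in this entry would make from_provider() raise a
# ModeError before any Fireworks test runs; the FIREWORKS_TOOLS mode used
# here is the safe choice.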

),
(
"writer/palmyra-x-004",
instructor.Mode.WRITER_TOOLS,
"WRITER_API_KEY",
"writerai",
),
(
"perplexity/llama-3.1-sonar-large-128k-online",
instructor.Mode.PERPLEXITY_JSON,
"PERPLEXITY_API_KEY",
"openai", # Perplexity transports over OpenAI-compatible API
),
]


def get_available_providers() -> list[tuple[str, instructor.Mode]]:
"""
Get list of available providers based on API keys and installed packages.

Returns:
List of tuples (model_string, mode) for available providers
"""
available = []

for model, mode, env_var, package in PROVIDER_CONFIGS:
# Check if API key is set
if not os.getenv(env_var):
continue

# Check if package is installed
try:
parts = package.split(".")
if len(parts) > 1:
__import__(parts[0])
# For nested imports like google.genai
__import__(package)
else:
__import__(package)
available.append((model, mode))
except ImportError:
continue

return available


def pytest_generate_tests(metafunc):
"""
Pytest hook to generate parametrized tests for available providers.

This is used in test files that have 'provider_config' as a parameter.
"""
if "provider_config" in metafunc.fixturenames:
available = get_available_providers()
if not available:
pytest.skip("No providers available (missing API keys or packages)")

# Generate test IDs like "openai", "anthropic", "google"
ids = [model.split("/")[0] for model, _ in available]
metafunc.parametrize("provider_config", available, ids=ids)


def pytest_configure(config):
"""Register custom markers for provider-specific tests."""
config.addinivalue_line("markers", "openai: mark test as requiring OpenAI provider")
config.addinivalue_line(
"markers", "anthropic: mark test as requiring Anthropic provider"
)
config.addinivalue_line("markers", "google: mark test as requiring Google provider")
config.addinivalue_line("markers", "cohere: mark test as requiring Cohere provider")
config.addinivalue_line("markers", "xai: mark test as requiring xAI provider")
config.addinivalue_line(
"markers", "mistral: mark test as requiring Mistral provider"
)
config.addinivalue_line(
"markers", "cerebras: mark test as requiring Cerebras provider"
)
config.addinivalue_line(
"markers", "fireworks: mark test as requiring Fireworks provider"
)
config.addinivalue_line("markers", "writer: mark test as requiring Writer provider")
config.addinivalue_line(
"markers", "perplexity: mark test as requiring Perplexity provider"
)


# Convenience function to skip if specific provider not available
def skip_if_provider_unavailable(provider_name: str):
"""
Skip test if specific provider is not available.

Args:
provider_name: One of "openai", "anthropic", "google", "cohere", "xai",
"mistral", "cerebras", "fireworks", "writer", "perplexity"
"""
config_map = {
"openai": ("OPENAI_API_KEY", "openai"),
"anthropic": ("ANTHROPIC_API_KEY", "anthropic"),
"google": ("GOOGLE_API_KEY", "google.genai"),
"cohere": ("COHERE_API_KEY", "cohere"),
"xai": ("XAI_API_KEY", "xai_sdk"),
"mistral": ("MISTRAL_API_KEY", "mistralai"),
"cerebras": ("CEREBRAS_API_KEY", "cerebras"),
"fireworks": ("FIREWORKS_API_KEY", "fireworks"),
"writer": ("WRITER_API_KEY", "writerai"),
"perplexity": ("PERPLEXITY_API_KEY", "openai"),
}

if provider_name not in config_map:
pytest.skip(f"Unknown provider: {provider_name}")

env_var, package = config_map[provider_name]

if not os.getenv(env_var):
pytest.skip(f"{env_var} not set")

try:
__import__(package)
except ImportError:
pytest.skip(f"{package} package not installed")