Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions instructor/processing/function_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,12 @@ def parse_bedrock_json(
strict: Optional[bool] = None,
) -> BaseModel:
if isinstance(completion, dict):
text = completion.get("output").get("message").get("content")[0].get("text")

# OpenAI models may return a 'reasoningText' item first, followed by the 'text' item
content = completion["output"]["message"]["content"]
text_content = next((c for c in content if "text" in c), None)
if not text_content:
raise ValueError("Unexpected format. No text content found.")
text = text_content["text"]
match = re.search(r"```?json(.*?)```?", text, re.DOTALL)
if match:
text = match.group(1).strip()
Expand Down
107 changes: 107 additions & 0 deletions tests/test_json_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from instructor.processing.function_calls import (
_extract_text_content,
_validate_model_from_json,
OpenAISchema,
)
from pydantic import BaseModel

Expand Down Expand Up @@ -275,3 +276,109 @@ def test_validate_model_json_error(self):

# Pydantic directly raises validation errors now, not our custom message
assert "Invalid JSON" in str(excinfo.value)


class PersonSchema(OpenAISchema):
    """Test model that inherits from OpenAISchema."""

    # NOTE(review): docstring kept verbatim; schema tooling may surface it as
    # the model description -- confirm before rewording.

    # Required fields: the Bedrock parsing tests below always supply both.
    name: str
    age: int

    # Optional; falls back to an empty list when the payload omits it.
    skills: list[str] = []


class TestBedrockJSONParsing:
    """Exercise PersonSchema.parse_bedrock_json against dict-shaped completions."""

    @staticmethod
    def _completion_with(*content_items):
        """Nest the given content items under output -> message -> content."""
        return {"output": {"message": {"content": list(content_items)}}}

    @staticmethod
    def _assert_person(person, name, age, skills):
        """Check all three PersonSchema fields of a parsed result."""
        assert person.name == name
        assert person.age == age
        assert person.skills == skills

    def test_parse_bedrock_json_simple(self):
        """A lone text item holding plain JSON is parsed into the model."""
        payload = self._completion_with(
            {"text": '{"name": "John", "age": 30, "skills": []}'},
        )

        parsed = PersonSchema.parse_bedrock_json(payload)

        self._assert_person(parsed, "John", 30, [])

    def test_parse_bedrock_json_with_reasoning_content(self):
        """A leading reasoningText item is skipped in favour of the text item.

        Covers the case where the content array starts with reasoningText
        rather than text, so index 0 carries no "text" key.
        """
        payload = self._completion_with(
            {"reasoningText": "Thinking about the response..."},
            {"text": '{"name": "Alice", "age": 25, "skills": ["python"]}'},
        )

        parsed = PersonSchema.parse_bedrock_json(payload)

        self._assert_person(parsed, "Alice", 25, ["python"])

    def test_parse_bedrock_json_with_codeblock(self):
        """JSON wrapped in a markdown ```json fence is still parsed."""
        payload = self._completion_with(
            {
                "text": '```json\n{"name": "Bob", "age": 40, "skills": ["go", "rust"]}\n```'
            },
        )

        parsed = PersonSchema.parse_bedrock_json(payload)

        self._assert_person(parsed, "Bob", 40, ["go", "rust"])

    def test_parse_bedrock_json_no_text_content(self):
        """A content array without any text item raises ValueError."""
        payload = self._completion_with(
            {"reasoningText": "Only reasoning, no text response"},
            {"otherContent": "Some other type"},
        )

        # `match` searches the exception's string form for this substring.
        with pytest.raises(ValueError, match="No text content found"):
            PersonSchema.parse_bedrock_json(payload)

    def test_parse_bedrock_json_multiple_text_contents(self):
        """With several text items present, the first one is the one parsed."""
        payload = self._completion_with(
            {"reasoningText": "Thinking..."},
            {"text": '{"name": "First", "age": 30, "skills": ["python"]}'},
            {"text": '{"name": "Second", "age": 40, "skills": ["java"]}'},
        )

        parsed = PersonSchema.parse_bedrock_json(payload)

        # The "Second" payload must be ignored.
        self._assert_person(parsed, "First", 30, ["python"])