Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sentry_sdk/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,3 +359,7 @@ class SDKInfo(TypedDict):
)

HttpStatusCodeRange = Union[int, Container[int]]

# One text fragment of a normalized gen_ai system instruction
# (functional TypedDict form; both keys are required).
TextPart = TypedDict("TextPart", {"type": Literal["text"], "content": str})
6 changes: 6 additions & 0 deletions sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,12 @@ class SPANDATA:
Example: 2048
"""

GEN_AI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions"
"""
The system instructions passed to the model.
    Example: [{"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}]
"""

GEN_AI_REQUEST_MESSAGES = "gen_ai.request.messages"
"""
The messages passed to the model. The "content" can be a string or an array of objects.
Expand Down
82 changes: 62 additions & 20 deletions sentry_sdk/integrations/google_genai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,16 @@
event_from_exception,
safe_serialize,
)
from google.genai.types import GenerateContentConfig
from google.genai.types import GenerateContentConfig, Part, Content
from itertools import chain

if TYPE_CHECKING:
from sentry_sdk.tracing import Span
from sentry_sdk._types import TextPart
from google.genai.types import (
GenerateContentResponse,
ContentListUnion,
ContentUnionDict,
Tool,
Model,
EmbedContentResponse,
Expand Down Expand Up @@ -720,6 +723,56 @@ def extract_finish_reasons(
return finish_reasons if finish_reasons else None


def _transform_system_instruction_one_level(
system_instructions: "ContentUnionDict",
) -> "list[TextPart]":
text_parts: "list[TextPart]" = []

if isinstance(system_instructions, str):
return [{"type": "text", "content": system_instructions}]

if isinstance(system_instructions, Part) and system_instructions.text:
return [{"type": "text", "content": system_instructions.text}]

if isinstance(system_instructions, Content):
for part in system_instructions.parts or []:
if part.text:
text_parts.append({"type": "text", "content": part.text})
return text_parts

if isinstance(system_instructions, dict):
if system_instructions.get("text"):
return [{"type": "text", "content": system_instructions["text"]}]

parts = system_instructions.get("parts", [])
for part in parts:
if isinstance(part, Part) and part.text:
text_parts.append({"type": "text", "content": part.text})
elif isinstance(system_instructions, dict) and part.get("text"):
text_parts.append({"type": "text", "content": part["text"]})
return text_parts

return text_parts


def _transform_system_instructions(
    system_instructions: "ContentUnionDict",
) -> "list[TextPart]":
    """Normalize system instructions (a single value or a list of values)
    into a flat list of text parts."""
    if not isinstance(system_instructions, list):
        return _transform_system_instruction_one_level(system_instructions)

    # Transform each element and flatten the per-element results.
    return list(
        chain.from_iterable(
            _transform_system_instruction_one_level(item)
            for item in system_instructions
        )
    )


def set_span_data_for_request(
span: "Span",
integration: "Any",
Expand All @@ -743,25 +796,14 @@ def set_span_data_for_request(
# Add system instruction if present
if config and hasattr(config, "system_instruction"):
system_instruction = config.system_instruction
if system_instruction:
system_messages = extract_contents_messages(system_instruction)
# System instruction should be a single system message
# Extract text from all messages and combine into one system message
system_texts = []
for msg in system_messages:
content = msg.get("content")
if isinstance(content, list):
# Extract text from content parts
for part in content:
if isinstance(part, dict) and part.get("type") == "text":
system_texts.append(part.get("text", ""))
elif isinstance(content, str):
system_texts.append(content)

if system_texts:
messages.append(
{"role": "system", "content": " ".join(system_texts)}
)

if system_instruction is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
_transform_system_instructions(system_instruction),
unpack=False,
)

# Extract messages from contents
contents_messages = extract_contents_messages(contents)
Expand Down
132 changes: 119 additions & 13 deletions tests/integrations/google_genai/test_google_genai.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from google import genai
from google.genai import types as genai_types
from google.genai.types import Content, Part

from sentry_sdk import start_transaction
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
Expand Down Expand Up @@ -186,6 +187,7 @@ def test_nonstreaming_generate_content(
response_texts = json.loads(response_text)
assert response_texts == ["Hello! How can I help you today?"]
else:
assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"]
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span["data"]
assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["data"]

Expand All @@ -202,8 +204,107 @@ def test_nonstreaming_generate_content(
assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100


# Parametrized over every input shape google-genai accepts for system_instruction.
@pytest.mark.parametrize(
"system_instructions,expected_texts",
[
("You are a helpful assistant", ["You are a helpful assistant"]),
(
["You are a translator", "Translate to French"],
["You are a translator", "Translate to French"],
),
(
Content(role="user", parts=[Part(text="You are a helpful assistant")]),
["You are a helpful assistant"],
),
(
Content(
role="user",
parts=[
Part(text="You are a translator"),
Part(text="Translate to French"),
],
),
["You are a translator", "Translate to French"],
),
(
{"parts": [{"text": "You are a helpful assistant"}], "role": "user"},
["You are a helpful assistant"],
),
(
{
"parts": [
{"text": "You are a translator"},
{"text": "Translate to French"},
],
"role": "user",
},
["You are a translator", "Translate to French"],
),
(Part(text="You are a helpful assistant"), ["You are a helpful assistant"]),
({"text": "You are a helpful assistant"}, ["You are a helpful assistant"]),
(
[Part(text="You are a translator"), Part(text="Translate to French")],
["You are a translator", "Translate to French"],
),
(
[{"text": "You are a translator"}, {"text": "Translate to French"}],
["You are a translator", "Translate to French"],
),
(
[Part(text="First instruction"), {"text": "Second instruction"}],
["First instruction", "Second instruction"],
),
(
{
"parts": [
Part(text="First instruction"),
{"text": "Second instruction"},
],
"role": "user",
},
["First instruction", "Second instruction"],
),
(None, None),
("", []),
({}, []),
({"parts": []}, []),
(Content(role="user", parts=[]), []),
(
{
"parts": [
{"text": "Text part"},
{"file_data": {"file_uri": "gs://bucket/file.pdf"}},
],
"role": "user",
},
["Text part"],
),
(
{
"parts": [
{"text": "First"},
Part(text="Second"),
{"text": "Third"},
],
"role": "user",
},
["First", "Second", "Third"],
),
(
{
"parts": [
Part(text="First"),
Part(text="Second"),
Part(text="Third"),
],
},
["First", "Second", "Third"],
),
],
)
def test_generate_content_with_system_instruction(
sentry_init, capture_events, mock_genai_client
sentry_init, capture_events, mock_genai_client, system_instructions, expected_texts
):
sentry_init(
integrations=[GoogleGenAIIntegration(include_prompts=True)],
Expand All @@ -219,7 +320,7 @@ def test_generate_content_with_system_instruction(
):
with start_transaction(name="google_genai"):
config = create_test_config(
system_instruction="You are a helpful assistant",
system_instruction=system_instructions,
temperature=0.5,
)
mock_genai_client.models.generate_content(
Expand All @@ -229,14 +330,21 @@ def test_generate_content_with_system_instruction(
(event,) = events
invoke_span = event["spans"][0]

# Check that system instruction is included in messages
if expected_texts is None:
assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"]
return

# (PII is enabled and include_prompts is True in this test)
messages_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
# Parse the JSON string to verify content
messages = json.loads(messages_str)
assert len(messages) == 2
assert messages[0] == {"role": "system", "content": "You are a helpful assistant"}
assert messages[1] == {"role": "user", "content": "What is 2+2?"}
system_instructions = json.loads(
invoke_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]
)

if isinstance(expected_texts, str):
assert system_instructions == [{"type": "text", "content": expected_texts}]
else:
assert system_instructions == [
{"type": "text", "content": text} for text in expected_texts
]


def test_generate_content_with_tools(sentry_init, capture_events, mock_genai_client):
Expand Down Expand Up @@ -933,10 +1041,8 @@ def test_google_genai_message_truncation(
with start_transaction(name="google_genai"):
mock_genai_client.models.generate_content(
model="gemini-1.5-flash",
contents=small_content,
config=create_test_config(
system_instruction=large_content,
),
contents=[large_content, small_content],
config=create_test_config(),
)

(event,) = events
Expand Down
Loading