From 401109aeb1f2b9d12431d7becabab4341b91f9b8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:55:30 +0200 Subject: [PATCH 01/10] feat: Remove truncation when stream_gen_ai_spans is enabled --- sentry_sdk/ai/utils.py | 8 + .../integrations/anthropic/test_anthropic.py | 329 +++------- .../google_genai/test_google_genai.py | 166 ++--- .../integrations/langchain/test_langchain.py | 137 ++--- .../integrations/langgraph/test_langgraph.py | 251 ++------ tests/integrations/litellm/test_litellm.py | 104 +--- tests/integrations/openai/test_openai.py | 60 +- .../openai_agents/test_openai_agents.py | 571 ++++++------------ 8 files changed, 439 insertions(+), 1187 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 8efa077ce5..4bd65ced76 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -741,6 +741,10 @@ def truncate_and_annotate_messages( scope: "Any", max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, ) -> "Optional[List[Dict[str, Any]]]": + client = sentry_sdk.get_client() + if client.options["_experiments"].get("stream_gen_ai_spans", False): + return messages + if not messages: return None @@ -761,6 +765,10 @@ def truncate_and_annotate_embedding_inputs( scope: "Any", max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, ) -> "Optional[List[Dict[str, Any]]]": + client = sentry_sdk.get_client() + if client.options["_experiments"].get("stream_gen_ai_spans", False): + return messages + if not messages: return None diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 2e240b9c8f..02de047711 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3625,20 +3625,14 @@ def mock_messages_create(*args, **kwargs): assert stored_messages[0]["role"] == expected_role -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_anthropic_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_anthropic_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3654,83 +3648,42 @@ def test_anthropic_message_truncation( {"role": "user", "content": "small message 5"}, ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + with start_transaction(): + client.messages.create(max_tokens=1024, messages=messages, model="model") - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -@pytest.mark.asyncio -async def test_anthropic_message_truncation_async( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +async def test_anthropic_message_truncation_async(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() client = AsyncAnthropic(api_key="z") client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -3746,70 +3699,30 @@ async def test_anthropic_message_truncation_async( {"role": "user", "content": "small message 5"}, ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] + with start_transaction(): + await client.messages.create(max_tokens=1024, messages=messages, model="model") - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -5203,21 +5116,14 @@ def test_transform_message_content_list_anthropic(): # Integration tests for binary data in messages -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_base64_image( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_base64_image(sentry_init, capture_events): """Test that messages with base64 images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5238,31 +5144,15 @@ def test_message_with_base64_image( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -5412,21 +5302,14 @@ def test_message_with_file_image( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_base64_pdf( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_base64_pdf(sentry_init, capture_events): """Test that messages with base64-encoded PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5447,30 +5330,14 @@ def test_message_with_base64_pdf( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "blob", @@ -5615,21 +5482,14 @@ def test_message_with_file_document( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_mixed_content( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_mixed_content(sentry_init, capture_events): """Test that messages with mixed content (text, images, documents) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5666,30 +5526,14 @@ def test_message_with_mixed_content( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 5 @@ -5721,21 +5565,14 @@ def test_message_with_mixed_content( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_multiple_images_different_formats( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_multiple_images_different_formats(sentry_init, capture_events): """Test that messages with multiple images of different source types are handled.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5771,30 +5608,14 @@ def test_message_with_multiple_images_different_formats( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 4 diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 8da5e7ca22..d9b2736584 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -1401,21 +1401,16 @@ def test_tool_calls_extraction( assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_google_genai_message_truncation( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test that large messages are truncated properly in Google GenAI integration.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -1424,39 +1419,21 @@ def test_google_genai_message_truncation( mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", - contents=[large_content, small_content], - config=create_test_config(), - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=[large_content, small_content], config=create_test_config(), ) - (event,) = events - invoke_span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + (event,) = events + invoke_span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2543,21 +2520,16 @@ def test_generate_content_with_inline_data( assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_function_response( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test generate_content with function_response (tool result).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2583,36 +2555,18 @@ def test_generate_content_with_function_response( ), ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # First message is user message assert messages[0]["role"] == "tool" @@ -2621,21 +2575,16 @@ def test_generate_content_with_function_response( assert messages[0]["content"]["output"] == "Sunny, 72F" -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_mixed_string_and_content( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test generate_content with mixed string and Content objects in list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2652,36 +2601,18 @@ def test_generate_content_with_mixed_string_and_content( ), ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # User message assert messages[0]["role"] == "user" @@ -2744,13 +2675,8 @@ def test_generate_content_with_part_object_directly( assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_list_of_dicts( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """ Test generate_content with list of dict format inputs. @@ -2763,8 +2689,8 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2775,36 +2701,18 @@ def test_generate_content_with_list_of_dicts( {"role": "user", "parts": [{"text": "Second user message"}]}, ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 7adb2d13c5..414eb67b3e 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -2949,13 +2949,7 @@ def test_langchain_message_role_normalization_units(): assert normalized[5] == "string message" # String message unchanged -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_langchain_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_langchain_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Langchain integration.""" from langchain_core.outputs import LLMResult, Generation @@ -2963,8 +2957,8 @@ def test_langchain_message_truncation( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2982,101 +2976,48 @@ def test_langchain_message_truncation( "small message 5", ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - - assert len(llm_spans) > 0 - - llm_span = llm_spans[0] - - assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] - messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: - events = capture_events() - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - llm_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" - ] + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + name="my_pipeline", + invocation_params={ + "temperature": 0.7, + "max_tokens": 100, + "model": "gpt-3.5-turbo", + }, + ) - assert len(llm_spans) > 0 + response = LLMResult( + generations=[[Generation(text="The response")]], + llm_output={ + "token_usage": { + "total_tokens": 25, + "prompt_tokens": 10, + "completion_tokens": 15, + } + }, + ) + callback.on_llm_end(response=response, run_id=run_id) - llm_span = llm_spans[0] + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + assert len(llm_spans) > 0 - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] - messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + llm_span = llm_spans[0] + assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] + messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 991c1f2269..c1e753716d 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -242,7 +242,6 @@ def original_compile(self, *args, **kwargs): assert "calculator" in tools_data -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -252,21 +251,14 @@ def original_compile(self, *args, **kwargs): (False, False), ], ) -def test_pregel_invoke( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - stream_gen_ai_spans, -): +def test_pregel_invoke(sentry_init, capture_events, send_default_pii, include_prompts): """Test Pregel.invoke() wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() test_state = { "messages": [ @@ -297,134 +289,57 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] + assert result is not None - assert len(invoke_spans) == 1 + tx = events[0] + assert tx["type"] == "transaction" - invoke_span = invoke_spans[0] + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + assert len(invoke_spans) == 1 - assert invoke_span["name"] == "invoke_agent test_graph" - assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" - assert ( - invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - ) - assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + invoke_span = invoke_spans[0] + assert invoke_span["description"] == "invoke_agent test_graph" + assert invoke_span["origin"] == "auto.ai.langgraph" + assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] - request_messages = invoke_span["attributes"][ - SPANDATA.GEN_AI_REQUEST_MESSAGES - ] + request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - if isinstance(request_messages, str): - import json + if isinstance(request_messages, str): + import json - request_messages = json.loads(request_messages) - assert len(request_messages) == 1 - assert request_messages[0]["content"] == "Of course! How can I assist you?" + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" - response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response - - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] - tool_calls_data = invoke_span["attributes"][ - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS - ] + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response - if isinstance(tool_calls_data, str): - import json + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + if isinstance(tool_calls_data, str): + import json - tool_calls_data = json.loads(tool_calls_data) + tool_calls_data = json.loads(tool_calls_data) - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_test_123" - assert tool_calls_data[0]["function"]["name"] == "search_tool" - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( - "attributes", {} - ) + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" else: - events = capture_events() - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - tx = events[0] - assert tx["type"] == "transaction" - - invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) == 1 - - invoke_span = invoke_spans[0] - - assert invoke_span["description"] == "invoke_agent test_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] - - request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - if isinstance(request_messages, str): - import json - - request_messages = json.loads(request_messages) - assert len(request_messages) == 1 - assert request_messages[0]["content"] == "Of course! How can I assist you?" - - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response - - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - - if isinstance(tool_calls_data, str): - import json - - tool_calls_data = json.loads(tool_calls_data) - - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_test_123" - assert tool_calls_data[0]["function"]["name"] == "search_tool" - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( - "data", {} - ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -2022,13 +1937,7 @@ def __init__(self, content, message_type="human"): assert "ai" not in roles -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_langgraph_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_langgraph_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Langgraph integration.""" import json @@ -2036,8 +1945,8 @@ def test_langgraph_message_truncation( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -2057,66 +1966,28 @@ def test_langgraph_message_truncation( def original_invoke(self, *args, **kwargs): return {"messages": args[0].get("messages", [])} - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) > 0 + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - (tx,) = (item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - invoke_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) > 0 + assert result is not None + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + invoke_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_INVOKE_AGENT + ] + assert len(invoke_spans) > 0 - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 8ae8dca99e..404cdeb9c4 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2341,20 +2341,14 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_litellm_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_litellm_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -2368,79 +2362,39 @@ def test_litellm_message_truncation( ] mock_response = MockCompletionResponse() - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } - - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 0da39e842d..56ac885619 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -5720,21 +5720,16 @@ def test_openai_message_role_mapping( assert stored_messages[0]["role"] == expected_role -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_openai_message_truncation( - sentry_init, - capture_events, - capture_items, - nonstreaming_chat_completions_model_response, - stream_gen_ai_spans, + sentry_init, capture_events, nonstreaming_chat_completions_model_response ): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -5761,47 +5756,22 @@ def test_openai_message_truncation( {"role": "user", "content": large_content}, ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=large_messages, - ) - - span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - - messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) <= len(large_messages) - - (event,) = (item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=large_messages, - ) - - (event,) = events - span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=large_messages, + ) - messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + (event,) = events + span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - assert isinstance(messages_data, str) + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) <= len(large_messages) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) meta_path = event["_meta"] span_meta = meta_path["spans"]["0"]["data"] diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 60f88cd7f4..f15bac5c64 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1679,16 +1679,13 @@ async def test_max_turns_before_handoff_span( assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_span( sentry_init, capture_events, - capture_items, test_agent, get_model_response, responses_tool_call_model_responses, - stream_gen_ai_spans, ): """ Test tool execution span creation. @@ -1746,413 +1743,195 @@ def simple_test_tool(message: str) -> str: serialize_pydantic=True, ) - if stream_gen_ai_spans: - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - items = capture_items("transaction", "span") - - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) - - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - spans = [item.payload for item in items if item.type == "span"] - agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) - tool_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, ) - available_tool = { - "name": "simple_test_tool", - "description": "A simple tool", - "params_json_schema": { - "properties": {"message": {"title": "Message", "type": "string"}}, - "required": ["message"], - "title": "simple_test_tool_args", - "type": "object", - "additionalProperties": False, - }, - "on_invoke_tool": mock.ANY, - "strict_json_schema": True, - "is_enabled": True, - } - - if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): - available_tool.update( - {"tool_input_guardrails": None, "tool_output_guardrails": None} - ) - - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 8, - ): - available_tool["needs_approval"] = False - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 9, - 0, - ): - available_tool.update( - { - "timeout_seconds": None, - "timeout_behavior": "error_as_result", - "timeout_error_function": None, - } - ) - - assert agent_span["name"] == "invoke_agent test_agent" - assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - - agent_span_available_tool = json.loads( - agent_span["attributes"]["gen_ai.request.available_tools"] - )[0] - - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - - assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["attributes"]["gen_ai.system"] == "openai" - - assert ai_client_span1["name"] == "chat gpt-4" - assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" - - ai_client_span1_available_tool = json.loads( - ai_client_span1["attributes"]["gen_ai.request.available_tools"] - )[0] + events = capture_events() - assert all( - ai_client_span1_available_tool[k] == v for k, v in available_tool.items() - ) - - assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 - assert ( - ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 - ) - assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 - - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } - - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None - - assert json.loads( - ai_client_span1["attributes"]["gen_ai.response.tool_calls"] - ) == [tool_call] - - assert tool_span["name"] == "execute_tool simple_test_tool" - assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - - tool_span_available_tool = json.loads( - tool_span["attributes"]["gen_ai.request.available_tools"] - )[0] - - assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - - assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["attributes"]["gen_ai.system"] == "openai" - assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" - assert ( - tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, ) - assert ai_client_span2["name"] == "chat gpt-4" - assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" - ai_client_span2_available_tool = json.loads( - ai_client_span2["attributes"]["gen_ai.request.available_tools"] - )[0] - - assert all( - ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + (transaction,) = events + spans = transaction["spans"] + agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) + + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} ) - assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], - }, - ] - ) - assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["attributes"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 - assert ( - ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } ) - assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 - else: - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - events = capture_events() - - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) - (transaction,) = events - spans = transaction["spans"] - agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) - tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) - - available_tool = { - "name": "simple_test_tool", - "description": "A simple tool", - "params_json_schema": { - "properties": {"message": {"title": "Message", "type": "string"}}, - "required": ["message"], - "title": "simple_test_tool_args", - "type": "object", - "additionalProperties": False, + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], }, - "on_invoke_tool": mock.ANY, - "strict_json_schema": True, - "is_enabled": True, - } - - if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): - available_tool.update( - {"tool_input_guardrails": None, "tool_output_guardrails": None} - ) - - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 8, - ): - available_tool["needs_approval"] = False - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 9, - 0, - ): - available_tool.update( - { - "timeout_seconds": None, - "timeout_behavior": "error_as_result", - "timeout_error_function": None, - } - ) - - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" - assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - - agent_span_available_tool = json.loads( - agent_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - - assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["data"]["gen_ai.system"] == "openai" - - assert ai_client_span1["description"] == "chat gpt-4" - assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["data"]["gen_ai.system"] == "openai" - assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" - - ai_client_span1_available_tool = json.loads( - ai_client_span1["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span1_available_tool[k] == v for k, v in available_tool.items() - ) - - assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 - - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } - - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None - - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ - tool_call ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + tool_call + ] - assert tool_span["description"] == "execute_tool simple_test_tool" - assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - - tool_span_available_tool = json.loads( - tool_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - - assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["data"]["gen_ai.system"] == "openai" - assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" - assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" - assert ai_client_span2["description"] == "chat gpt-4" - assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" - - ai_client_span2_available_tool = json.loads( - ai_client_span2["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span2_available_tool[k] == v for k, v in available_tool.items() - ) + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["data"]["gen_ai.system"] == "openai" + assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert ai_client_span2["description"] == "chat gpt-4" + assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + + ai_client_span2_available_tool = json.loads( + ai_client_span2["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + ) - assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], - }, - ] - ) - assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["data"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - assert ai_client_span2["data"]["gen_ai.system"] == "openai" - assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 + assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["data"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["data"]["gen_ai.system"] == "openai" + assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 @pytest.mark.asyncio From 5e8c254da212e907d24571911fe54dc9555b074d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:59:06 +0200 Subject: [PATCH 02/10] add pytest mark asyncio --- tests/integrations/anthropic/test_anthropic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 02de047711..21e6c95100 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3676,6 +3676,7 @@ def test_anthropic_message_truncation(sentry_init, capture_events): assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 +@pytest.mark.asyncio async def test_anthropic_message_truncation_async(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( From c948c14af1ef232d7653444d74683c46a85bbff5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:12:15 +0200 Subject: [PATCH 03/10] update to non-experimental option --- sentry_sdk/ai/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 4bd65ced76..fb9edcd335 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -742,7 +742,7 @@ def truncate_and_annotate_messages( max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, ) -> "Optional[List[Dict[str, Any]]]": client = sentry_sdk.get_client() - if client.options["_experiments"].get("stream_gen_ai_spans", False): + if client.options.get("stream_gen_ai_spans", False): return messages if not messages: @@ -766,7 +766,7 @@ def truncate_and_annotate_embedding_inputs( max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, ) -> "Optional[List[Dict[str, Any]]]": client = sentry_sdk.get_client() - if client.options["_experiments"].get("stream_gen_ai_spans", False): + if client.options.get("stream_gen_ai_spans", False): return messages if not messages: From 398559b8d8b87ed712b52651dbbbc5bdc4ad94b9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:32:39 +0200 Subject: [PATCH 04/10] restore legitimate test --- .../openai_agents/test_openai_agents.py | 589 ++++++++++++------ 1 file changed, 414 insertions(+), 175 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index aa2dcab76e..4752ac0376 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1679,13 +1679,16 @@ async def test_max_turns_before_handoff_span( assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_span( sentry_init, capture_events, + capture_items, test_agent, get_model_response, responses_tool_call_model_responses, + stream_gen_ai_spans, ): """ Test tool execution span creation. @@ -1743,195 +1746,431 @@ def simple_test_tool(message: str) -> str: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + ) + + items = capture_items("transaction", "span") + + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == "span"] + agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + tool_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL ) - events = capture_events() + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["attributes"]["gen_ai.system"] == "openai" + + assert ai_client_span1["name"] == "chat gpt-4" + assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() ) - (transaction,) = events - spans = transaction["spans"] - agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) - tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) - - available_tool = { - "name": "simple_test_tool", - "description": "A simple tool", - "params_json_schema": { - "properties": {"message": {"title": "Message", "type": "string"}}, - "required": ["message"], - "title": "simple_test_tool_args", - "type": "object", - "additionalProperties": False, - }, - "on_invoke_tool": mock.ANY, - "strict_json_schema": True, - "is_enabled": True, - } - - if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): - available_tool.update( - {"tool_input_guardrails": None, "tool_output_guardrails": None} + assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 + assert ( + ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads( + ai_client_span1["attributes"]["gen_ai.response.tool_calls"] + ) == [tool_call] + + assert tool_span["name"] == "execute_tool simple_test_tool" + assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + + assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["attributes"]["gen_ai.system"] == "openai" + assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" + assert ( + tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" ) + assert ai_client_span2["name"] == "chat gpt-4" + assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 8, - ): - available_tool["needs_approval"] = False - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 9, - 0, - ): - available_tool.update( - { - "timeout_seconds": None, - "timeout_behavior": "error_as_result", - "timeout_error_function": None, - } + ai_client_span2_available_tool = json.loads( + ai_client_span2["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() ) - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" - assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - - agent_span_available_tool = json.loads( - agent_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - - assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["data"]["gen_ai.system"] == "openai" - - assert ai_client_span1["description"] == "chat gpt-4" - assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["data"]["gen_ai.system"] == "openai" - assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" - - ai_client_span1_available_tool = json.loads( - ai_client_span1["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span1_available_tool[k] == v for k, v in available_tool.items() - ) + assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + { + "role": "assistant", + "content": [ + { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + } + ], + }, + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["attributes"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ( + ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 - assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 - - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } - - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None - - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ - tool_call - ] + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + ) - assert tool_span["description"] == "execute_tool simple_test_tool" - assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - - tool_span_available_tool = json.loads( - tool_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - - assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["data"]["gen_ai.system"] == "openai" - assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" - assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" - assert ai_client_span2["description"] == "chat gpt-4" - assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" - - ai_client_span2_available_tool = json.loads( - ai_client_span2["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span2_available_tool[k] == v for k, v in available_tool.items() - ) + events = capture_events() - assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) + + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + tool_call ] - ) - assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["data"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - assert ai_client_span2["data"]["gen_ai.system"] == "openai" - assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 + + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["data"]["gen_ai.system"] == "openai" + assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert ai_client_span2["description"] == "chat gpt-4" + assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + + ai_client_span2_available_tool = json.loads( + ai_client_span2["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["data"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["data"]["gen_ai.system"] == "openai" + assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 @pytest.mark.asyncio From b7811723c19ecd7b916614ac63c8451f3b3aeef7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 07:54:11 +0200 Subject: [PATCH 05/10] restore langgraph test --- .../integrations/langgraph/test_langgraph.py | 168 +++++++++++++----- 1 file changed, 127 insertions(+), 41 deletions(-) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index c7032b009b..80a20fb617 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -242,6 +242,7 @@ def original_compile(self, *args, **kwargs): assert "calculator" in tools_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -251,14 +252,21 @@ def original_compile(self, *args, **kwargs): (False, False), ], ) -def test_pregel_invoke(sentry_init, capture_events, send_default_pii, include_prompts): +def test_pregel_invoke( + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, +): """Test Pregel.invoke() wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + stream_gen_ai_spans=stream_gen_ai_spans, ) - events = capture_events() test_state = { "messages": [ @@ -289,57 +297,135 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - tx = events[0] - assert tx["type"] == "transaction" + assert result is not None - invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent test_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + assert len(invoke_spans) == 1 - request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + invoke_span = invoke_spans[0] - if isinstance(request_messages, str): - import json + assert invoke_span["name"] == "invoke_agent test_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + ) + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - request_messages = json.loads(request_messages) - assert len(request_messages) == 1 - assert request_messages[0]["content"] == "Of course! How can I assist you?" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response + request_messages = invoke_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_MESSAGES + ] - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - if isinstance(tool_calls_data, str): - import json + if isinstance(request_messages, str): + import json + + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Hello, can you help me?" + assert request_messages[1]["content"] == "Of course! How can I assist you?" + + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + + if isinstance(tool_calls_data, str): + import json - tool_calls_data = json.loads(tool_calls_data) + tool_calls_data = json.loads(tool_calls_data) - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_test_123" - assert tool_calls_data[0]["function"]["name"] == "search_tool" + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + + assert invoke_span["description"] == "invoke_agent test_graph" + assert invoke_span["origin"] == "auto.ai.langgraph" + assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + + request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + if isinstance(request_messages, str): + import json + + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" + + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + + if isinstance(tool_calls_data, str): + import json + + tool_calls_data = json.loads(tool_calls_data) + + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "data", {} + ) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) From b618cc8be08ea91f7c01cf21d36c52d490da663a Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 08:13:01 +0200 Subject: [PATCH 06/10] update test --- tests/integrations/langgraph/test_langgraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 80a20fb617..f308127276 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -337,7 +337,7 @@ def original_invoke(self, *args, **kwargs): import json request_messages = json.loads(request_messages) - assert len(request_messages) == 1 + assert len(request_messages) == 2 assert request_messages[0]["content"] == "Hello, can you help me?" assert request_messages[1]["content"] == "Of course! How can I assist you?" From 913ec9af4eefb8296bba602e65ade30be9efa9b1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:29:43 +0200 Subject: [PATCH 07/10] litellm test --- tests/integrations/litellm/test_litellm.py | 103 ++++++--------------- 1 file changed, 29 insertions(+), 74 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index c04619d838..b463387daf 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2325,20 +2325,14 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_litellm_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_litellm_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -2352,78 +2346,39 @@ def test_litellm_message_truncation( ] mock_response = MockCompletionResponse() - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } - - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 From f2bdff5cc6967b30cf08796fbd7eddd92c7a2746 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:31:11 +0200 Subject: [PATCH 08/10] remove whitespace changes --- tests/integrations/litellm/test_litellm.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b463387daf..b76980ddd3 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2920,7 +2920,6 @@ def test_binary_content_encoding_uri_type( ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -2948,7 +2947,6 @@ def test_binary_content_encoding_uri_type( assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( @@ -3036,10 +3034,8 @@ async def test_async_binary_content_encoding_uri_type( if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -3065,10 +3061,8 @@ async def test_async_binary_content_encoding_uri_type( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( From ec26b90f87b22ad048a0024f144143fa5b4cf385 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:31:46 +0200 Subject: [PATCH 09/10] one more whitespace removal --- tests/integrations/litellm/test_litellm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b76980ddd3..aab289b28f 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2944,7 +2944,6 @@ def test_binary_content_encoding_uri_type( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) From 4ec3ff7e96f6ea4c2c2764b6a0d91eed4b497d08 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:48:43 +0200 Subject: [PATCH 10/10] remove truncation per integration instead --- sentry_sdk/ai/utils.py | 8 ---- sentry_sdk/integrations/anthropic.py | 8 +++- sentry_sdk/integrations/google_genai/utils.py | 7 ++- sentry_sdk/integrations/langchain.py | 38 +++++++++++---- sentry_sdk/integrations/langgraph.py | 20 ++++++-- sentry_sdk/integrations/litellm.py | 14 ++++-- sentry_sdk/integrations/openai.py | 46 +++++++++++++++---- .../openai_agents/spans/invoke_agent.py | 7 ++- .../integrations/openai_agents/utils.py | 7 ++- .../pydantic_ai/spans/ai_client.py | 7 ++- .../pydantic_ai/spans/invoke_agent.py | 7 ++- 11 files changed, 127 insertions(+), 42 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index fb9edcd335..8efa077ce5 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -741,10 +741,6 @@ def truncate_and_annotate_messages( scope: "Any", max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, ) -> "Optional[List[Dict[str, Any]]]": - client = sentry_sdk.get_client() - if client.options.get("stream_gen_ai_spans", False): - return messages - if not messages: return None @@ -765,10 +761,6 @@ def truncate_and_annotate_embedding_inputs( scope: "Any", max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, ) -> "Optional[List[Dict[str, Any]]]": - client = sentry_sdk.get_client() - if client.options.get("stream_gen_ai_spans", False): - return messages - if not messages: return None diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index efc2f70ffd..ca9e60e59d 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -438,9 +438,13 @@ def _set_common_input_data( normalized_messages.append(transformed_message) role_normalized_messages = normalize_message_roles(normalized_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - role_normalized_messages, span, scope + messages_data = ( + role_normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(role_normalized_messages, span, scope) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index 25763ebe07..55a5b80233 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -892,9 +892,12 @@ def set_span_data_for_request( if messages: normalized_messages = normalize_message_roles(messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 8acf215bfe..4f5a1b4939 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -374,9 +374,15 @@ def on_llm_start( } for prompt in prompts ] + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -463,9 +469,15 @@ def on_chat_model_start( self._normalize_langchain_message(message) ) normalized_messages = normalize_message_roles(normalized_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -992,9 +1004,15 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -1049,9 +1067,13 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/langgraph.py b/sentry_sdk/integrations/langgraph.py index e5ea12b90a..1454d151f4 100644 --- a/sentry_sdk/integrations/langgraph.py +++ b/sentry_sdk/integrations/langgraph.py @@ -181,9 +181,15 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_input_messages, span, scope + messages_data = ( + normalized_input_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_input_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -234,9 +240,15 @@ async def new_ainvoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_input_messages, span, scope + messages_data = ( + normalized_input_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_input_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 3cff0fbc23..9561bd61f3 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -119,8 +119,11 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: if isinstance(embedding_input, list) else [embedding_input] ) - messages_data = truncate_and_annotate_embedding_inputs( - input_list, span, scope + client = sentry_sdk.get_client() + messages_data = ( + input_list + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_embedding_inputs(input_list, span, scope) ) if messages_data is not None: set_data_normalized( @@ -133,9 +136,14 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: # For chat, look for the 'messages' parameter messages = kwargs.get("messages", []) if messages: + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages = _convert_message_parts(messages) - messages_data = truncate_and_annotate_messages(messages, span, scope) + messages_data = ( + messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index b3919d1a9d..7bb328741e 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -398,8 +398,13 @@ def _set_responses_api_input_data( if isinstance(messages, str): normalized_messages = normalize_message_roles([messages]) # type: ignore + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -413,8 +418,13 @@ def _set_responses_api_input_data( ] if len(non_system_messages) > 0: normalized_messages = normalize_message_roles(non_system_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -472,8 +482,13 @@ def _set_completions_api_input_data( if isinstance(messages, str): normalized_messages = normalize_message_roles([messages]) # type: ignore + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -503,8 +518,13 @@ def _set_completions_api_input_data( ] if len(non_system_messages) > 0: normalized_messages = normalize_message_roles(non_system_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -539,9 +559,14 @@ def _set_embeddings_input_data( set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") normalized_messages = normalize_message_roles([messages]) # type: ignore + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_embedding_inputs( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_embedding_inputs( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -560,9 +585,14 @@ def _set_embeddings_input_data( if len(messages) > 0: normalized_messages = normalize_message_roles(messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_embedding_inputs( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_embedding_inputs( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index 27f9fdab25..2346189a96 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -63,9 +63,12 @@ def invoke_agent_span( if len(messages) > 0: normalized_messages = normalize_message_roles(messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index ee504b3496..ea1faefde7 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -173,8 +173,13 @@ def _set_input_data( ) normalized_messages = normalize_message_roles(request_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, diff --git a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py index dc95acad45..e549083fed 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py @@ -182,9 +182,12 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non if formatted_messages: normalized_messages = normalize_message_roles(formatted_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False diff --git a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py index ee08ca7036..c507315dcd 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py @@ -122,9 +122,12 @@ def invoke_agent_span( if messages: normalized_messages = normalize_message_roles(messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False