From 401109aeb1f2b9d12431d7becabab4341b91f9b8 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Tue, 12 May 2026 15:55:30 +0200
Subject: [PATCH 01/10] feat: Remove truncation when stream_gen_ai_spans is
 enabled

---
 sentry_sdk/ai/utils.py                        |   8 +
 .../integrations/anthropic/test_anthropic.py  | 329 +++-------
 .../google_genai/test_google_genai.py         | 166 ++---
 .../integrations/langchain/test_langchain.py  | 137 ++---
 .../integrations/langgraph/test_langgraph.py  | 251 ++------
 tests/integrations/litellm/test_litellm.py    | 104 +---
 tests/integrations/openai/test_openai.py      |  60 +-
 .../openai_agents/test_openai_agents.py       | 571 ++++++------------
 8 files changed, 439 insertions(+), 1187 deletions(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 8efa077ce5..4bd65ced76 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -741,6 +741,10 @@ def truncate_and_annotate_messages(
     scope: "Any",
     max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS,
 ) -> "Optional[List[Dict[str, Any]]]":
+    client = sentry_sdk.get_client()
+    if client.options["_experiments"].get("stream_gen_ai_spans", False):
+        return messages
+
     if not messages:
         return None
 
@@ -761,6 +765,10 @@ def truncate_and_annotate_embedding_inputs(
     scope: "Any",
     max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
 ) -> "Optional[List[Dict[str, Any]]]":
+    client = sentry_sdk.get_client()
+    if client.options["_experiments"].get("stream_gen_ai_spans", False):
+        return messages
+
     if not messages:
         return None
 
diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py
index 2e240b9c8f..02de047711 100644
--- a/tests/integrations/anthropic/test_anthropic.py
+++ b/tests/integrations/anthropic/test_anthropic.py
@@ -3625,20 +3625,14 @@ def mock_messages_create(*args, **kwargs):
     assert stored_messages[0]["role"] == expected_role
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-def test_anthropic_message_truncation(
-    sentry_init,
-    capture_events,
-    capture_items,
-    stream_gen_ai_spans,
-):
+def test_anthropic_message_truncation(sentry_init, capture_events):
     """Test that large messages are truncated properly in Anthropic integration."""
     sentry_init(
         integrations=[AnthropicIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
+    events = capture_events()
 
     client = Anthropic(api_key="z")
     client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE)
@@ -3654,83 +3648,42 @@ def test_anthropic_message_truncation(
         {"role": "user", "content": "small message 5"},
     ]
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
-
-        with start_transaction():
-            client.messages.create(max_tokens=1024, messages=messages, model="model")
-
-        spans = [item.payload for item in items if item.type == "span"]
-        chat_spans = [
-            span
-            for span in spans
-            if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT
-        ]
-
-        assert len(chat_spans) > 0
-
-        chat_span = chat_spans[0]
-        assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic"
-        assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat"
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"]
-
-        messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-
-        assert isinstance(messages_data, str)
-
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) == 1
-        assert "small message 5" in str(parsed_messages[0])
-
-        tx = next(item.payload for item in items if item.type == "transaction")
-    else:
-        events = capture_events()
-
-        with start_transaction():
-            client.messages.create(max_tokens=1024, messages=messages, model="model")
-
-        assert len(events) > 0
-        tx = events[0]
-        assert tx["type"] == "transaction"
+    with start_transaction():
+        client.messages.create(max_tokens=1024, messages=messages, model="model")
 
-        chat_spans = [
-            span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT
-        ]
+    assert len(events) > 0
+    tx = events[0]
+    assert tx["type"] == "transaction"
 
-        assert len(chat_spans) > 0
+    chat_spans = [
+        span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT
+    ]
+    assert len(chat_spans) > 0
 
-        chat_span = chat_spans[0]
-        assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic"
-        assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat"
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"]
+    chat_span = chat_spans[0]
+    assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic"
+    assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat"
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"]
 
-        messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        assert isinstance(messages_data, str)
+    messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+    assert isinstance(messages_data, str)
 
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) == 1
-        assert "small message 5" in str(parsed_messages[0])
+    parsed_messages = json.loads(messages_data)
+    assert isinstance(parsed_messages, list)
+    assert len(parsed_messages) == 1
+    assert "small message 5" in str(parsed_messages[0])
 
     assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-@pytest.mark.asyncio
-async def test_anthropic_message_truncation_async(
-    sentry_init,
-    capture_events,
-    capture_items,
-    stream_gen_ai_spans,
-):
+async def test_anthropic_message_truncation_async(sentry_init, capture_events):
     """Test that large messages are truncated properly in Anthropic integration."""
     sentry_init(
         integrations=[AnthropicIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
+    events = capture_events()
 
     client = AsyncAnthropic(api_key="z")
     client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE)
@@ -3746,70 +3699,30 @@ async def test_anthropic_message_truncation_async(
         {"role": "user", "content": "small message 5"},
     ]
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
-
-        with start_transaction():
-            await client.messages.create(
-                max_tokens=1024, messages=messages, model="model"
-            )
-
-        spans = [item.payload for item in items if item.type == "span"]
-        chat_spans = [
-            span
-            for span in spans
-            if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT
-        ]
-
-        assert len(chat_spans) > 0
-
-        chat_span = chat_spans[0]
+    with start_transaction():
+        await client.messages.create(max_tokens=1024, messages=messages, model="model")
 
-        assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic"
-        assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat"
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"]
+    assert len(events) > 0
+    tx = events[0]
+    assert tx["type"] == "transaction"
 
-        messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-
-        assert isinstance(messages_data, str)
-
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) == 1
-        assert "small message 5" in str(parsed_messages[0])
-
-        tx = next(item.payload for item in items if item.type == "transaction")
-    else:
-        events = capture_events()
-
-        with start_transaction():
-            await client.messages.create(
-                max_tokens=1024, messages=messages, model="model"
-            )
-
-        assert len(events) > 0
-        tx = events[0]
-        assert tx["type"] == "transaction"
-
-        chat_spans = [
-            span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT
-        ]
-
-        assert len(chat_spans) > 0
-
-        chat_span = chat_spans[0]
+    chat_spans = [
+        span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT
+    ]
+    assert len(chat_spans) > 0
 
-        assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic"
-        assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat"
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"]
+    chat_span = chat_spans[0]
+    assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic"
+    assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat"
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"]
 
-        messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        assert isinstance(messages_data, str)
+    messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+    assert isinstance(messages_data, str)
 
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) == 1
-        assert "small message 5" in str(parsed_messages[0])
+    parsed_messages = json.loads(messages_data)
+    assert isinstance(parsed_messages, list)
+    assert len(parsed_messages) == 1
+    assert "small message 5" in str(parsed_messages[0])
 
     assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5
 
@@ -5203,21 +5116,14 @@ def test_transform_message_content_list_anthropic():
 # Integration tests for binary data in messages
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-def test_message_with_base64_image(
-    sentry_init,
-    capture_events,
-    capture_items,
-    stream_gen_ai_spans,
-):
+def test_message_with_base64_image(sentry_init, capture_events):
     """Test that messages with base64 images are properly captured."""
     sentry_init(
         integrations=[AnthropicIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
-
+    events = capture_events()
     client = Anthropic(api_key="z")
     client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE)
 
@@ -5238,31 +5144,15 @@ def test_message_with_base64_image(
         }
     ]
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
-
-        with start_transaction(name="anthropic"):
-            client.messages.create(max_tokens=1024, messages=messages, model="model")
-
-        spans = [item.payload for item in items if item.type == "span"]
-        (span,) = spans
-
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"]
-        stored_messages = json.loads(
-            span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        )
-    else:
-        events = capture_events()
+    with start_transaction(name="anthropic"):
+        client.messages.create(max_tokens=1024, messages=messages, model="model")
 
-        with start_transaction(name="anthropic"):
-            client.messages.create(max_tokens=1024, messages=messages, model="model")
+    assert len(events) == 1
+    (event,) = events
+    (span,) = event["spans"]
 
-        assert len(events) == 1
-        (event,) = events
-        (span,) = event["spans"]
-
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
-        stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
+    stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
 
     assert len(stored_messages) == 1
     assert stored_messages[0]["role"] == "user"
@@ -5412,21 +5302,14 @@ def test_message_with_file_image(
     }
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-def test_message_with_base64_pdf(
-    sentry_init,
-    capture_events,
-    capture_items,
-    stream_gen_ai_spans,
-):
+def test_message_with_base64_pdf(sentry_init, capture_events):
     """Test that messages with base64-encoded PDF documents are properly captured."""
     sentry_init(
         integrations=[AnthropicIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
-
+    events = capture_events()
     client = Anthropic(api_key="z")
     client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE)
 
@@ -5447,30 +5330,14 @@ def test_message_with_base64_pdf(
         }
     ]
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
-
-        with start_transaction(name="anthropic"):
-            client.messages.create(max_tokens=1024, messages=messages, model="model")
-
-        spans = [item.payload for item in items if item.type == "span"]
-        (span,) = spans
-
-        stored_messages = json.loads(
-            span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        )
-    else:
-        events = capture_events()
-
-        with start_transaction(name="anthropic"):
-            client.messages.create(max_tokens=1024, messages=messages, model="model")
-
-        assert len(events) == 1
-        (event,) = events
-        (span,) = event["spans"]
+    with start_transaction(name="anthropic"):
+        client.messages.create(max_tokens=1024, messages=messages, model="model")
 
-        stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+    assert len(events) == 1
+    (event,) = events
+    (span,) = event["spans"]
 
+    stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
     content = stored_messages[0]["content"]
     assert content[1] == {
         "type": "blob",
@@ -5615,21 +5482,14 @@ def test_message_with_file_document(
     }
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-def test_message_with_mixed_content(
-    sentry_init,
-    capture_events,
-    capture_items,
-    stream_gen_ai_spans,
-):
+def test_message_with_mixed_content(sentry_init, capture_events):
     """Test that messages with mixed content (text, images, documents) are properly captured."""
     sentry_init(
         integrations=[AnthropicIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
-
+    events = capture_events()
     client = Anthropic(api_key="z")
     client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE)
 
@@ -5666,30 +5526,14 @@ def test_message_with_mixed_content(
         }
     ]
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
+    with start_transaction(name="anthropic"):
+        client.messages.create(max_tokens=1024, messages=messages, model="model")
 
-        with start_transaction(name="anthropic"):
-            client.messages.create(max_tokens=1024, messages=messages, model="model")
-
-        spans = [item.payload for item in items if item.type == "span"]
-        (span,) = spans
-
-        stored_messages = json.loads(
-            span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        )
-    else:
-        events = capture_events()
-
-        with start_transaction(name="anthropic"):
-            client.messages.create(max_tokens=1024, messages=messages, model="model")
-
-        assert len(events) == 1
-        (event,) = events
-        (span,) = event["spans"]
-
-        stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+    assert len(events) == 1
+    (event,) = events
+    (span,) = event["spans"]
 
+    stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
     content = stored_messages[0]["content"]
 
     assert len(content) == 5
@@ -5721,21 +5565,14 @@ def test_message_with_mixed_content(
     }
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-def test_message_with_multiple_images_different_formats(
-    sentry_init,
-    capture_events,
-    capture_items,
-    stream_gen_ai_spans,
-):
+def test_message_with_multiple_images_different_formats(sentry_init, capture_events):
     """Test that messages with multiple images of different source types are handled."""
     sentry_init(
         integrations=[AnthropicIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
-
+    events = capture_events()
     client = Anthropic(api_key="z")
     client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE)
 
@@ -5771,30 +5608,14 @@ def test_message_with_multiple_images_different_formats(
         }
     ]
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
-
-        with start_transaction(name="anthropic"):
-            client.messages.create(max_tokens=1024, messages=messages, model="model")
-
-        spans = [item.payload for item in items if item.type == "span"]
-        (span,) = spans
-
-        stored_messages = json.loads(
-            span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        )
-    else:
-        events = capture_events()
-
-        with start_transaction(name="anthropic"):
-            client.messages.create(max_tokens=1024, messages=messages, model="model")
+    with start_transaction(name="anthropic"):
+        client.messages.create(max_tokens=1024, messages=messages, model="model")
 
-        assert len(events) == 1
-        (event,) = events
-        (span,) = event["spans"]
-
-        stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+    assert len(events) == 1
+    (event,) = events
+    (span,) = event["spans"]
 
+    stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
     content = stored_messages[0]["content"]
 
     assert len(content) == 4
diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py
index 8da5e7ca22..d9b2736584 100644
--- a/tests/integrations/google_genai/test_google_genai.py
+++ b/tests/integrations/google_genai/test_google_genai.py
@@ -1401,21 +1401,16 @@ def test_tool_calls_extraction(
     assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"}
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
 def test_google_genai_message_truncation(
-    sentry_init,
-    capture_events,
-    capture_items,
-    mock_genai_client,
-    stream_gen_ai_spans,
+    sentry_init, capture_events, mock_genai_client
 ):
     """Test that large messages are truncated properly in Google GenAI integration."""
     sentry_init(
         integrations=[GoogleGenAIIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
+    events = capture_events()
 
     large_content = (
         "This is a very long message that will exceed our size limits. " * 1000
@@ -1424,39 +1419,21 @@ def test_google_genai_message_truncation(
 
     mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON)
 
-    if stream_gen_ai_spans:
-        items = capture_items("span")
-
-        with mock.patch.object(
-            mock_genai_client._api_client, "request", return_value=mock_http_response
-        ), start_transaction(name="google_genai"):
-            mock_genai_client.models.generate_content(
-                model="gemini-1.5-flash",
-                contents=[large_content, small_content],
-                config=create_test_config(),
-            )
-
-        invoke_span = next(item.payload for item in items if item.type == "span")
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"]
-
-        messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-    else:
-        events = capture_events()
-
-        with mock.patch.object(
-            mock_genai_client._api_client, "request", return_value=mock_http_response
-        ), start_transaction(name="google_genai"):
+    with mock.patch.object(
+        mock_genai_client._api_client, "request", return_value=mock_http_response
+    ):
+        with start_transaction(name="google_genai"):
             mock_genai_client.models.generate_content(
                 model="gemini-1.5-flash",
                 contents=[large_content, small_content],
                 config=create_test_config(),
             )
 
-        (event,) = events
-        invoke_span = event["spans"][0]
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"]
+    (event,) = events
+    invoke_span = event["spans"][0]
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"]
 
-        messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+    messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
     assert isinstance(messages_data, str)
 
     parsed_messages = json.loads(messages_data)
@@ -2543,21 +2520,16 @@ def test_generate_content_with_inline_data(
     assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
 def test_generate_content_with_function_response(
-    sentry_init,
-    capture_events,
-    capture_items,
-    mock_genai_client,
-    stream_gen_ai_spans,
+    sentry_init, capture_events, mock_genai_client
 ):
     """Test generate_content with function_response (tool result)."""
     sentry_init(
         integrations=[GoogleGenAIIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
+    events = capture_events()
 
     mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON)
 
@@ -2583,36 +2555,18 @@ def test_generate_content_with_function_response(
         ),
     ]
 
-    if stream_gen_ai_spans:
-        items = capture_items("span")
-
-        with mock.patch.object(
-            mock_genai_client._api_client, "request", return_value=mock_http_response
-        ), start_transaction(name="google_genai"):
+    with mock.patch.object(
+        mock_genai_client._api_client, "request", return_value=mock_http_response
+    ):
+        with start_transaction(name="google_genai"):
             mock_genai_client.models.generate_content(
                 model="gemini-1.5-flash", contents=contents, config=create_test_config()
             )
 
-        invoke_span = next(item.payload for item in items if item.type == "span")
-
-        messages = json.loads(
-            invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        )
-    else:
-        events = capture_events()
-
-        with mock.patch.object(
-            mock_genai_client._api_client, "request", return_value=mock_http_response
-        ), start_transaction(name="google_genai"):
-            mock_genai_client.models.generate_content(
-                model="gemini-1.5-flash", contents=contents, config=create_test_config()
-            )
-
-        (event,) = events
-        invoke_span = event["spans"][0]
-
-        messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+    (event,) = events
+    invoke_span = event["spans"][0]
 
+    messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
     assert len(messages) == 1
     # First message is user message
     assert messages[0]["role"] == "tool"
@@ -2621,21 +2575,16 @@ def test_generate_content_with_function_response(
     assert messages[0]["content"]["output"] == "Sunny, 72F"
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
 def test_generate_content_with_mixed_string_and_content(
-    sentry_init,
-    capture_events,
-    capture_items,
-    mock_genai_client,
-    stream_gen_ai_spans,
+    sentry_init, capture_events, mock_genai_client
 ):
     """Test generate_content with mixed string and Content objects in list."""
     sentry_init(
         integrations=[GoogleGenAIIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
+    events = capture_events()
 
     mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON)
 
@@ -2652,36 +2601,18 @@ def test_generate_content_with_mixed_string_and_content(
         ),
     ]
 
-    if stream_gen_ai_spans:
-        items = capture_items("span")
-
-        with mock.patch.object(
-            mock_genai_client._api_client, "request", return_value=mock_http_response
-        ), start_transaction(name="google_genai"):
+    with mock.patch.object(
+        mock_genai_client._api_client, "request", return_value=mock_http_response
+    ):
+        with start_transaction(name="google_genai"):
             mock_genai_client.models.generate_content(
                 model="gemini-1.5-flash", contents=contents, config=create_test_config()
             )
 
-        invoke_span = next(item.payload for item in items if item.type == "span")
-
-        messages = json.loads(
-            invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        )
-    else:
-        events = capture_events()
-
-        with mock.patch.object(
-            mock_genai_client._api_client, "request", return_value=mock_http_response
-        ), start_transaction(name="google_genai"):
-            mock_genai_client.models.generate_content(
-                model="gemini-1.5-flash", contents=contents, config=create_test_config()
-            )
-
-        (event,) = events
-        invoke_span = event["spans"][0]
-
-        messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+    (event,) = events
+    invoke_span = event["spans"][0]
 
+    messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
     assert len(messages) == 1
     # User message
     assert messages[0]["role"] == "user"
@@ -2744,13 +2675,8 @@ def test_generate_content_with_part_object_directly(
     assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}]
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
 def test_generate_content_with_list_of_dicts(
-    sentry_init,
-    capture_events,
-    capture_items,
-    mock_genai_client,
-    stream_gen_ai_spans,
+    sentry_init, capture_events, mock_genai_client
 ):
     """
     Test generate_content with list of dict format inputs.
@@ -2763,8 +2689,8 @@ def test_generate_content_with_list_of_dicts(
         integrations=[GoogleGenAIIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
+    events = capture_events()
 
     mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON)
 
@@ -2775,36 +2701,18 @@ def test_generate_content_with_list_of_dicts(
         {"role": "user", "parts": [{"text": "Second user message"}]},
     ]
 
-    if stream_gen_ai_spans:
-        items = capture_items("span")
-
-        with mock.patch.object(
-            mock_genai_client._api_client, "request", return_value=mock_http_response
-        ), start_transaction(name="google_genai"):
+    with mock.patch.object(
+        mock_genai_client._api_client, "request", return_value=mock_http_response
+    ):
+        with start_transaction(name="google_genai"):
             mock_genai_client.models.generate_content(
                 model="gemini-1.5-flash", contents=contents, config=create_test_config()
             )
 
-        invoke_span = next(item.payload for item in items if item.type == "span")
-
-        messages = json.loads(
-            invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        )
-    else:
-        events = capture_events()
-
-        with mock.patch.object(
-            mock_genai_client._api_client, "request", return_value=mock_http_response
-        ), start_transaction(name="google_genai"):
-            mock_genai_client.models.generate_content(
-                model="gemini-1.5-flash", contents=contents, config=create_test_config()
-            )
-
-        (event,) = events
-        invoke_span = event["spans"][0]
-
-        messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+    (event,) = events
+    invoke_span = event["spans"][0]
 
+    messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
     assert len(messages) == 1
     assert messages[0]["role"] == "user"
     assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}]
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
index 7adb2d13c5..414eb67b3e 100644
--- a/tests/integrations/langchain/test_langchain.py
+++ b/tests/integrations/langchain/test_langchain.py
@@ -2949,13 +2949,7 @@ def test_langchain_message_role_normalization_units():
     assert normalized[5] == "string message"  # String message unchanged
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-def test_langchain_message_truncation(
-    sentry_init,
-    capture_events,
-    capture_items,
-    stream_gen_ai_spans,
-):
+def test_langchain_message_truncation(sentry_init, capture_events):
     """Test that large messages are truncated properly in Langchain integration."""
     from langchain_core.outputs import LLMResult, Generation
 
@@ -2963,8 +2957,8 @@ def test_langchain_message_truncation(
         integrations=[LangchainIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
+    events = capture_events()
 
     callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True)
 
@@ -2982,101 +2976,48 @@ def test_langchain_message_truncation(
         "small message 5",
     ]
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
-
-        with start_transaction():
-            callback.on_llm_start(
-                serialized=serialized,
-                prompts=prompts,
-                run_id=run_id,
-                name="my_pipeline",
-                invocation_params={
-                    "temperature": 0.7,
-                    "max_tokens": 100,
-                    "model": "gpt-3.5-turbo",
-                },
-            )
-
-            response = LLMResult(
-                generations=[[Generation(text="The response")]],
-                llm_output={
-                    "token_usage": {
-                        "total_tokens": 25,
-                        "prompt_tokens": 10,
-                        "completion_tokens": 15,
-                    }
-                },
-            )
-            callback.on_llm_end(response=response, run_id=run_id)
-
-        tx = next(item.payload for item in items if item.type == "transaction")
-        assert tx["type"] == "transaction"
-
-        spans = [item.payload for item in items if item.type == "span"]
-        llm_spans = [
-            span
-            for span in spans
-            if span["attributes"].get("sentry.op") == "gen_ai.text_completion"
-        ]
-
-        assert len(llm_spans) > 0
-
-        llm_span = llm_spans[0]
-
-        assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion"
-        assert llm_span["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline"
-
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"]
-        messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-    else:
-        events = capture_events()
-
-        with start_transaction():
-            callback.on_llm_start(
-                serialized=serialized,
-                prompts=prompts,
-                run_id=run_id,
-                name="my_pipeline",
-                invocation_params={
-                    "temperature": 0.7,
-                    "max_tokens": 100,
-                    "model": "gpt-3.5-turbo",
-                },
-            )
-
-            response = LLMResult(
-                generations=[[Generation(text="The response")]],
-                llm_output={
-                    "token_usage": {
-                        "total_tokens": 25,
-                        "prompt_tokens": 10,
-                        "completion_tokens": 15,
-                    }
-                },
-            )
-            callback.on_llm_end(response=response, run_id=run_id)
-
-        assert len(events) > 0
-        tx = events[0]
-        assert tx["type"] == "transaction"
-
-        llm_spans = [
-            span
-            for span in tx.get("spans", [])
-            if span.get("op") == "gen_ai.text_completion"
-        ]
+    with start_transaction():
+        callback.on_llm_start(
+            serialized=serialized,
+            prompts=prompts,
+            run_id=run_id,
+            name="my_pipeline",
+            invocation_params={
+                "temperature": 0.7,
+                "max_tokens": 100,
+                "model": "gpt-3.5-turbo",
+            },
+        )
 
-        assert len(llm_spans) > 0
+        response = LLMResult(
+            generations=[[Generation(text="The response")]],
+            llm_output={
+                "token_usage": {
+                    "total_tokens": 25,
+                    "prompt_tokens": 10,
+                    "completion_tokens": 15,
+                }
+            },
+        )
+        callback.on_llm_end(response=response, run_id=run_id)
 
-        llm_span = llm_spans[0]
+    assert len(events) > 0
+    tx = events[0]
+    assert tx["type"] == "transaction"
 
-        assert llm_span["data"]["gen_ai.operation.name"] == "text_completion"
-        assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline"
+    llm_spans = [
+        span
+        for span in tx.get("spans", [])
+        if span.get("op") == "gen_ai.text_completion"
+    ]
+    assert len(llm_spans) > 0
 
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"]
-        messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+    llm_span = llm_spans[0]
+    assert llm_span["data"]["gen_ai.operation.name"] == "text_completion"
+    assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline"
 
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"]
+    messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
     assert isinstance(messages_data, str)
 
     parsed_messages = json.loads(messages_data)
diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py
index 991c1f2269..c1e753716d 100644
--- a/tests/integrations/langgraph/test_langgraph.py
+++ b/tests/integrations/langgraph/test_langgraph.py
@@ -242,7 +242,6 @@ def original_compile(self, *args, **kwargs):
         assert "calculator" in tools_data
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
 @pytest.mark.parametrize(
     "send_default_pii, include_prompts",
     [
@@ -252,21 +251,14 @@ def original_compile(self, *args, **kwargs):
         (False, False),
     ],
 )
-def test_pregel_invoke(
-    sentry_init,
-    capture_events,
-    capture_items,
-    send_default_pii,
-    include_prompts,
-    stream_gen_ai_spans,
-):
+def test_pregel_invoke(sentry_init, capture_events, send_default_pii, include_prompts):
     """Test Pregel.invoke() wrapper creates proper invoke_agent span."""
     sentry_init(
         integrations=[LanggraphIntegration(include_prompts=include_prompts)],
         traces_sample_rate=1.0,
         send_default_pii=send_default_pii,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
+    events = capture_events()
 
     test_state = {
         "messages": [
@@ -297,134 +289,57 @@ def original_invoke(self, *args, **kwargs):
         ]
         return {"messages": new_messages}
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
-
-        with start_transaction():
-            wrapped_invoke = _wrap_pregel_invoke(original_invoke)
-            result = wrapped_invoke(pregel, test_state)
-
-        assert result is not None
+    with start_transaction():
+        wrapped_invoke = _wrap_pregel_invoke(original_invoke)
+        result = wrapped_invoke(pregel, test_state)
 
-        spans = [item.payload for item in items if item.type == "span"]
-        invoke_spans = [
-            span
-            for span in spans
-            if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT
-        ]
+    assert result is not None
 
-        assert len(invoke_spans) == 1
+    tx = events[0]
+    assert tx["type"] == "transaction"
 
-        invoke_span = invoke_spans[0]
+    invoke_spans = [
+        span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    ]
+    assert len(invoke_spans) == 1
 
-        assert invoke_span["name"] == "invoke_agent test_graph"
-        assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph"
-        assert (
-            invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent"
-        )
-        assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph"
-        assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph"
+    invoke_span = invoke_spans[0]
+    assert invoke_span["description"] == "invoke_agent test_graph"
+    assert invoke_span["origin"] == "auto.ai.langgraph"
+    assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent"
+    assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph"
+    assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph"
 
-        if send_default_pii and include_prompts:
-            assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"]
-            assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"]
+    if send_default_pii and include_prompts:
+        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"]
+        assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"]
 
-            request_messages = invoke_span["attributes"][
-                SPANDATA.GEN_AI_REQUEST_MESSAGES
-            ]
+        request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
 
-            if isinstance(request_messages, str):
-                import json
+        if isinstance(request_messages, str):
+            import json
 
-                request_messages = json.loads(request_messages)
-            assert len(request_messages) == 1
-            assert request_messages[0]["content"] == "Of course! How can I assist you?"
+            request_messages = json.loads(request_messages)
+        assert len(request_messages) == 1
+        assert request_messages[0]["content"] == "Of course! How can I assist you?"
 
-            response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT]
-            assert response_text == expected_assistant_response
-
-            assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"]
-            tool_calls_data = invoke_span["attributes"][
-                SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS
-            ]
+        response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
+        assert response_text == expected_assistant_response
 
-            if isinstance(tool_calls_data, str):
-                import json
+        assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"]
+        tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]
+        if isinstance(tool_calls_data, str):
+            import json
 
-                tool_calls_data = json.loads(tool_calls_data)
+            tool_calls_data = json.loads(tool_calls_data)
 
-            assert len(tool_calls_data) == 1
-            assert tool_calls_data[0]["id"] == "call_test_123"
-            assert tool_calls_data[0]["function"]["name"] == "search_tool"
-        else:
-            assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get(
-                "attributes", {}
-            )
-            assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get(
-                "attributes", {}
-            )
-            assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get(
-                "attributes", {}
-            )
+        assert len(tool_calls_data) == 1
+        assert tool_calls_data[0]["id"] == "call_test_123"
+        assert tool_calls_data[0]["function"]["name"] == "search_tool"
     else:
-        events = capture_events()
-
-        with start_transaction():
-            wrapped_invoke = _wrap_pregel_invoke(original_invoke)
-            result = wrapped_invoke(pregel, test_state)
-
-        assert result is not None
-
-        tx = events[0]
-        assert tx["type"] == "transaction"
-
-        invoke_spans = [
-            span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT
-        ]
-
-        assert len(invoke_spans) == 1
-
-        invoke_span = invoke_spans[0]
-
-        assert invoke_span["description"] == "invoke_agent test_graph"
-        assert invoke_span["origin"] == "auto.ai.langgraph"
-        assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent"
-        assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph"
-        assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph"
-
-        if send_default_pii and include_prompts:
-            assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"]
-            assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"]
-
-            request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-
-            if isinstance(request_messages, str):
-                import json
-
-                request_messages = json.loads(request_messages)
-            assert len(request_messages) == 1
-            assert request_messages[0]["content"] == "Of course! How can I assist you?"
-
-            response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
-            assert response_text == expected_assistant_response
-
-            assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"]
-            tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]
-
-            if isinstance(tool_calls_data, str):
-                import json
-
-                tool_calls_data = json.loads(tool_calls_data)
-
-            assert len(tool_calls_data) == 1
-            assert tool_calls_data[0]["id"] == "call_test_123"
-            assert tool_calls_data[0]["function"]["name"] == "search_tool"
-        else:
-            assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {})
-            assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {})
-            assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get(
-                "data", {}
-            )
+        assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {})
+        assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {})
+        assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {})
 
 
 @pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
@@ -2022,13 +1937,7 @@ def __init__(self, content, message_type="human"):
     assert "ai" not in roles
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-def test_langgraph_message_truncation(
-    sentry_init,
-    capture_events,
-    capture_items,
-    stream_gen_ai_spans,
-):
+def test_langgraph_message_truncation(sentry_init, capture_events):
     """Test that large messages are truncated properly in Langgraph integration."""
     import json
 
@@ -2036,8 +1945,8 @@ def test_langgraph_message_truncation(
         integrations=[LanggraphIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
+    events = capture_events()
 
     large_content = (
         "This is a very long message that will exceed our size limits. " * 1000
@@ -2057,66 +1966,28 @@ def test_langgraph_message_truncation(
     def original_invoke(self, *args, **kwargs):
         return {"messages": args[0].get("messages", [])}
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
-
-        with start_transaction():
-            wrapped_invoke = _wrap_pregel_invoke(original_invoke)
-            result = wrapped_invoke(pregel, test_state)
-
-        assert result is not None
-
-        spans = [item.payload for item in items if item.type == "span"]
-        invoke_spans = [
-            span
-            for span in spans
-            if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT
-        ]
-
-        assert len(invoke_spans) > 0
+    with start_transaction():
+        wrapped_invoke = _wrap_pregel_invoke(original_invoke)
+        result = wrapped_invoke(pregel, test_state)
 
-        invoke_span = invoke_spans[0]
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"]
-
-        messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-
-        assert isinstance(messages_data, str)
-
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) == 1
-        assert "small message 5" in str(parsed_messages[0])
-        (tx,) = (item.payload for item in items if item.type == "transaction")
-    else:
-        events = capture_events()
-
-        with start_transaction():
-            wrapped_invoke = _wrap_pregel_invoke(original_invoke)
-            result = wrapped_invoke(pregel, test_state)
-
-        assert result is not None
-
-        assert len(events) > 0
-        tx = events[0]
-        assert tx["type"] == "transaction"
-
-        invoke_spans = [
-            span
-            for span in tx.get("spans", [])
-            if span.get("op") == OP.GEN_AI_INVOKE_AGENT
-        ]
-
-        assert len(invoke_spans) > 0
+    assert result is not None
+    assert len(events) > 0
+    tx = events[0]
+    assert tx["type"] == "transaction"
 
-        invoke_span = invoke_spans[0]
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"]
+    invoke_spans = [
+        span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_INVOKE_AGENT
+    ]
+    assert len(invoke_spans) > 0
 
-        messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        assert isinstance(messages_data, str)
+    invoke_span = invoke_spans[0]
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"]
 
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) == 1
-        assert "small message 5" in str(parsed_messages[0])
+    messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+    assert isinstance(messages_data, str)
 
+    parsed_messages = json.loads(messages_data)
+    assert isinstance(parsed_messages, list)
+    assert len(parsed_messages) == 1
+    assert "small message 5" in str(parsed_messages[0])
     assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5
diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index 8ae8dca99e..404cdeb9c4 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -2341,20 +2341,14 @@ def test_integration_setup(sentry_init):
     assert _failure_callback in (litellm.failure_callback or [])
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-def test_litellm_message_truncation(
-    sentry_init,
-    capture_events,
-    capture_items,
-    stream_gen_ai_spans,
-):
+def test_litellm_message_truncation(sentry_init, capture_events):
     """Test that large messages are truncated properly in LiteLLM integration."""
     sentry_init(
         integrations=[LiteLLMIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
+    events = capture_events()
 
     large_content = (
         "This is a very long message that will exceed our size limits. " * 1000
@@ -2368,79 +2362,39 @@ def test_litellm_message_truncation(
     ]
     mock_response = MockCompletionResponse()
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
-
-        with start_transaction(name="litellm test"):
-            kwargs = {
-                "model": "gpt-3.5-turbo",
-                "messages": messages,
-            }
-
-            _input_callback(kwargs)
-            _success_callback(
-                kwargs,
-                mock_response,
-                datetime.now(),
-                datetime.now(),
-            )
-
-        spans = [item.payload for item in items if item.type == "span"]
-        chat_spans = [
-            span
-            for span in spans
-            if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT
-        ]
-
-        assert len(chat_spans) > 0
-
-        chat_span = chat_spans[0]
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"]
-
-        messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        assert isinstance(messages_data, str)
-
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) == 1
-        assert "small message 5" in str(parsed_messages[0])
-        tx = next(item.payload for item in items if item.type == "transaction")
-    else:
-        events = capture_events()
-
-        with start_transaction(name="litellm test"):
-            kwargs = {
-                "model": "gpt-3.5-turbo",
-                "messages": messages,
-            }
+    with start_transaction(name="litellm test"):
+        kwargs = {
+            "model": "gpt-3.5-turbo",
+            "messages": messages,
+        }
 
-            _input_callback(kwargs)
-            _success_callback(
-                kwargs,
-                mock_response,
-                datetime.now(),
-                datetime.now(),
-            )
+        _input_callback(kwargs)
+        _success_callback(
+            kwargs,
+            mock_response,
+            datetime.now(),
+            datetime.now(),
+        )
 
-        assert len(events) > 0
-        tx = events[0]
-        assert tx["type"] == "transaction"
+    assert len(events) > 0
+    tx = events[0]
+    assert tx["type"] == "transaction"
 
-        chat_spans = [
-            span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT
-        ]
-        assert len(chat_spans) > 0
+    chat_spans = [
+        span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT
+    ]
+    assert len(chat_spans) > 0
 
-        chat_span = chat_spans[0]
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"]
+    chat_span = chat_spans[0]
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"]
 
-        messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        assert isinstance(messages_data, str)
+    messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+    assert isinstance(messages_data, str)
 
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) == 1
-        assert "small message 5" in str(parsed_messages[0])
+    parsed_messages = json.loads(messages_data)
+    assert isinstance(parsed_messages, list)
+    assert len(parsed_messages) == 1
+    assert "small message 5" in str(parsed_messages[0])
     assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5
 
 
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 0da39e842d..56ac885619 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -5720,21 +5720,16 @@ def test_openai_message_role_mapping(
     assert stored_messages[0]["role"] == expected_role
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
 def test_openai_message_truncation(
-    sentry_init,
-    capture_events,
-    capture_items,
-    nonstreaming_chat_completions_model_response,
-    stream_gen_ai_spans,
+    sentry_init, capture_events, nonstreaming_chat_completions_model_response
 ):
     """Test that large messages are truncated properly in OpenAI integration."""
     sentry_init(
         integrations=[OpenAIIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
     )
+    events = capture_events()
 
     client = OpenAI(api_key="z")
     client.chat.completions._post = mock.Mock(
@@ -5761,47 +5756,22 @@ def test_openai_message_truncation(
         {"role": "user", "content": large_content},
     ]
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
-
-        with start_transaction(name="openai tx"):
-            client.chat.completions.create(
-                model="some-model",
-                messages=large_messages,
-            )
-
-        span = next(item.payload for item in items if item.type == "span")
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"]
-
-        messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-
-        assert isinstance(messages_data, str)
-
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) <= len(large_messages)
-
-        (event,) = (item.payload for item in items if item.type == "transaction")
-    else:
-        events = capture_events()
-
-        with start_transaction(name="openai tx"):
-            client.chat.completions.create(
-                model="some-model",
-                messages=large_messages,
-            )
-
-        (event,) = events
-        span = event["spans"][0]
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
+    with start_transaction(name="openai tx"):
+        client.chat.completions.create(
+            model="some-model",
+            messages=large_messages,
+        )
 
-        messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+    (event,) = events
+    span = event["spans"][0]
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
 
-        assert isinstance(messages_data, str)
+    messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+    assert isinstance(messages_data, str)
 
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) <= len(large_messages)
+    parsed_messages = json.loads(messages_data)
+    assert isinstance(parsed_messages, list)
+    assert len(parsed_messages) <= len(large_messages)
 
     meta_path = event["_meta"]
     span_meta = meta_path["spans"]["0"]["data"]
diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 60f88cd7f4..f15bac5c64 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -1679,16 +1679,13 @@ async def test_max_turns_before_handoff_span(
         assert handoff_span["data"]["gen_ai.operation.name"] == "handoff"
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
 @pytest.mark.asyncio
 async def test_tool_execution_span(
     sentry_init,
     capture_events,
-    capture_items,
     test_agent,
     get_model_response,
     responses_tool_call_model_responses,
-    stream_gen_ai_spans,
 ):
     """
     Test tool execution span creation.
@@ -1746,413 +1743,195 @@ def simple_test_tool(message: str) -> str:
         serialize_pydantic=True,
     )
 
-    if stream_gen_ai_spans:
-        with patch.object(
-            agent_with_tool.model._client._client,
-            "send",
-            side_effect=[tool_response, final_response],
-        ) as _:
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-                _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
-            )
-
-            items = capture_items("transaction", "span")
-
-            await agents.Runner.run(
-                agent_with_tool,
-                "Please use the simple test tool",
-                run_config=test_run_config,
-            )
-
-        (transaction,) = (item.payload for item in items if item.type == "transaction")
-        assert transaction["transaction"] == "test_agent workflow"
-        assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
-
-        spans = [item.payload for item in items if item.type == "span"]
-        agent_span = next(
-            span
-            for span in spans
-            if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT
-        )
-        ai_client_span1, ai_client_span2 = (
-            span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT
-        )
-        tool_span = next(
-            span
-            for span in spans
-            if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL
+    with patch.object(
+        agent_with_tool.model._client._client,
+        "send",
+        side_effect=[tool_response, final_response],
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
         )
 
-        available_tool = {
-            "name": "simple_test_tool",
-            "description": "A simple tool",
-            "params_json_schema": {
-                "properties": {"message": {"title": "Message", "type": "string"}},
-                "required": ["message"],
-                "title": "simple_test_tool_args",
-                "type": "object",
-                "additionalProperties": False,
-            },
-            "on_invoke_tool": mock.ANY,
-            "strict_json_schema": True,
-            "is_enabled": True,
-        }
-
-        if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3):
-            available_tool.update(
-                {"tool_input_guardrails": None, "tool_output_guardrails": None}
-            )
-
-        if parse_version(OPENAI_AGENTS_VERSION) >= (
-            0,
-            8,
-        ):
-            available_tool["needs_approval"] = False
-        if parse_version(OPENAI_AGENTS_VERSION) >= (
-            0,
-            9,
-            0,
-        ):
-            available_tool.update(
-                {
-                    "timeout_seconds": None,
-                    "timeout_behavior": "error_as_result",
-                    "timeout_error_function": None,
-                }
-            )
-
-        assert agent_span["name"] == "invoke_agent test_agent"
-        assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents"
-        assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent"
-        assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent"
-
-        agent_span_available_tool = json.loads(
-            agent_span["attributes"]["gen_ai.request.available_tools"]
-        )[0]
-
-        assert all(agent_span_available_tool[k] == v for k, v in available_tool.items())
-
-        assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100
-        assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4"
-        assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7
-        assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0
-        assert agent_span["attributes"]["gen_ai.system"] == "openai"
-
-        assert ai_client_span1["name"] == "chat gpt-4"
-        assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat"
-        assert ai_client_span1["attributes"]["gen_ai.system"] == "openai"
-        assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent"
-
-        ai_client_span1_available_tool = json.loads(
-            ai_client_span1["attributes"]["gen_ai.request.available_tools"]
-        )[0]
+        events = capture_events()
 
-        assert all(
-            ai_client_span1_available_tool[k] == v for k, v in available_tool.items()
-        )
-
-        assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100
-        assert ai_client_span1["attributes"][
-            "gen_ai.request.messages"
-        ] == safe_serialize(
-            [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": "Please use the simple test tool"}
-                    ],
-                },
-            ]
-        )
-        assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4"
-        assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7
-        assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0
-        assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10
-        assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0
-        assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5
-        assert (
-            ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0
-        )
-        assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15
-
-        tool_call = {
-            "arguments": '{"message": "hello"}',
-            "call_id": "call_123",
-            "name": "simple_test_tool",
-            "type": "function_call",
-            "id": "call_123",
-            "status": None,
-        }
-
-        if OPENAI_VERSION >= (2, 25, 0):
-            tool_call["namespace"] = None
-
-        assert json.loads(
-            ai_client_span1["attributes"]["gen_ai.response.tool_calls"]
-        ) == [tool_call]
-
-        assert tool_span["name"] == "execute_tool simple_test_tool"
-        assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent"
-        assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool"
-
-        tool_span_available_tool = json.loads(
-            tool_span["attributes"]["gen_ai.request.available_tools"]
-        )[0]
-
-        assert all(tool_span_available_tool[k] == v for k, v in available_tool.items())
-
-        assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100
-        assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4"
-        assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7
-        assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0
-        assert tool_span["attributes"]["gen_ai.system"] == "openai"
-        assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool"
-        assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}'
-        assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool"
-        assert (
-            tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello"
+        await agents.Runner.run(
+            agent_with_tool,
+            "Please use the simple test tool",
+            run_config=test_run_config,
         )
-        assert ai_client_span2["name"] == "chat gpt-4"
-        assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent"
-        assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat"
 
-        ai_client_span2_available_tool = json.loads(
-            ai_client_span2["attributes"]["gen_ai.request.available_tools"]
-        )[0]
-
-        assert all(
-            ai_client_span2_available_tool[k] == v for k, v in available_tool.items()
+    (transaction,) = events
+    spans = transaction["spans"]
+    agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT)
+    ai_client_span1, ai_client_span2 = (
+        span for span in spans if span["op"] == OP.GEN_AI_CHAT
+    )
+    tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL)
+
+    available_tool = {
+        "name": "simple_test_tool",
+        "description": "A simple tool",
+        "params_json_schema": {
+            "properties": {"message": {"title": "Message", "type": "string"}},
+            "required": ["message"],
+            "title": "simple_test_tool_args",
+            "type": "object",
+            "additionalProperties": False,
+        },
+        "on_invoke_tool": mock.ANY,
+        "strict_json_schema": True,
+        "is_enabled": True,
+    }
+
+    if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3):
+        available_tool.update(
+            {"tool_input_guardrails": None, "tool_output_guardrails": None}
         )
 
-        assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100
-        assert ai_client_span2["attributes"][
-            "gen_ai.request.messages"
-        ] == safe_serialize(
-            [
-                {
-                    "role": "tool",
-                    "content": [
-                        {
-                            "call_id": "call_123",
-                            "output": "Tool executed with: hello",
-                            "type": "function_call_output",
-                        }
-                    ],
-                },
-            ]
-        )
-        assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4"
-        assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7
-        assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0
-        assert (
-            ai_client_span2["attributes"]["gen_ai.response.text"]
-            == "Task completed using the tool"
-        )
-        assert ai_client_span2["attributes"]["gen_ai.system"] == "openai"
-        assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0
-        assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15
-        assert (
-            ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0
+    if parse_version(OPENAI_AGENTS_VERSION) >= (
+        0,
+        8,
+    ):
+        available_tool["needs_approval"] = False
+    if parse_version(OPENAI_AGENTS_VERSION) >= (
+        0,
+        9,
+        0,
+    ):
+        available_tool.update(
+            {
+                "timeout_seconds": None,
+                "timeout_behavior": "error_as_result",
+                "timeout_error_function": None,
+            }
         )
-        assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10
-        assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25
 
-    else:
-        with patch.object(
-            agent_with_tool.model._client._client,
-            "send",
-            side_effect=[tool_response, final_response],
-        ) as _:
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-                _experiments={"stream_gen_ai_spans": stream_gen_ai_spans},
-            )
-
-            events = capture_events()
-
-            await agents.Runner.run(
-                agent_with_tool,
-                "Please use the simple test tool",
-                run_config=test_run_config,
-            )
+    assert transaction["transaction"] == "test_agent workflow"
+    assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
+
+    assert agent_span["description"] == "invoke_agent test_agent"
+    assert agent_span["origin"] == "auto.ai.openai_agents"
+    assert agent_span["data"]["gen_ai.agent.name"] == "test_agent"
+    assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent"
+
+    agent_span_available_tool = json.loads(
+        agent_span["data"]["gen_ai.request.available_tools"]
+    )[0]
+    assert all(agent_span_available_tool[k] == v for k, v in available_tool.items())
+
+    assert agent_span["data"]["gen_ai.request.max_tokens"] == 100
+    assert agent_span["data"]["gen_ai.request.model"] == "gpt-4"
+    assert agent_span["data"]["gen_ai.request.temperature"] == 0.7
+    assert agent_span["data"]["gen_ai.request.top_p"] == 1.0
+    assert agent_span["data"]["gen_ai.system"] == "openai"
+
+    assert ai_client_span1["description"] == "chat gpt-4"
+    assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat"
+    assert ai_client_span1["data"]["gen_ai.system"] == "openai"
+    assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent"
+
+    ai_client_span1_available_tool = json.loads(
+        ai_client_span1["data"]["gen_ai.request.available_tools"]
+    )[0]
+    assert all(
+        ai_client_span1_available_tool[k] == v for k, v in available_tool.items()
+    )
 
-        (transaction,) = events
-        spans = transaction["spans"]
-        agent_span = next(
-            span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
-        )
-        ai_client_span1, ai_client_span2 = (
-            span for span in spans if span["op"] == OP.GEN_AI_CHAT
-        )
-        tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL)
-
-        available_tool = {
-            "name": "simple_test_tool",
-            "description": "A simple tool",
-            "params_json_schema": {
-                "properties": {"message": {"title": "Message", "type": "string"}},
-                "required": ["message"],
-                "title": "simple_test_tool_args",
-                "type": "object",
-                "additionalProperties": False,
+    assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100
+    assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize(
+        [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Please use the simple test tool"}
+                ],
             },
-            "on_invoke_tool": mock.ANY,
-            "strict_json_schema": True,
-            "is_enabled": True,
-        }
-
-        if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3):
-            available_tool.update(
-                {"tool_input_guardrails": None, "tool_output_guardrails": None}
-            )
-
-        if parse_version(OPENAI_AGENTS_VERSION) >= (
-            0,
-            8,
-        ):
-            available_tool["needs_approval"] = False
-        if parse_version(OPENAI_AGENTS_VERSION) >= (
-            0,
-            9,
-            0,
-        ):
-            available_tool.update(
-                {
-                    "timeout_seconds": None,
-                    "timeout_behavior": "error_as_result",
-                    "timeout_error_function": None,
-                }
-            )
-
-        assert transaction["transaction"] == "test_agent workflow"
-        assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
-
-        assert agent_span["description"] == "invoke_agent test_agent"
-        assert agent_span["origin"] == "auto.ai.openai_agents"
-        assert agent_span["data"]["gen_ai.agent.name"] == "test_agent"
-        assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent"
-
-        agent_span_available_tool = json.loads(
-            agent_span["data"]["gen_ai.request.available_tools"]
-        )[0]
-        assert all(agent_span_available_tool[k] == v for k, v in available_tool.items())
-
-        assert agent_span["data"]["gen_ai.request.max_tokens"] == 100
-        assert agent_span["data"]["gen_ai.request.model"] == "gpt-4"
-        assert agent_span["data"]["gen_ai.request.temperature"] == 0.7
-        assert agent_span["data"]["gen_ai.request.top_p"] == 1.0
-        assert agent_span["data"]["gen_ai.system"] == "openai"
-
-        assert ai_client_span1["description"] == "chat gpt-4"
-        assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat"
-        assert ai_client_span1["data"]["gen_ai.system"] == "openai"
-        assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent"
-
-        ai_client_span1_available_tool = json.loads(
-            ai_client_span1["data"]["gen_ai.request.available_tools"]
-        )[0]
-        assert all(
-            ai_client_span1_available_tool[k] == v for k, v in available_tool.items()
-        )
-
-        assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100
-        assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize(
-            [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": "Please use the simple test tool"}
-                    ],
-                },
-            ]
-        )
-        assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4"
-        assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7
-        assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0
-        assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10
-        assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0
-        assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5
-        assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
-        assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15
-
-        tool_call = {
-            "arguments": '{"message": "hello"}',
-            "call_id": "call_123",
-            "name": "simple_test_tool",
-            "type": "function_call",
-            "id": "call_123",
-            "status": None,
-        }
-
-        if OPENAI_VERSION >= (2, 25, 0):
-            tool_call["namespace"] = None
-
-        assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [
-            tool_call
         ]
+    )
+    assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4"
+    assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7
+    assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0
+    assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10
+    assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0
+    assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5
+    assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
+    assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15
+
+    tool_call = {
+        "arguments": '{"message": "hello"}',
+        "call_id": "call_123",
+        "name": "simple_test_tool",
+        "type": "function_call",
+        "id": "call_123",
+        "status": None,
+    }
+
+    if OPENAI_VERSION >= (2, 25, 0):
+        tool_call["namespace"] = None
+
+    assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [
+        tool_call
+    ]
 
-        assert tool_span["description"] == "execute_tool simple_test_tool"
-        assert tool_span["data"]["gen_ai.agent.name"] == "test_agent"
-        assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool"
-
-        tool_span_available_tool = json.loads(
-            tool_span["data"]["gen_ai.request.available_tools"]
-        )[0]
-        assert all(tool_span_available_tool[k] == v for k, v in available_tool.items())
-
-        assert tool_span["data"]["gen_ai.request.max_tokens"] == 100
-        assert tool_span["data"]["gen_ai.request.model"] == "gpt-4"
-        assert tool_span["data"]["gen_ai.request.temperature"] == 0.7
-        assert tool_span["data"]["gen_ai.request.top_p"] == 1.0
-        assert tool_span["data"]["gen_ai.system"] == "openai"
-        assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool"
-        assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}'
-        assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool"
-        assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello"
-        assert ai_client_span2["description"] == "chat gpt-4"
-        assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent"
-        assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat"
-
-        ai_client_span2_available_tool = json.loads(
-            ai_client_span2["data"]["gen_ai.request.available_tools"]
-        )[0]
-        assert all(
-            ai_client_span2_available_tool[k] == v for k, v in available_tool.items()
-        )
+    assert tool_span["description"] == "execute_tool simple_test_tool"
+    assert tool_span["data"]["gen_ai.agent.name"] == "test_agent"
+    assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool"
+
+    tool_span_available_tool = json.loads(
+        tool_span["data"]["gen_ai.request.available_tools"]
+    )[0]
+    assert all(tool_span_available_tool[k] == v for k, v in available_tool.items())
+
+    assert tool_span["data"]["gen_ai.request.max_tokens"] == 100
+    assert tool_span["data"]["gen_ai.request.model"] == "gpt-4"
+    assert tool_span["data"]["gen_ai.request.temperature"] == 0.7
+    assert tool_span["data"]["gen_ai.request.top_p"] == 1.0
+    assert tool_span["data"]["gen_ai.system"] == "openai"
+    assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool"
+    assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}'
+    assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool"
+    assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello"
+    assert ai_client_span2["description"] == "chat gpt-4"
+    assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent"
+    assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat"
+
+    ai_client_span2_available_tool = json.loads(
+        ai_client_span2["data"]["gen_ai.request.available_tools"]
+    )[0]
+    assert all(
+        ai_client_span2_available_tool[k] == v for k, v in available_tool.items()
+    )
 
-        assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100
-        assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize(
-            [
-                {
-                    "role": "tool",
-                    "content": [
-                        {
-                            "call_id": "call_123",
-                            "output": "Tool executed with: hello",
-                            "type": "function_call_output",
-                        }
-                    ],
-                },
-            ]
-        )
-        assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4"
-        assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7
-        assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0
-        assert (
-            ai_client_span2["data"]["gen_ai.response.text"]
-            == "Task completed using the tool"
-        )
-        assert ai_client_span2["data"]["gen_ai.system"] == "openai"
-        assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0
-        assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15
-        assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
-        assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10
-        assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25
+    assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100
+    assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize(
+        [
+            {
+                "role": "tool",
+                "content": [
+                    {
+                        "call_id": "call_123",
+                        "output": "Tool executed with: hello",
+                        "type": "function_call_output",
+                    }
+                ],
+            },
+        ]
+    )
+    assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4"
+    assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7
+    assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0
+    assert (
+        ai_client_span2["data"]["gen_ai.response.text"]
+        == "Task completed using the tool"
+    )
+    assert ai_client_span2["data"]["gen_ai.system"] == "openai"
+    assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0
+    assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15
+    assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
+    assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10
+    assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25
 
 
 @pytest.mark.asyncio

From 5e8c254da212e907d24571911fe54dc9555b074d Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Tue, 12 May 2026 15:59:06 +0200
Subject: [PATCH 02/10] add pytest mark asyncio

---
 tests/integrations/anthropic/test_anthropic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py
index 02de047711..21e6c95100 100644
--- a/tests/integrations/anthropic/test_anthropic.py
+++ b/tests/integrations/anthropic/test_anthropic.py
@@ -3676,6 +3676,7 @@ def test_anthropic_message_truncation(sentry_init, capture_events):
     assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5
 
 
+@pytest.mark.asyncio
 async def test_anthropic_message_truncation_async(sentry_init, capture_events):
     """Test that large messages are truncated properly in Anthropic integration."""
     sentry_init(

From c948c14af1ef232d7653444d74683c46a85bbff5 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Tue, 12 May 2026 20:12:15 +0200
Subject: [PATCH 03/10] update to non-experimental option

---
 sentry_sdk/ai/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 4bd65ced76..fb9edcd335 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -742,7 +742,7 @@ def truncate_and_annotate_messages(
     max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS,
 ) -> "Optional[List[Dict[str, Any]]]":
     client = sentry_sdk.get_client()
-    if client.options["_experiments"].get("stream_gen_ai_spans", False):
+    if client.options.get("stream_gen_ai_spans", False):
         return messages
 
     if not messages:
@@ -766,7 +766,7 @@ def truncate_and_annotate_embedding_inputs(
     max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
 ) -> "Optional[List[Dict[str, Any]]]":
     client = sentry_sdk.get_client()
-    if client.options["_experiments"].get("stream_gen_ai_spans", False):
+    if client.options.get("stream_gen_ai_spans", False):
         return messages
 
     if not messages:

From 398559b8d8b87ed712b52651dbbbc5bdc4ad94b9 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Tue, 12 May 2026 20:32:39 +0200
Subject: [PATCH 04/10] restore legitimate test

---
 .../openai_agents/test_openai_agents.py       | 589 ++++++++++++------
 1 file changed, 414 insertions(+), 175 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index aa2dcab76e..4752ac0376 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -1679,13 +1679,16 @@ async def test_max_turns_before_handoff_span(
         assert handoff_span["data"]["gen_ai.operation.name"] == "handoff"
 
 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
 @pytest.mark.asyncio
 async def test_tool_execution_span(
     sentry_init,
     capture_events,
+    capture_items,
     test_agent,
     get_model_response,
     responses_tool_call_model_responses,
+    stream_gen_ai_spans,
 ):
     """
     Test tool execution span creation.
@@ -1743,195 +1746,431 @@ def simple_test_tool(message: str) -> str:
         serialize_pydantic=True,
     )
 
-    with patch.object(
-        agent_with_tool.model._client._client,
-        "send",
-        side_effect=[tool_response, final_response],
-    ) as _:
-        sentry_init(
-            integrations=[OpenAIAgentsIntegration()],
-            traces_sample_rate=1.0,
-            send_default_pii=True,
+    if stream_gen_ai_spans:
+        with patch.object(
+            agent_with_tool.model._client._client,
+            "send",
+            side_effect=[tool_response, final_response],
+        ) as _:
+            sentry_init(
+                integrations=[OpenAIAgentsIntegration()],
+                traces_sample_rate=1.0,
+                send_default_pii=True,
+                stream_gen_ai_spans=stream_gen_ai_spans,
+            )
+
+            items = capture_items("transaction", "span")
+
+            await agents.Runner.run(
+                agent_with_tool,
+                "Please use the simple test tool",
+                run_config=test_run_config,
+            )
+
+        (transaction,) = (item.payload for item in items if item.type == "transaction")
+        assert transaction["transaction"] == "test_agent workflow"
+        assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
+
+        spans = [item.payload for item in items if item.type == "span"]
+        agent_span = next(
+            span
+            for span in spans
+            if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT
+        )
+        ai_client_span1, ai_client_span2 = (
+            span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT
+        )
+        tool_span = next(
+            span
+            for span in spans
+            if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL
         )
 
-        events = capture_events()
+        available_tool = {
+            "name": "simple_test_tool",
+            "description": "A simple tool",
+            "params_json_schema": {
+                "properties": {"message": {"title": "Message", "type": "string"}},
+                "required": ["message"],
+                "title": "simple_test_tool_args",
+                "type": "object",
+                "additionalProperties": False,
+            },
+            "on_invoke_tool": mock.ANY,
+            "strict_json_schema": True,
+            "is_enabled": True,
+        }
+
+        if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3):
+            available_tool.update(
+                {"tool_input_guardrails": None, "tool_output_guardrails": None}
+            )
+
+        if parse_version(OPENAI_AGENTS_VERSION) >= (
+            0,
+            8,
+        ):
+            available_tool["needs_approval"] = False
+        if parse_version(OPENAI_AGENTS_VERSION) >= (
+            0,
+            9,
+            0,
+        ):
+            available_tool.update(
+                {
+                    "timeout_seconds": None,
+                    "timeout_behavior": "error_as_result",
+                    "timeout_error_function": None,
+                }
+            )
 
-        await agents.Runner.run(
-            agent_with_tool,
-            "Please use the simple test tool",
-            run_config=test_run_config,
+        assert agent_span["name"] == "invoke_agent test_agent"
+        assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents"
+        assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent"
+        assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent"
+
+        agent_span_available_tool = json.loads(
+            agent_span["attributes"]["gen_ai.request.available_tools"]
+        )[0]
+
+        assert all(agent_span_available_tool[k] == v for k, v in available_tool.items())
+
+        assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100
+        assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4"
+        assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7
+        assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0
+        assert agent_span["attributes"]["gen_ai.system"] == "openai"
+
+        assert ai_client_span1["name"] == "chat gpt-4"
+        assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat"
+        assert ai_client_span1["attributes"]["gen_ai.system"] == "openai"
+        assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent"
+
+        ai_client_span1_available_tool = json.loads(
+            ai_client_span1["attributes"]["gen_ai.request.available_tools"]
+        )[0]
+
+        assert all(
+            ai_client_span1_available_tool[k] == v for k, v in available_tool.items()
         )
 
-    (transaction,) = events
-    spans = transaction["spans"]
-    agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT)
-    ai_client_span1, ai_client_span2 = (
-        span for span in spans if span["op"] == OP.GEN_AI_CHAT
-    )
-    tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL)
-
-    available_tool = {
-        "name": "simple_test_tool",
-        "description": "A simple tool",
-        "params_json_schema": {
-            "properties": {"message": {"title": "Message", "type": "string"}},
-            "required": ["message"],
-            "title": "simple_test_tool_args",
-            "type": "object",
-            "additionalProperties": False,
-        },
-        "on_invoke_tool": mock.ANY,
-        "strict_json_schema": True,
-        "is_enabled": True,
-    }
-
-    if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3):
-        available_tool.update(
-            {"tool_input_guardrails": None, "tool_output_guardrails": None}
+        assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100
+        assert ai_client_span1["attributes"][
+            "gen_ai.request.messages"
+        ] == safe_serialize(
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Please use the simple test tool"}
+                    ],
+                },
+            ]
+        )
+        assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4"
+        assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7
+        assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0
+        assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10
+        assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0
+        assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5
+        assert (
+            ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0
+        )
+        assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15
+
+        tool_call = {
+            "arguments": '{"message": "hello"}',
+            "call_id": "call_123",
+            "name": "simple_test_tool",
+            "type": "function_call",
+            "id": "call_123",
+            "status": None,
+        }
+
+        if OPENAI_VERSION >= (2, 25, 0):
+            tool_call["namespace"] = None
+
+        assert json.loads(
+            ai_client_span1["attributes"]["gen_ai.response.tool_calls"]
+        ) == [tool_call]
+
+        assert tool_span["name"] == "execute_tool simple_test_tool"
+        assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent"
+        assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool"
+
+        tool_span_available_tool = json.loads(
+            tool_span["attributes"]["gen_ai.request.available_tools"]
+        )[0]
+
+        assert all(tool_span_available_tool[k] == v for k, v in available_tool.items())
+
+        assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100
+        assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4"
+        assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7
+        assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0
+        assert tool_span["attributes"]["gen_ai.system"] == "openai"
+        assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool"
+        assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}'
+        assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool"
+        assert (
+            tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello"
         )
+        assert ai_client_span2["name"] == "chat gpt-4"
+        assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent"
+        assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat"
 
-    if parse_version(OPENAI_AGENTS_VERSION) >= (
-        0,
-        8,
-    ):
-        available_tool["needs_approval"] = False
-    if parse_version(OPENAI_AGENTS_VERSION) >= (
-        0,
-        9,
-        0,
-    ):
-        available_tool.update(
-            {
-                "timeout_seconds": None,
-                "timeout_behavior": "error_as_result",
-                "timeout_error_function": None,
-            }
+        ai_client_span2_available_tool = json.loads(
+            ai_client_span2["attributes"]["gen_ai.request.available_tools"]
+        )[0]
+
+        assert all(
+            ai_client_span2_available_tool[k] == v for k, v in available_tool.items()
         )
 
-    assert transaction["transaction"] == "test_agent workflow"
-    assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
-
-    assert agent_span["description"] == "invoke_agent test_agent"
-    assert agent_span["origin"] == "auto.ai.openai_agents"
-    assert agent_span["data"]["gen_ai.agent.name"] == "test_agent"
-    assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent"
-
-    agent_span_available_tool = json.loads(
-        agent_span["data"]["gen_ai.request.available_tools"]
-    )[0]
-    assert all(agent_span_available_tool[k] == v for k, v in available_tool.items())
-
-    assert agent_span["data"]["gen_ai.request.max_tokens"] == 100
-    assert agent_span["data"]["gen_ai.request.model"] == "gpt-4"
-    assert agent_span["data"]["gen_ai.request.temperature"] == 0.7
-    assert agent_span["data"]["gen_ai.request.top_p"] == 1.0
-    assert agent_span["data"]["gen_ai.system"] == "openai"
-
-    assert ai_client_span1["description"] == "chat gpt-4"
-    assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat"
-    assert ai_client_span1["data"]["gen_ai.system"] == "openai"
-    assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent"
-
-    ai_client_span1_available_tool = json.loads(
-        ai_client_span1["data"]["gen_ai.request.available_tools"]
-    )[0]
-    assert all(
-        ai_client_span1_available_tool[k] == v for k, v in available_tool.items()
-    )
+        assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100
+        assert ai_client_span2["attributes"][
+            "gen_ai.request.messages"
+        ] == safe_serialize(
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Please use the simple test tool"}
+                    ],
+                },
+                {
+                    "role": "assistant",
+                    "content": [
+                        {
+                            "arguments": '{"message": "hello"}',
+                            "call_id": "call_123",
+                            "name": "simple_test_tool",
+                            "type": "function_call",
+                            "id": "call_123",
+                        }
+                    ],
+                },
+                {
+                    "role": "tool",
+                    "content": [
+                        {
+                            "call_id": "call_123",
+                            "output": "Tool executed with: hello",
+                            "type": "function_call_output",
+                        }
+                    ],
+                },
+            ]
+        )
+        assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4"
+        assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7
+        assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0
+        assert (
+            ai_client_span2["attributes"]["gen_ai.response.text"]
+            == "Task completed using the tool"
+        )
+        assert ai_client_span2["attributes"]["gen_ai.system"] == "openai"
+        assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0
+        assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15
+        assert (
+            ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0
+        )
+        assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10
+        assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25
 
-    assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100
-    assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize(
-        [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": "Please use the simple test tool"}
-                ],
-            },
-        ]
-    )
-    assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4"
-    assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7
-    assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0
-    assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10
-    assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0
-    assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5
-    assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
-    assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15
-
-    tool_call = {
-        "arguments": '{"message": "hello"}',
-        "call_id": "call_123",
-        "name": "simple_test_tool",
-        "type": "function_call",
-        "id": "call_123",
-        "status": None,
-    }
-
-    if OPENAI_VERSION >= (2, 25, 0):
-        tool_call["namespace"] = None
-
-    assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [
-        tool_call
-    ]
+    else:
+        with patch.object(
+            agent_with_tool.model._client._client,
+            "send",
+            side_effect=[tool_response, final_response],
+        ) as _:
+            sentry_init(
+                integrations=[OpenAIAgentsIntegration()],
+                traces_sample_rate=1.0,
+                send_default_pii=True,
+                stream_gen_ai_spans=stream_gen_ai_spans,
+            )
 
-    assert tool_span["description"] == "execute_tool simple_test_tool"
-    assert tool_span["data"]["gen_ai.agent.name"] == "test_agent"
-    assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool"
-
-    tool_span_available_tool = json.loads(
-        tool_span["data"]["gen_ai.request.available_tools"]
-    )[0]
-    assert all(tool_span_available_tool[k] == v for k, v in available_tool.items())
-
-    assert tool_span["data"]["gen_ai.request.max_tokens"] == 100
-    assert tool_span["data"]["gen_ai.request.model"] == "gpt-4"
-    assert tool_span["data"]["gen_ai.request.temperature"] == 0.7
-    assert tool_span["data"]["gen_ai.request.top_p"] == 1.0
-    assert tool_span["data"]["gen_ai.system"] == "openai"
-    assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool"
-    assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}'
-    assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool"
-    assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello"
-    assert ai_client_span2["description"] == "chat gpt-4"
-    assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent"
-    assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat"
-
-    ai_client_span2_available_tool = json.loads(
-        ai_client_span2["data"]["gen_ai.request.available_tools"]
-    )[0]
-    assert all(
-        ai_client_span2_available_tool[k] == v for k, v in available_tool.items()
-    )
+            events = capture_events()
 
-    assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100
-    assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize(
-        [
-            {
-                "role": "tool",
-                "content": [
-                    {
-                        "call_id": "call_123",
-                        "output": "Tool executed with: hello",
-                        "type": "function_call_output",
-                    }
-                ],
+            await agents.Runner.run(
+                agent_with_tool,
+                "Please use the simple test tool",
+                run_config=test_run_config,
+            )
+
+        (transaction,) = events
+        spans = transaction["spans"]
+        agent_span = next(
+            span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
+        )
+        ai_client_span1, ai_client_span2 = (
+            span for span in spans if span["op"] == OP.GEN_AI_CHAT
+        )
+        tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL)
+
+        available_tool = {
+            "name": "simple_test_tool",
+            "description": "A simple tool",
+            "params_json_schema": {
+                "properties": {"message": {"title": "Message", "type": "string"}},
+                "required": ["message"],
+                "title": "simple_test_tool_args",
+                "type": "object",
+                "additionalProperties": False,
             },
+            "on_invoke_tool": mock.ANY,
+            "strict_json_schema": True,
+            "is_enabled": True,
+        }
+
+        if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3):
+            available_tool.update(
+                {"tool_input_guardrails": None, "tool_output_guardrails": None}
+            )
+
+        if parse_version(OPENAI_AGENTS_VERSION) >= (
+            0,
+            8,
+        ):
+            available_tool["needs_approval"] = False
+        if parse_version(OPENAI_AGENTS_VERSION) >= (
+            0,
+            9,
+            0,
+        ):
+            available_tool.update(
+                {
+                    "timeout_seconds": None,
+                    "timeout_behavior": "error_as_result",
+                    "timeout_error_function": None,
+                }
+            )
+
+        assert transaction["transaction"] == "test_agent workflow"
+        assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
+
+        assert agent_span["description"] == "invoke_agent test_agent"
+        assert agent_span["origin"] == "auto.ai.openai_agents"
+        assert agent_span["data"]["gen_ai.agent.name"] == "test_agent"
+        assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent"
+
+        agent_span_available_tool = json.loads(
+            agent_span["data"]["gen_ai.request.available_tools"]
+        )[0]
+        assert all(agent_span_available_tool[k] == v for k, v in available_tool.items())
+
+        assert agent_span["data"]["gen_ai.request.max_tokens"] == 100
+        assert agent_span["data"]["gen_ai.request.model"] == "gpt-4"
+        assert agent_span["data"]["gen_ai.request.temperature"] == 0.7
+        assert agent_span["data"]["gen_ai.request.top_p"] == 1.0
+        assert agent_span["data"]["gen_ai.system"] == "openai"
+
+        assert ai_client_span1["description"] == "chat gpt-4"
+        assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat"
+        assert ai_client_span1["data"]["gen_ai.system"] == "openai"
+        assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent"
+
+        ai_client_span1_available_tool = json.loads(
+            ai_client_span1["data"]["gen_ai.request.available_tools"]
+        )[0]
+        assert all(
+            ai_client_span1_available_tool[k] == v for k, v in available_tool.items()
+        )
+
+        assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100
+        assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize(
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Please use the simple test tool"}
+                    ],
+                },
+            ]
+        )
+        assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4"
+        assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7
+        assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0
+        assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10
+        assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0
+        assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5
+        assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
+        assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15
+
+        tool_call = {
+            "arguments": '{"message": "hello"}',
+            "call_id": "call_123",
+            "name": "simple_test_tool",
+            "type": "function_call",
+            "id": "call_123",
+            "status": None,
+        }
+
+        if OPENAI_VERSION >= (2, 25, 0):
+            tool_call["namespace"] = None
+
+        assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [
+            tool_call
         ]
-    )
-    assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4"
-    assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7
-    assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0
-    assert (
-        ai_client_span2["data"]["gen_ai.response.text"]
-        == "Task completed using the tool"
-    )
-    assert ai_client_span2["data"]["gen_ai.system"] == "openai"
-    assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0
-    assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15
-    assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
-    assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10
-    assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25
+
+        assert tool_span["description"] == "execute_tool simple_test_tool"
+        assert tool_span["data"]["gen_ai.agent.name"] == "test_agent"
+        assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool"
+
+        tool_span_available_tool = json.loads(
+            tool_span["data"]["gen_ai.request.available_tools"]
+        )[0]
+        assert all(tool_span_available_tool[k] == v for k, v in available_tool.items())
+
+        assert tool_span["data"]["gen_ai.request.max_tokens"] == 100
+        assert tool_span["data"]["gen_ai.request.model"] == "gpt-4"
+        assert tool_span["data"]["gen_ai.request.temperature"] == 0.7
+        assert tool_span["data"]["gen_ai.request.top_p"] == 1.0
+        assert tool_span["data"]["gen_ai.system"] == "openai"
+        assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool"
+        assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}'
+        assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool"
+        assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello"
+        assert ai_client_span2["description"] == "chat gpt-4"
+        assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent"
+        assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat"
+
+        ai_client_span2_available_tool = json.loads(
+            ai_client_span2["data"]["gen_ai.request.available_tools"]
+        )[0]
+        assert all(
+            ai_client_span2_available_tool[k] == v for k, v in available_tool.items()
+        )
+
+        assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100
+        assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize(
+            [
+                {
+                    "role": "tool",
+                    "content": [
+                        {
+                            "call_id": "call_123",
+                            "output": "Tool executed with: hello",
+                            "type": "function_call_output",
+                        }
+                    ],
+                },
+            ]
+        )
+        assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4"
+        assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7
+        assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0
+        assert (
+            ai_client_span2["data"]["gen_ai.response.text"]
+            == "Task completed using the tool"
+        )
+        assert ai_client_span2["data"]["gen_ai.system"] == "openai"
+        assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0
+        assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15
+        assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
+        assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10
+        assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25
 
 
 @pytest.mark.asyncio

From b7811723c19ecd7b916614ac63c8451f3b3aeef7 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Wed, 13 May 2026 07:54:11 +0200
Subject: [PATCH 05/10] restore langgraph test

---
 .../integrations/langgraph/test_langgraph.py  | 168 +++++++++++++-----
 1 file changed, 127 insertions(+), 41 deletions(-)

diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py
index c7032b009b..80a20fb617 100644
--- a/tests/integrations/langgraph/test_langgraph.py
+++ b/tests/integrations/langgraph/test_langgraph.py
@@ -242,6 +242,7 @@ def original_compile(self, *args, **kwargs):
         assert "calculator" in tools_data
 
 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
 @pytest.mark.parametrize(
     "send_default_pii, include_prompts",
     [
@@ -251,14 +252,21 @@ def original_compile(self, *args, **kwargs):
         (False, False),
     ],
 )
-def test_pregel_invoke(sentry_init, capture_events, send_default_pii, include_prompts):
+def test_pregel_invoke(
+    sentry_init,
+    capture_events,
+    capture_items,
+    send_default_pii,
+    include_prompts,
+    stream_gen_ai_spans,
+):
     """Test Pregel.invoke() wrapper creates proper invoke_agent span."""
     sentry_init(
         integrations=[LanggraphIntegration(include_prompts=include_prompts)],
         traces_sample_rate=1.0,
         send_default_pii=send_default_pii,
+        stream_gen_ai_spans=stream_gen_ai_spans,
     )
-    events = capture_events()
 
     test_state = {
         "messages": [
@@ -289,57 +297,135 @@ def original_invoke(self, *args, **kwargs):
         ]
         return {"messages": new_messages}
 
-    with start_transaction():
-        wrapped_invoke = _wrap_pregel_invoke(original_invoke)
-        result = wrapped_invoke(pregel, test_state)
+    if stream_gen_ai_spans:
+        items = capture_items("transaction", "span")
 
-    assert result is not None
+        with start_transaction():
+            wrapped_invoke = _wrap_pregel_invoke(original_invoke)
+            result = wrapped_invoke(pregel, test_state)
 
-    tx = events[0]
-    assert tx["type"] == "transaction"
+        assert result is not None
 
-    invoke_spans = [
-        span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT
-    ]
-    assert len(invoke_spans) == 1
+        spans = [item.payload for item in items if item.type == "span"]
+        invoke_spans = [
+            span
+            for span in spans
+            if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT
+        ]
 
-    invoke_span = invoke_spans[0]
-    assert invoke_span["description"] == "invoke_agent test_graph"
-    assert invoke_span["origin"] == "auto.ai.langgraph"
-    assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent"
-    assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph"
-    assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph"
-
-    if send_default_pii and include_prompts:
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"]
-        assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"]
+        assert len(invoke_spans) == 1
 
-        request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+        invoke_span = invoke_spans[0]
 
-        if isinstance(request_messages, str):
-            import json
+        assert invoke_span["name"] == "invoke_agent test_graph"
+        assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph"
+        assert (
+            invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent"
+        )
+        assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph"
+        assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph"
 
-            request_messages = json.loads(request_messages)
-        assert len(request_messages) == 1
-        assert request_messages[0]["content"] == "Of course! How can I assist you?"
+        if send_default_pii and include_prompts:
+            assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"]
+            assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"]
 
-        response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
-        assert response_text == expected_assistant_response
+            request_messages = invoke_span["attributes"][
+                SPANDATA.GEN_AI_REQUEST_MESSAGES
+            ]
 
-        assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"]
-        tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]
-        if isinstance(tool_calls_data, str):
-            import json
+            if isinstance(request_messages, str):
+                import json
+
+                request_messages = json.loads(request_messages)
+            assert len(request_messages) == 1
+            assert request_messages[0]["content"] == "Hello, can you help me?"
+            assert request_messages[1]["content"] == "Of course! How can I assist you?"
+
+            response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT]
+            assert response_text == expected_assistant_response
+
+            assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"]
+            tool_calls_data = invoke_span["attributes"][
+                SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS
+            ]
+
+            if isinstance(tool_calls_data, str):
+                import json
 
-            tool_calls_data = json.loads(tool_calls_data)
+                tool_calls_data = json.loads(tool_calls_data)
 
-        assert len(tool_calls_data) == 1
-        assert tool_calls_data[0]["id"] == "call_test_123"
-        assert tool_calls_data[0]["function"]["name"] == "search_tool"
+            assert len(tool_calls_data) == 1
+            assert tool_calls_data[0]["id"] == "call_test_123"
+            assert tool_calls_data[0]["function"]["name"] == "search_tool"
+        else:
+            assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get(
+                "attributes", {}
+            )
+            assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get(
+                "attributes", {}
+            )
+            assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get(
+                "attributes", {}
+            )
     else:
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {})
-        assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {})
-        assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {})
+        events = capture_events()
+
+        with start_transaction():
+            wrapped_invoke = _wrap_pregel_invoke(original_invoke)
+            result = wrapped_invoke(pregel, test_state)
+
+        assert result is not None
+
+        tx = events[0]
+        assert tx["type"] == "transaction"
+
+        invoke_spans = [
+            span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT
+        ]
+
+        assert len(invoke_spans) == 1
+
+        invoke_span = invoke_spans[0]
+
+        assert invoke_span["description"] == "invoke_agent test_graph"
+        assert invoke_span["origin"] == "auto.ai.langgraph"
+        assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent"
+        assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph"
+        assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph"
+
+        if send_default_pii and include_prompts:
+            assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"]
+            assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"]
+
+            request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+
+            if isinstance(request_messages, str):
+                import json
+
+                request_messages = json.loads(request_messages)
+            assert len(request_messages) == 1
+            assert request_messages[0]["content"] == "Of course! How can I assist you?"
+
+            response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
+            assert response_text == expected_assistant_response
+
+            assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"]
+            tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]
+
+            if isinstance(tool_calls_data, str):
+                import json
+
+                tool_calls_data = json.loads(tool_calls_data)
+
+            assert len(tool_calls_data) == 1
+            assert tool_calls_data[0]["id"] == "call_test_123"
+            assert tool_calls_data[0]["function"]["name"] == "search_tool"
+        else:
+            assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {})
+            assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {})
+            assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get(
+                "data", {}
+            )
 
 
 @pytest.mark.parametrize("stream_gen_ai_spans", [True, False])

From b618cc8be08ea91f7c01cf21d36c52d490da663a Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Wed, 13 May 2026 08:13:01 +0200
Subject: [PATCH 06/10] fix expected request message count in streamed langgraph test

---
 tests/integrations/langgraph/test_langgraph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py
index 80a20fb617..f308127276 100644
--- a/tests/integrations/langgraph/test_langgraph.py
+++ b/tests/integrations/langgraph/test_langgraph.py
@@ -337,7 +337,7 @@ def original_invoke(self, *args, **kwargs):
                 import json
 
                 request_messages = json.loads(request_messages)
-            assert len(request_messages) == 1
+            assert len(request_messages) == 2
             assert request_messages[0]["content"] == "Hello, can you help me?"
             assert request_messages[1]["content"] == "Of course! How can I assist you?"
 

From 913ec9af4eefb8296bba602e65ade30be9efa9b1 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Wed, 13 May 2026 10:29:43 +0200
Subject: [PATCH 07/10] test(litellm): Unparametrize message truncation test

---
 tests/integrations/litellm/test_litellm.py | 103 ++++++---------------
 1 file changed, 29 insertions(+), 74 deletions(-)

diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index c04619d838..b463387daf 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -2325,20 +2325,14 @@ def test_integration_setup(sentry_init):
     assert _failure_callback in (litellm.failure_callback or [])
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-def test_litellm_message_truncation(
-    sentry_init,
-    capture_events,
-    capture_items,
-    stream_gen_ai_spans,
-):
+def test_litellm_message_truncation(sentry_init, capture_events):
     """Test that large messages are truncated properly in LiteLLM integration."""
     sentry_init(
         integrations=[LiteLLMIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
-        stream_gen_ai_spans=stream_gen_ai_spans,
     )
+    events = capture_events()
 
     large_content = (
         "This is a very long message that will exceed our size limits. " * 1000
@@ -2352,78 +2346,39 @@ def test_litellm_message_truncation(
     ]
     mock_response = MockCompletionResponse()
 
-    if stream_gen_ai_spans:
-        items = capture_items("transaction", "span")
-
-        with start_transaction(name="litellm test"):
-            kwargs = {
-                "model": "gpt-3.5-turbo",
-                "messages": messages,
-            }
-
-            _input_callback(kwargs)
-            _success_callback(
-                kwargs,
-                mock_response,
-                datetime.now(),
-                datetime.now(),
-            )
-
-        spans = [item.payload for item in items if item.type == "span"]
-        chat_spans = [
-            span
-            for span in spans
-            if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT
-        ]
-        assert len(chat_spans) > 0
-
-        chat_span = chat_spans[0]
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"]
-
-        messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        assert isinstance(messages_data, str)
-
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) == 1
-        assert "small message 5" in str(parsed_messages[0])
-        tx = next(item.payload for item in items if item.type == "transaction")
-    else:
-        events = capture_events()
-
-        with start_transaction(name="litellm test"):
-            kwargs = {
-                "model": "gpt-3.5-turbo",
-                "messages": messages,
-            }
+    with start_transaction(name="litellm test"):
+        kwargs = {
+            "model": "gpt-3.5-turbo",
+            "messages": messages,
+        }
 
-            _input_callback(kwargs)
-            _success_callback(
-                kwargs,
-                mock_response,
-                datetime.now(),
-                datetime.now(),
-            )
+        _input_callback(kwargs)
+        _success_callback(
+            kwargs,
+            mock_response,
+            datetime.now(),
+            datetime.now(),
+        )
 
-        assert len(events) > 0
-        tx = events[0]
-        assert tx["type"] == "transaction"
+    assert len(events) > 0
+    tx = events[0]
+    assert tx["type"] == "transaction"
 
-        chat_spans = [
-            span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT
-        ]
-        assert len(chat_spans) > 0
+    chat_spans = [
+        span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT
+    ]
+    assert len(chat_spans) > 0
 
-        chat_span = chat_spans[0]
-        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"]
+    chat_span = chat_spans[0]
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"]
 
-        messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        assert isinstance(messages_data, str)
+    messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+    assert isinstance(messages_data, str)
 
-        parsed_messages = json.loads(messages_data)
-        assert isinstance(parsed_messages, list)
-        assert len(parsed_messages) == 1
-        assert "small message 5" in str(parsed_messages[0])
+    parsed_messages = json.loads(messages_data)
+    assert isinstance(parsed_messages, list)
+    assert len(parsed_messages) == 1
+    assert "small message 5" in str(parsed_messages[0])
     assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5
 
 

From f2bdff5cc6967b30cf08796fbd7eddd92c7a2746 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Wed, 13 May 2026 10:31:11 +0200
Subject: [PATCH 08/10] remove whitespace changes

---
 tests/integrations/litellm/test_litellm.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index b463387daf..b76980ddd3 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -2920,7 +2920,6 @@ def test_binary_content_encoding_uri_type(
         )
         assert len(chat_spans) == 1
         span = chat_spans[0]
-
         messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
     else:
         events = capture_events()
@@ -2948,7 +2947,6 @@ def test_binary_content_encoding_uri_type(
 
         assert len(chat_spans) == 1
         span = chat_spans[0]
-
         messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
 
     uri_item = next(
@@ -3036,10 +3034,8 @@ async def test_async_binary_content_encoding_uri_type(
             if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT
             and x["attributes"]["sentry.origin"] == "auto.ai.litellm"
         )
-
         assert len(chat_spans) == 1
         span = chat_spans[0]
-
         messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
     else:
         events = capture_events()
@@ -3065,10 +3061,8 @@ async def test_async_binary_content_encoding_uri_type(
             for x in event["spans"]
             if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
         )
-
         assert len(chat_spans) == 1
         span = chat_spans[0]
-
         messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
 
     uri_item = next(

From ec26b90f87b22ad048a0024f144143fa5b4cf385 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Wed, 13 May 2026 10:31:46 +0200
Subject: [PATCH 09/10] one more whitespace removal

---
 tests/integrations/litellm/test_litellm.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index b76980ddd3..aab289b28f 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -2944,7 +2944,6 @@ def test_binary_content_encoding_uri_type(
             for x in event["spans"]
             if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
         )
-
         assert len(chat_spans) == 1
         span = chat_spans[0]
         messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

From 4ec3ff7e96f6ea4c2c2764b6a0d91eed4b497d08 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Wed, 13 May 2026 10:48:43 +0200
Subject: [PATCH 10/10] Skip truncation per integration instead of in ai/utils

---
 sentry_sdk/ai/utils.py                        |  8 ----
 sentry_sdk/integrations/anthropic.py          |  8 +++-
 sentry_sdk/integrations/google_genai/utils.py |  7 ++-
 sentry_sdk/integrations/langchain.py          | 38 +++++++++++----
 sentry_sdk/integrations/langgraph.py          | 20 ++++++--
 sentry_sdk/integrations/litellm.py            | 14 ++++--
 sentry_sdk/integrations/openai.py             | 46 +++++++++++++++----
 .../openai_agents/spans/invoke_agent.py       |  7 ++-
 .../integrations/openai_agents/utils.py       |  7 ++-
 .../pydantic_ai/spans/ai_client.py            |  7 ++-
 .../pydantic_ai/spans/invoke_agent.py         |  7 ++-
 11 files changed, 127 insertions(+), 42 deletions(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index fb9edcd335..8efa077ce5 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -741,10 +741,6 @@ def truncate_and_annotate_messages(
     scope: "Any",
     max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS,
 ) -> "Optional[List[Dict[str, Any]]]":
-    client = sentry_sdk.get_client()
-    if client.options.get("stream_gen_ai_spans", False):
-        return messages
-
     if not messages:
         return None
 
@@ -765,10 +761,6 @@ def truncate_and_annotate_embedding_inputs(
     scope: "Any",
     max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
 ) -> "Optional[List[Dict[str, Any]]]":
-    client = sentry_sdk.get_client()
-    if client.options.get("stream_gen_ai_spans", False):
-        return messages
-
     if not messages:
         return None
 
diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py
index efc2f70ffd..ca9e60e59d 100644
--- a/sentry_sdk/integrations/anthropic.py
+++ b/sentry_sdk/integrations/anthropic.py
@@ -438,9 +438,13 @@ def _set_common_input_data(
                 normalized_messages.append(transformed_message)
 
         role_normalized_messages = normalize_message_roles(normalized_messages)
+
+        client = sentry_sdk.get_client()
         scope = sentry_sdk.get_current_scope()
-        messages_data = truncate_and_annotate_messages(
-            role_normalized_messages, span, scope
+        messages_data = (
+            role_normalized_messages
+            if client.options.get("stream_gen_ai_spans", False)
+            else truncate_and_annotate_messages(role_normalized_messages, span, scope)
         )
         if messages_data is not None:
             set_data_normalized(
diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py
index 25763ebe07..55a5b80233 100644
--- a/sentry_sdk/integrations/google_genai/utils.py
+++ b/sentry_sdk/integrations/google_genai/utils.py
@@ -892,9 +892,12 @@ def set_span_data_for_request(
 
         if messages:
             normalized_messages = normalize_message_roles(messages)
+            client = sentry_sdk.get_client()
             scope = sentry_sdk.get_current_scope()
-            messages_data = truncate_and_annotate_messages(
-                normalized_messages, span, scope
+            messages_data = (
+                normalized_messages
+                if client.options.get("stream_gen_ai_spans", False)
+                else truncate_and_annotate_messages(normalized_messages, span, scope)
             )
             if messages_data is not None:
                 set_data_normalized(
diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 8acf215bfe..4f5a1b4939 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -374,9 +374,15 @@ def on_llm_start(
                     }
                     for prompt in prompts
                 ]
+
+                client = sentry_sdk.get_client()
                 scope = sentry_sdk.get_current_scope()
-                messages_data = truncate_and_annotate_messages(
-                    normalized_messages, span, scope
+                messages_data = (
+                    normalized_messages
+                    if client.options.get("stream_gen_ai_spans", False)
+                    else truncate_and_annotate_messages(
+                        normalized_messages, span, scope
+                    )
                 )
                 if messages_data is not None:
                     set_data_normalized(
@@ -463,9 +469,15 @@ def on_chat_model_start(
                             self._normalize_langchain_message(message)
                         )
                 normalized_messages = normalize_message_roles(normalized_messages)
+
+                client = sentry_sdk.get_client()
                 scope = sentry_sdk.get_current_scope()
-                messages_data = truncate_and_annotate_messages(
-                    normalized_messages, span, scope
+                messages_data = (
+                    normalized_messages
+                    if client.options.get("stream_gen_ai_spans", False)
+                    else truncate_and_annotate_messages(
+                        normalized_messages, span, scope
+                    )
                 )
                 if messages_data is not None:
                     set_data_normalized(
@@ -992,9 +1004,15 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any":
                 and integration.include_prompts
             ):
                 normalized_messages = normalize_message_roles([input])
+
+                client = sentry_sdk.get_client()
                 scope = sentry_sdk.get_current_scope()
-                messages_data = truncate_and_annotate_messages(
-                    normalized_messages, span, scope
+                messages_data = (
+                    normalized_messages
+                    if client.options.get("stream_gen_ai_spans", False)
+                    else truncate_and_annotate_messages(
+                        normalized_messages, span, scope
+                    )
                 )
                 if messages_data is not None:
                     set_data_normalized(
@@ -1049,9 +1067,13 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any":
             and integration.include_prompts
         ):
             normalized_messages = normalize_message_roles([input])
+
+            client = sentry_sdk.get_client()
             scope = sentry_sdk.get_current_scope()
-            messages_data = truncate_and_annotate_messages(
-                normalized_messages, span, scope
+            messages_data = (
+                normalized_messages
+                if client.options.get("stream_gen_ai_spans", False)
+                else truncate_and_annotate_messages(normalized_messages, span, scope)
             )
             if messages_data is not None:
                 set_data_normalized(
diff --git a/sentry_sdk/integrations/langgraph.py b/sentry_sdk/integrations/langgraph.py
index e5ea12b90a..1454d151f4 100644
--- a/sentry_sdk/integrations/langgraph.py
+++ b/sentry_sdk/integrations/langgraph.py
@@ -181,9 +181,15 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any":
                 input_messages = _parse_langgraph_messages(args[0])
                 if input_messages:
                     normalized_input_messages = normalize_message_roles(input_messages)
+
+                    client = sentry_sdk.get_client()
                     scope = sentry_sdk.get_current_scope()
-                    messages_data = truncate_and_annotate_messages(
-                        normalized_input_messages, span, scope
+                    messages_data = (
+                        normalized_input_messages
+                        if client.options.get("stream_gen_ai_spans", False)
+                        else truncate_and_annotate_messages(
+                            normalized_input_messages, span, scope
+                        )
                     )
                     if messages_data is not None:
                         set_data_normalized(
@@ -234,9 +240,15 @@ async def new_ainvoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any":
                 input_messages = _parse_langgraph_messages(args[0])
                 if input_messages:
                     normalized_input_messages = normalize_message_roles(input_messages)
+
+                    client = sentry_sdk.get_client()
                     scope = sentry_sdk.get_current_scope()
-                    messages_data = truncate_and_annotate_messages(
-                        normalized_input_messages, span, scope
+                    messages_data = (
+                        normalized_input_messages
+                        if client.options.get("stream_gen_ai_spans", False)
+                        else truncate_and_annotate_messages(
+                            normalized_input_messages, span, scope
+                        )
                     )
                     if messages_data is not None:
                         set_data_normalized(
diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py
index 3cff0fbc23..9561bd61f3 100644
--- a/sentry_sdk/integrations/litellm.py
+++ b/sentry_sdk/integrations/litellm.py
@@ -119,8 +119,11 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
                     if isinstance(embedding_input, list)
                     else [embedding_input]
                 )
-                messages_data = truncate_and_annotate_embedding_inputs(
-                    input_list, span, scope
+                client = sentry_sdk.get_client()
+                messages_data = (
+                    input_list
+                    if client.options.get("stream_gen_ai_spans", False)
+                    else truncate_and_annotate_embedding_inputs(input_list, span, scope)
                 )
                 if messages_data is not None:
                     set_data_normalized(
@@ -133,9 +136,14 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
             # For chat, look for the 'messages' parameter
             messages = kwargs.get("messages", [])
             if messages:
+                client = sentry_sdk.get_client()
                 scope = sentry_sdk.get_current_scope()
                 messages = _convert_message_parts(messages)
-                messages_data = truncate_and_annotate_messages(messages, span, scope)
+                messages_data = (
+                    messages
+                    if client.options.get("stream_gen_ai_spans", False)
+                    else truncate_and_annotate_messages(messages, span, scope)
+                )
                 if messages_data is not None:
                     set_data_normalized(
                         span,
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index b3919d1a9d..7bb328741e 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -398,8 +398,13 @@ def _set_responses_api_input_data(
 
     if isinstance(messages, str):
         normalized_messages = normalize_message_roles([messages])  # type: ignore
+        client = sentry_sdk.get_client()
         scope = sentry_sdk.get_current_scope()
-        messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
+        messages_data = (
+            normalized_messages
+            if client.options.get("stream_gen_ai_spans", False)
+            else truncate_and_annotate_messages(normalized_messages, span, scope)
+        )
         if messages_data is not None:
             set_data_normalized(
                 span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
@@ -413,8 +418,13 @@ def _set_responses_api_input_data(
     ]
     if len(non_system_messages) > 0:
         normalized_messages = normalize_message_roles(non_system_messages)
+        client = sentry_sdk.get_client()
         scope = sentry_sdk.get_current_scope()
-        messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
+        messages_data = (
+            normalized_messages
+            if client.options.get("stream_gen_ai_spans", False)
+            else truncate_and_annotate_messages(normalized_messages, span, scope)
+        )
         if messages_data is not None:
             set_data_normalized(
                 span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
@@ -472,8 +482,13 @@ def _set_completions_api_input_data(
 
     if isinstance(messages, str):
         normalized_messages = normalize_message_roles([messages])  # type: ignore
+        client = sentry_sdk.get_client()
         scope = sentry_sdk.get_current_scope()
-        messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
+        messages_data = (
+            normalized_messages
+            if client.options.get("stream_gen_ai_spans", False)
+            else truncate_and_annotate_messages(normalized_messages, span, scope)
+        )
         if messages_data is not None:
             set_data_normalized(
                 span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
@@ -503,8 +518,13 @@ def _set_completions_api_input_data(
     ]
     if len(non_system_messages) > 0:
         normalized_messages = normalize_message_roles(non_system_messages)
+        client = sentry_sdk.get_client()
         scope = sentry_sdk.get_current_scope()
-        messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
+        messages_data = (
+            normalized_messages
+            if client.options.get("stream_gen_ai_spans", False)
+            else truncate_and_annotate_messages(normalized_messages, span, scope)
+        )
         if messages_data is not None:
             set_data_normalized(
                 span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
@@ -539,9 +559,14 @@ def _set_embeddings_input_data(
         set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings")
 
         normalized_messages = normalize_message_roles([messages])  # type: ignore
+        client = sentry_sdk.get_client()
         scope = sentry_sdk.get_current_scope()
-        messages_data = truncate_and_annotate_embedding_inputs(
-            normalized_messages, span, scope
+        messages_data = (
+            normalized_messages
+            if client.options.get("stream_gen_ai_spans", False)
+            else truncate_and_annotate_embedding_inputs(
+                normalized_messages, span, scope
+            )
         )
         if messages_data is not None:
             set_data_normalized(
@@ -560,9 +585,14 @@ def _set_embeddings_input_data(
 
     if len(messages) > 0:
         normalized_messages = normalize_message_roles(messages)
+        client = sentry_sdk.get_client()
         scope = sentry_sdk.get_current_scope()
-        messages_data = truncate_and_annotate_embedding_inputs(
-            normalized_messages, span, scope
+        messages_data = (
+            normalized_messages
+            if client.options.get("stream_gen_ai_spans", False)
+            else truncate_and_annotate_embedding_inputs(
+                normalized_messages, span, scope
+            )
         )
         if messages_data is not None:
             set_data_normalized(
diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
index 27f9fdab25..2346189a96 100644
--- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
+++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
@@ -63,9 +63,12 @@ def invoke_agent_span(
 
         if len(messages) > 0:
             normalized_messages = normalize_message_roles(messages)
+            client = sentry_sdk.get_client()
             scope = sentry_sdk.get_current_scope()
-            messages_data = truncate_and_annotate_messages(
-                normalized_messages, span, scope
+            messages_data = (
+                normalized_messages
+                if client.options.get("stream_gen_ai_spans", False)
+                else truncate_and_annotate_messages(normalized_messages, span, scope)
             )
             if messages_data is not None:
                 set_data_normalized(
diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index ee504b3496..ea1faefde7 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -173,8 +173,13 @@ def _set_input_data(
                 )
 
     normalized_messages = normalize_message_roles(request_messages)
+    client = sentry_sdk.get_client()
     scope = sentry_sdk.get_current_scope()
-    messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
+    messages_data = (
+        normalized_messages
+        if client.options.get("stream_gen_ai_spans", False)
+        else truncate_and_annotate_messages(normalized_messages, span, scope)
+    )
     if messages_data is not None:
         set_data_normalized(
             span,
diff --git a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py
index dc95acad45..e549083fed 100644
--- a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py
+++ b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py
@@ -182,9 +182,12 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non
 
         if formatted_messages:
             normalized_messages = normalize_message_roles(formatted_messages)
+            client = sentry_sdk.get_client()
             scope = sentry_sdk.get_current_scope()
-            messages_data = truncate_and_annotate_messages(
-                normalized_messages, span, scope
+            messages_data = (
+                normalized_messages
+                if client.options.get("stream_gen_ai_spans", False)
+                else truncate_and_annotate_messages(normalized_messages, span, scope)
             )
             set_data_normalized(
                 span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
diff --git a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py
index ee08ca7036..c507315dcd 100644
--- a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py
+++ b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py
@@ -122,9 +122,12 @@ def invoke_agent_span(
 
         if messages:
             normalized_messages = normalize_message_roles(messages)
+            client = sentry_sdk.get_client()
             scope = sentry_sdk.get_current_scope()
-            messages_data = truncate_and_annotate_messages(
-                normalized_messages, span, scope
+            messages_data = (
+                normalized_messages
+                if client.options.get("stream_gen_ai_spans", False)
+                else truncate_and_annotate_messages(normalized_messages, span, scope)
             )
             set_data_normalized(
                 span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False