From 45729228bbb876219d6715e6b734cbc1e94083ab Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 21 Apr 2026 09:17:15 +0200 Subject: [PATCH 1/2] ref(openai): Revert input truncation --- sentry_sdk/ai/utils.py | 93 +---- sentry_sdk/integrations/openai.py | 56 +-- tests/integrations/openai/test_openai.py | 45 --- tests/test_ai_monitoring.py | 475 ----------------------- 4 files changed, 17 insertions(+), 652 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index cb22db0add..1d738d0dae 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -1,11 +1,10 @@ import inspect import json -from copy import deepcopy from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Callable, Dict, List, Optional, Tuple + from typing import Any, Callable, Dict, Tuple from sentry_sdk.tracing import Span @@ -14,10 +13,6 @@ from sentry_sdk.traces import StreamedSpan from sentry_sdk.tracing_utils import has_span_streaming_enabled -MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB -# Maximum characters when only a single message is left after bytes truncation -MAX_SINGLE_MESSAGE_CONTENT_CHARS = 10_000 - class GEN_AI_ALLOWED_MESSAGE_ROLES: SYSTEM = "system" @@ -180,92 +175,6 @@ def _truncate_single_message_content_if_present( return message -def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> int: - """ - Find the index of the first message that would exceed the max bytes limit. - Compute the individual message sizes, and return the index of the first message from the back - of the list that would exceed the max bytes limit. - """ - running_sum = 0 - for idx in range(len(messages) - 1, -1, -1): - size = len(json.dumps(messages[idx], separators=(",", ":")).encode("utf-8")) - running_sum += size - if running_sum > max_bytes: - return idx + 1 - - return 0 - - -def truncate_messages_by_size( - messages: "List[Dict[str, Any]]", - max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, - max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, -) -> "Tuple[List[Dict[str, Any]], int]": - """ - Returns a truncated messages list, consisting of - - the last message, with its content truncated to `max_single_message_chars` characters, - if the last message's size exceeds `max_bytes` bytes; otherwise, - - the maximum number of messages, starting from the end of the `messages` list, whose total - serialized size does not exceed `max_bytes` bytes. - - In the single message case, the serialized message size may exceed `max_bytes`, because - truncation is based only on character count in that case. - """ - serialized_json = json.dumps(messages, separators=(",", ":")) - current_size = len(serialized_json.encode("utf-8")) - - if current_size <= max_bytes: - return messages, 0 - - truncation_index = _find_truncation_index(messages, max_bytes) - if truncation_index < len(messages): - truncated_messages = messages[truncation_index:] - else: - truncation_index = len(messages) - 1 - truncated_messages = messages[-1:] - - if len(truncated_messages) == 1: - truncated_messages[0] = _truncate_single_message_content_if_present( - deepcopy(truncated_messages[0]), max_chars=max_single_message_chars - ) - - return truncated_messages, truncation_index - - -def truncate_and_annotate_messages( - messages: "Optional[List[Dict[str, Any]]]", - span: "Any", - scope: "Any", - max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, -) -> "Optional[List[Dict[str, Any]]]": - if not messages: - return None - - truncated_message = _truncate_single_message_content_if_present( - deepcopy(messages[-1]), max_chars=max_single_message_chars - ) - if len(messages) > 1: - scope._gen_ai_original_message_count[span.span_id] = len(messages) - - return [truncated_message] - - -def truncate_and_annotate_embedding_inputs( - messages: "Optional[List[Dict[str, Any]]]", - span: "Any", - scope: "Any", - max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, -) -> "Optional[List[Dict[str, Any]]]": - if not messages: - return None - - truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes) - if removed_count > 0: - scope._gen_ai_original_message_count[span.span_id] = len(messages) - - return truncated_messages - - def set_conversation_id(conversation_id: str) -> None: """ Set the conversation_id in the scope. diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 480db9132d..f92d8125df 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -10,8 +10,6 @@ from sentry_sdk.ai.utils import ( set_data_normalized, normalize_message_roles, - truncate_and_annotate_messages, - truncate_and_annotate_embedding_inputs, ) from sentry_sdk.ai._openai_completions_api import ( _is_system_instruction as _is_system_instruction_completions, @@ -397,12 +395,9 @@ def _set_responses_api_input_data( if isinstance(messages, str): normalized_messages = normalize_message_roles([messages]) # type: ignore - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) - if messages_data is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False - ) + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False + ) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") return @@ -412,12 +407,9 @@ def _set_responses_api_input_data( ] if len(non_system_messages) > 0: normalized_messages = normalize_message_roles(non_system_messages) - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) - if messages_data is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False - ) + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False + ) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") @@ -471,12 +463,9 @@ def _set_completions_api_input_data( if isinstance(messages, str): normalized_messages = normalize_message_roles([messages]) # type: ignore - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) - if messages_data is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False - ) + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False + ) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") return @@ -502,12 +491,9 @@ def _set_completions_api_input_data( ] if len(non_system_messages) > 0: normalized_messages = normalize_message_roles(non_system_messages) - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) - if messages_data is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False - ) + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False + ) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") @@ -538,14 +524,9 @@ def _set_embeddings_input_data( set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") normalized_messages = normalize_message_roles([messages]) # type: ignore - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_embedding_inputs( - normalized_messages, span, scope + set_data_normalized( + span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, normalized_messages, unpack=False ) - if messages_data is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, messages_data, unpack=False - ) return @@ -559,14 +540,9 @@ def _set_embeddings_input_data( if len(messages) > 0: normalized_messages = normalize_message_roles(messages) - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_embedding_inputs( - normalized_messages, span, scope + set_data_normalized( + span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, normalized_messages, unpack=False ) - if messages_data is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, messages_data, unpack=False - ) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 4b9d629d96..d1f07abb4f 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -3724,51 +3724,6 @@ def test_openai_message_role_mapping( assert stored_messages[0]["role"] == expected_role -def test_openai_message_truncation(sentry_init, capture_items): - """Test that large messages are truncated properly in OpenAI integration.""" - sentry_init( - integrations=[OpenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("transaction", "span") - - client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - - large_content = ( - "This is a very long message that will exceed our size limits. " * 1000 - ) - large_messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": large_content}, - {"role": "assistant", "content": large_content}, - {"role": "user", "content": large_content}, - ] - - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=large_messages, - ) - - span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - - messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) <= len(large_messages) - - (event,) = (item.payload for item in items if item.type == "transaction") - meta_path = event["_meta"] - span_meta = meta_path["spans"]["0"]["data"] - messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "len" in messages_meta.get("", {}) - - # noinspection PyTypeChecker def test_streaming_chat_completion_ttft( sentry_init, capture_items, get_model_response, server_side_event_chunks diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index a110ee9ad6..720725dd6b 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -1,19 +1,10 @@ import pytest import sentry_sdk -from sentry_sdk._types import ( - AnnotatedValue, -) from sentry_sdk.ai.monitoring import ai_track from sentry_sdk.ai.utils import ( - MAX_GEN_AI_MESSAGE_BYTES, - MAX_SINGLE_MESSAGE_CONTENT_CHARS, - truncate_and_annotate_messages, - truncate_messages_by_size, - _find_truncation_index, parse_data_uri, ) -from sentry_sdk.utils import safe_serialize def test_ai_track(sentry_init, capture_events): @@ -174,472 +165,6 @@ async def async_tool(**kwargs): assert span["op"] == "custom.async.operation" -@pytest.fixture -def sample_messages(): - """Sample messages similar to what gen_ai integrations would use""" - return [ - {"role": "system", "content": "You are a helpful assistant."}, - { - "role": "user", - "content": "What is the difference between a list and a tuple in Python?", - }, - { - "role": "assistant", - "content": "Lists are mutable and use [], tuples are immutable and use ().", - }, - {"role": "user", "content": "Can you give me some examples?"}, - { - "role": "assistant", - "content": "Sure! Here are examples:\n\n```python\n# List\nmy_list = [1, 2, 3]\nmy_list.append(4)\n\n# Tuple\nmy_tuple = (1, 2, 3)\n# my_tuple.append(4) would error\n```", - }, - ] - - -@pytest.fixture -def large_messages(): - """Messages that will definitely exceed size limits""" - large_content = "This is a very long message. " * 100 - return [ - {"role": "system", "content": large_content}, - {"role": "user", "content": large_content}, - {"role": "assistant", "content": large_content}, - {"role": "user", "content": large_content}, - ] - - -class TestTruncateMessagesBySize: - def test_no_truncation_needed(self, sample_messages): - """Test that messages under the limit are not truncated""" - result, truncation_index = truncate_messages_by_size( - sample_messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES - ) - assert len(result) == len(sample_messages) - assert result == sample_messages - assert truncation_index == 0 - - def test_truncation_removes_oldest_first(self, large_messages): - """Test that oldest messages are removed first during truncation""" - small_limit = 3000 - result, truncation_index = truncate_messages_by_size( - large_messages, max_bytes=small_limit - ) - assert len(result) < len(large_messages) - - assert result[-1] == large_messages[-1] - assert truncation_index == len(large_messages) - len(result) - - def test_empty_messages_list(self): - """Test handling of empty messages list""" - result, truncation_index = truncate_messages_by_size( - [], max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500 - ) - assert result == [] - assert truncation_index == 0 - - def test_find_truncation_index( - self, - ): - """Test that the truncation index is found correctly""" - # when represented in JSON, these are each 7 bytes long - messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5] - truncation_index = _find_truncation_index(messages, 20) - assert truncation_index == 3 - assert messages[truncation_index:] == ["D" * 5, "E" * 5] - - messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5] - truncation_index = _find_truncation_index(messages, 40) - assert truncation_index == 0 - assert messages[truncation_index:] == [ - "A" * 5, - "B" * 5, - "C" * 5, - "D" * 5, - "E" * 5, - ] - - def test_progressive_truncation(self, large_messages): - """Test that truncation works progressively with different limits""" - limits = [ - MAX_GEN_AI_MESSAGE_BYTES // 5, - MAX_GEN_AI_MESSAGE_BYTES // 10, - MAX_GEN_AI_MESSAGE_BYTES // 25, - MAX_GEN_AI_MESSAGE_BYTES // 100, - MAX_GEN_AI_MESSAGE_BYTES // 500, - ] - prev_count = len(large_messages) - - for limit in limits: - result = truncate_messages_by_size(large_messages, max_bytes=limit) - current_count = len(result) - - assert current_count <= prev_count - assert current_count >= 1 - prev_count = current_count - - def test_single_message_truncation(self): - large_content = "This is a very long message. " * 10_000 - - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": large_content}, - ] - - result, truncation_index = truncate_messages_by_size( - messages, max_single_message_chars=MAX_SINGLE_MESSAGE_CONTENT_CHARS - ) - - assert len(result) == 1 - assert ( - len(result[0]["content"].rstrip("...")) <= MAX_SINGLE_MESSAGE_CONTENT_CHARS - ) - - # If the last message is too large, the system message is not present - system_msgs = [m for m in result if m.get("role") == "system"] - assert len(system_msgs) == 0 - - # Confirm the user message is truncated with '...' - user_msgs = [m for m in result if m.get("role") == "user"] - assert len(user_msgs) == 1 - assert user_msgs[0]["content"].endswith("...") - assert len(user_msgs[0]["content"]) < len(large_content) - - def test_single_message_truncation_list_content_exceeds_limit(self): - """Test that list-based content (e.g. pydantic-ai multimodal format) is truncated.""" - large_text = "A" * 200_000 - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": large_text}, - ], - }, - ] - - result, _ = truncate_messages_by_size(messages) - - text_part = result[0]["content"][0] - assert text_part["text"].endswith("...") - assert len(text_part["text"]) == MAX_SINGLE_MESSAGE_CONTENT_CHARS + 3 - - def test_single_message_truncation_list_content_under_limit(self): - """Test that small text parts are preserved when non-text parts push size over byte limit.""" - short_text = "Hello world" - large_data_url = "data:image/png;base64," + "A" * 200_000 - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": short_text}, - {"type": "image_url", "image_url": {"url": large_data_url}}, - ], - }, - ] - - result, _ = truncate_messages_by_size(messages) - - text_part = result[0]["content"][0] - assert text_part["text"] == short_text - - def test_single_message_truncation_list_content_mixed_parts(self): - """Test truncation with mixed content types (text + non-text parts).""" - max_chars = 50 - large_data_url = "data:image/png;base64," + "X" * 200_000 - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "A" * 30}, - {"type": "image_url", "image_url": {"url": large_data_url}}, - {"type": "text", "text": "B" * 30}, - ], - }, - ] - - result, _ = truncate_messages_by_size( - messages, max_single_message_chars=max_chars - ) - - parts = result[0]["content"] - # First text part uses 30 chars of the 50 budget - assert parts[0]["text"] == "A" * 30 - # Image part is unchanged - assert parts[1]["type"] == "image_url" - # Second text part is truncated to remaining 20 chars - assert parts[2]["text"] == "B" * 20 + "..." - - def test_single_message_truncation_list_content_multiple_text_parts(self): - """Test that budget is distributed across multiple text parts.""" - max_chars = 10 - # Two large text parts that together exceed 128KB byte limit - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "A" * 100_000}, - {"type": "text", "text": "B" * 100_000}, - ], - }, - ] - - result, _ = truncate_messages_by_size( - messages, max_single_message_chars=max_chars - ) - - parts = result[0]["content"] - # First part is truncated to the full budget - assert parts[0]["text"] == "A" * 10 + "..." - # Second part gets truncated to 0 chars + ellipsis - assert parts[1]["text"] == "..." - - @pytest.mark.parametrize("content", [None, 42, 3.14, True]) - def test_single_message_truncation_non_str_non_list_content(self, content): - messages = [{"role": "user", "content": content}] - - result, _ = truncate_messages_by_size(messages) - - assert result[0]["content"] is content - - -class TestTruncateAndAnnotateMessages: - def test_only_keeps_last_message(self, sample_messages): - class MockSpan: - def __init__(self): - self.span_id = "test_span_id" - self.data = {} - - def set_data(self, key, value): - self.data[key] = value - - class MockScope: - def __init__(self): - self._gen_ai_original_message_count = {} - - span = MockSpan() - scope = MockScope() - result = truncate_and_annotate_messages(sample_messages, span, scope) - - assert isinstance(result, list) - assert not isinstance(result, AnnotatedValue) - assert len(result) == 1 - assert result[0] == sample_messages[-1] - - def test_truncation_sets_metadata_on_scope(self, large_messages): - class MockSpan: - def __init__(self): - self.span_id = "test_span_id" - self.data = {} - - def set_data(self, key, value): - self.data[key] = value - - class MockScope: - def __init__(self): - self._gen_ai_original_message_count = {} - - small_limit = 3000 - span = MockSpan() - scope = MockScope() - original_count = len(large_messages) - result = truncate_and_annotate_messages( - large_messages, span, scope, max_single_message_chars=small_limit - ) - - assert isinstance(result, list) - assert not isinstance(result, AnnotatedValue) - assert len(result) < len(large_messages) - assert scope._gen_ai_original_message_count[span.span_id] == original_count - - def test_scope_tracks_original_message_count(self, large_messages): - class MockSpan: - def __init__(self): - self.span_id = "test_span_id" - self.data = {} - - def set_data(self, key, value): - self.data[key] = value - - class MockScope: - def __init__(self): - self._gen_ai_original_message_count = {} - - small_limit = 3000 - original_count = len(large_messages) - span = MockSpan() - scope = MockScope() - - result = truncate_and_annotate_messages( - large_messages, span, scope, max_single_message_chars=small_limit - ) - - assert scope._gen_ai_original_message_count[span.span_id] == original_count - assert len(result) == 1 - - def test_empty_messages_returns_none(self): - class MockSpan: - def __init__(self): - self.span_id = "test_span_id" - self.data = {} - - def set_data(self, key, value): - self.data[key] = value - - class MockScope: - def __init__(self): - self._gen_ai_original_message_count = {} - - span = MockSpan() - scope = MockScope() - result = truncate_and_annotate_messages([], span, scope) - assert result is None - - result = truncate_and_annotate_messages(None, span, scope) - assert result is None - - def test_truncated_messages_newest_first(self, large_messages): - class MockSpan: - def __init__(self): - self.span_id = "test_span_id" - self.data = {} - - def set_data(self, key, value): - self.data[key] = value - - class MockScope: - def __init__(self): - self._gen_ai_original_message_count = {} - - small_limit = 3000 - span = MockSpan() - scope = MockScope() - result = truncate_and_annotate_messages( - large_messages, span, scope, max_single_message_chars=small_limit - ) - - assert isinstance(result, list) - assert result[0] == large_messages[-len(result)] - - -class TestClientAnnotation: - def test_client_wraps_truncated_messages_in_annotated_value(self, large_messages): - """Test that client.py properly wraps truncated messages in AnnotatedValue using scope data""" - from sentry_sdk._types import AnnotatedValue - from sentry_sdk.consts import SPANDATA - - class MockSpan: - def __init__(self): - self.span_id = "test_span_123" - self.data = {} - - def set_data(self, key, value): - self.data[key] = value - - class MockScope: - def __init__(self): - self._gen_ai_original_message_count = {} - - small_limit = 3000 - span = MockSpan() - scope = MockScope() - original_count = len(large_messages) - - # Simulate what integrations do - truncated_messages = truncate_and_annotate_messages( - large_messages, span, scope, max_single_message_chars=small_limit - ) - span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, truncated_messages) - - # Verify metadata was set on scope - assert span.span_id in scope._gen_ai_original_message_count - assert scope._gen_ai_original_message_count[span.span_id] > 0 - - # Simulate what client.py does - event = {"spans": [{"span_id": span.span_id, "data": span.data.copy()}]} - - # Mimic client.py logic - using scope to get the original length - for event_span in event["spans"]: - span_id = event_span.get("span_id") - span_data = event_span.get("data", {}) - if ( - span_id - and span_id in scope._gen_ai_original_message_count - and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data - ): - messages = span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] - n_original_count = scope._gen_ai_original_message_count[span_id] - - span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = AnnotatedValue( - safe_serialize(messages), - {"len": n_original_count}, - ) - - # Verify the annotation happened - messages_value = event["spans"][0]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_value, AnnotatedValue) - assert messages_value.metadata["len"] == original_count - assert isinstance(messages_value.value, str) - - def test_annotated_value_shows_correct_original_length(self, large_messages): - """Test that the annotated value correctly shows the original message count before truncation""" - from sentry_sdk.consts import SPANDATA - - class MockSpan: - def __init__(self): - self.span_id = "test_span_456" - self.data = {} - - def set_data(self, key, value): - self.data[key] = value - - class MockScope: - def __init__(self): - self._gen_ai_original_message_count = {} - - small_limit = 3000 - span = MockSpan() - scope = MockScope() - original_message_count = len(large_messages) - - truncated_messages = truncate_and_annotate_messages( - large_messages, span, scope, max_single_message_chars=small_limit - ) - - assert len(truncated_messages) < original_message_count - - assert span.span_id in scope._gen_ai_original_message_count - stored_original_length = scope._gen_ai_original_message_count[span.span_id] - assert stored_original_length == original_message_count - - event = { - "spans": [ - { - "span_id": span.span_id, - "data": {SPANDATA.GEN_AI_REQUEST_MESSAGES: truncated_messages}, - } - ] - } - - for event_span in event["spans"]: - span_id = event_span.get("span_id") - span_data = event_span.get("data", {}) - if ( - span_id - and span_id in scope._gen_ai_original_message_count - and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data - ): - span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = AnnotatedValue( - span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES], - {"len": scope._gen_ai_original_message_count[span_id]}, - ) - - messages_value = event["spans"][0]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_value, AnnotatedValue) - assert messages_value.metadata["len"] == stored_original_length - assert len(messages_value.value) == len(truncated_messages) - - class TestParseDataUri: def test_parses_base64_image_data_uri(self): """Test parsing a standard base64-encoded image data URI""" From 8023538f161a1f53c61f150f962438659dce85eb Mon Sep 17 00:00:00 2001 From: alexander-alderman-webb <224035503+alexander-alderman-webb@users.noreply.github.com> Date: Tue, 21 Apr 2026 08:05:33 +0000 Subject: [PATCH 2/2] release: 2.58.0a1 --- CHANGELOG.md | 47 ++++++++++++++++++++++++++++++++++++++++++++ docs/conf.py | 2 +- sentry_sdk/consts.py | 2 +- setup.py | 2 +- 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8892b397dd..e6d3b63afb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # Changelog +## 2.58.0a1 + +### New Features ✨ + +- (ci) Cancel in-progress PR workflows on new commit push by @joshuarli in [#5994](https://github.com/getsentry/sentry-python/pull/5994) +- Send GenAI spans as V2 envelope items by @alexander-alderman-webb in [#6079](https://github.com/getsentry/sentry-python/pull/6079) + +### Bug Fixes 🐛 + +- (google_genai) Redact binary data in inline_data and fix multi-part message extraction by @ericapisani in [#5977](https://github.com/getsentry/sentry-python/pull/5977) +- (profiler) Stop nulling buffer on teardown by @ericapisani in [#6075](https://github.com/getsentry/sentry-python/pull/6075) + +### Internal Changes 🔧 + +#### Anthropic + +- Revert input truncation by @alexander-alderman-webb in [#6113](https://github.com/getsentry/sentry-python/pull/6113) +- Revert input transformation by @alexander-alderman-webb in [#6108](https://github.com/getsentry/sentry-python/pull/6108) + +#### Google Genai + +- Revert input truncation by @alexander-alderman-webb in [#6111](https://github.com/getsentry/sentry-python/pull/6111) +- Revert input transformation by @alexander-alderman-webb in [#6105](https://github.com/getsentry/sentry-python/pull/6105) + +#### Langchain + +- Revert input truncation by @alexander-alderman-webb in [#6115](https://github.com/getsentry/sentry-python/pull/6115) +- Revert input transformation by @alexander-alderman-webb in [#6109](https://github.com/getsentry/sentry-python/pull/6109) + +#### Litellm + +- Revert input truncation by @alexander-alderman-webb in [#6112](https://github.com/getsentry/sentry-python/pull/6112) +- Revert input transformation by @alexander-alderman-webb in [#6107](https://github.com/getsentry/sentry-python/pull/6107) + +#### Pydantic Ai + +- Revert input truncation by @alexander-alderman-webb in [#6106](https://github.com/getsentry/sentry-python/pull/6106) +- Remove dead `Model.request` patch by @alexander-alderman-webb in [#5956](https://github.com/getsentry/sentry-python/pull/5956) + +#### Other + +- (ai) Revert binary blob truncation by @alexander-alderman-webb in [#6110](https://github.com/getsentry/sentry-python/pull/6110) +- (langgraph) Revert input truncation by @alexander-alderman-webb in [#6114](https://github.com/getsentry/sentry-python/pull/6114) +- (openai) Revert input truncation by @alexander-alderman-webb in [#6117](https://github.com/getsentry/sentry-python/pull/6117) +- (openai-agents) Revert input truncation by @alexander-alderman-webb in [#6116](https://github.com/getsentry/sentry-python/pull/6116) +- Set explicit base-branch for codecov action by @ericapisani in [#5992](https://github.com/getsentry/sentry-python/pull/5992) + ## 2.58.0 ### New Features ✨ diff --git a/docs/conf.py b/docs/conf.py index 59010b9a2e..2f855ec55d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,7 +31,7 @@ copyright = "2019-{}, Sentry Team and Contributors".format(datetime.now().year) author = "Sentry Team and Contributors" -release = "2.58.0" +release = "2.58.0a1" version = ".".join(release.split(".")[:2]) # The short X.Y version. diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 73e5a6d9cb..1ac5759fe3 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -1486,4 +1486,4 @@ def _get_default_options() -> "dict[str, Any]": del _get_default_options -VERSION = "2.58.0" +VERSION = "2.58.0a1" diff --git a/setup.py b/setup.py index 3942ee630e..520e1d5768 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_file_text(file_name): setup( name="sentry-sdk", - version="2.58.0", + version="2.58.0a1", author="Sentry Team and Contributors", author_email="hello@sentry.io", url="https://github.com/getsentry/sentry-python",