diff --git a/py/src/braintrust/env.py b/py/src/braintrust/env.py index 19aee1dd..9978b53f 100644 --- a/py/src/braintrust/env.py +++ b/py/src/braintrust/env.py @@ -1,4 +1,5 @@ import io +import logging import math import os import shlex @@ -8,6 +9,9 @@ from typing import TypeVar, cast +_logger = logging.getLogger(__name__) + + T = TypeVar("T") EnvValue = bool | float | int | str _Parser = Callable[[str], EnvValue | None] @@ -150,6 +154,47 @@ def _parse_dotenv_contents(self, contents: str) -> EnvValue | None: return None +_warned_legacy_uuid_conflict = False + + +def _resolve_use_legacy_uuid_ids() -> bool: + """Resolve whether the SDK should generate legacy UUID-based span/trace IDs. + + The default is OpenTelemetry-compatible hex IDs (16-byte trace id / 8-byte + span id) with V4 span-component export. Setting BRAINTRUST_LEGACY_IDS + opts back into UUID IDs with V3 export. + + BRAINTRUST_OTEL_COMPAT (which selects the OpenTelemetry context manager) + requires hex IDs, so it always wins: if both it and BRAINTRUST_LEGACY_IDS + are set, legacy IDs are disabled and a warning is logged (at most once per + process, even though this is re-resolved lazily on each access). + """ + global _warned_legacy_uuid_conflict + + legacy = EnvVar("BRAINTRUST_LEGACY_IDS", EnvParser.BOOL).get(False) + if EnvVar("BRAINTRUST_OTEL_COMPAT", EnvParser.BOOL).get(False): + if legacy and not _warned_legacy_uuid_conflict: + _warned_legacy_uuid_conflict = True + _logger.warning( + "BRAINTRUST_LEGACY_IDS is ignored because BRAINTRUST_OTEL_COMPAT " + "requires OpenTelemetry-compatible hex span IDs. Using hex IDs." + ) + return False + return legacy + + +class _LegacyUuidIdsField: + """Lazy, read-only descriptor for the legacy-UUID-IDs setting. + + Like the other entries on BraintrustEnv, this re-reads the environment on + each access rather than caching at import time, so changing the relevant env + vars (e.g. in tests) is reflected immediately. 
+ """ + + def __get__(self, instance: object, owner: type | None = None) -> bool: + return _resolve_use_legacy_uuid_ids() + + class BraintrustEnv: API_KEY = EnvVar("BRAINTRUST_API_KEY", EnvParser.STRING) HTTP_TIMEOUT = EnvVar("BRAINTRUST_HTTP_TIMEOUT", EnvParser.FLOAT) @@ -163,3 +208,6 @@ class BraintrustEnv: ALL_PUBLISH_PAYLOADS_DIR = EnvVar("BRAINTRUST_ALL_PUBLISH_PAYLOADS_DIR", EnvParser.STRING) DISABLE_ATEXIT_FLUSH = EnvVar("BRAINTRUST_DISABLE_ATEXIT_FLUSH", EnvParser.BOOL) OTEL_COMPAT = EnvVar("BRAINTRUST_OTEL_COMPAT", EnvParser.BOOL) + # Opt out of the default OpenTelemetry-compatible hex span/trace IDs and use + # legacy UUID-based IDs (and V3 span-component export) instead. + LEGACY_IDS = _LegacyUuidIdsField() diff --git a/py/src/braintrust/id_gen.py b/py/src/braintrust/id_gen.py index b9006f6e..072cd6b9 100644 --- a/py/src/braintrust/id_gen.py +++ b/py/src/braintrust/id_gen.py @@ -10,9 +10,11 @@ def get_id_generator(): This eliminates global state and makes tests parallelizable. Each caller gets their own generator instance. + + Defaults to OpenTelemetry-compatible hex IDs. Set BRAINTRUST_LEGACY_IDS + to opt back into legacy UUID-based IDs. 
""" - use_otel = BraintrustEnv.OTEL_COMPAT.get(False) - return OTELIDGenerator() if use_otel else UUIDGenerator() + return UUIDGenerator() if BraintrustEnv.LEGACY_IDS else OTELIDGenerator() class IDGenerator(ABC): diff --git a/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py b/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py index 801f00dc..1ff40395 100644 --- a/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py +++ b/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py @@ -210,13 +210,16 @@ async def calculator_handler(args): assert tool_span["output"] is not None assert any(parent_id in llm_span_ids for parent_id in tool_span["span_parents"]) - root_span_id = task_span["span_id"] + # Descendants share the task's trace (``root_span_id``); direct children + # reference the task's ``span_id`` in ``span_parents``. + task_root_span_id = task_span["root_span_id"] + task_span_id = task_span["span_id"] for llm_span in llm_spans: - assert llm_span["root_span_id"] == root_span_id - assert root_span_id in llm_span["span_parents"] + assert llm_span["root_span_id"] == task_root_span_id + assert task_span_id in llm_span["span_parents"] for tool_span in tool_spans: - assert tool_span["root_span_id"] == root_span_id + assert tool_span["root_span_id"] == task_root_span_id assert any(parent_id in llm_span_ids for parent_id in tool_span["span_parents"]) @@ -454,7 +457,8 @@ async def user_prompt_hook(input_data: Any, tool_use_id: str | None, context: An hook_span = function_spans[0] assert task_span["input"] == prompt - assert hook_span["root_span_id"] == task_span["span_id"] + # The hook span is a descendant of the task span, so they share a trace. 
+ assert hook_span["root_span_id"] == task_span["root_span_id"] assert hook_span["input"]["hook_event_name"] == "UserPromptSubmit" assert hook_span["input"]["prompt"] == prompt assert hook_span["output"]["hookSpecificOutput"]["hookEventName"] == "UserPromptSubmit" @@ -546,7 +550,8 @@ async def post_tool_hook(input_data: Any, tool_use_id: str | None, context: Any) post_span = hook_span_by_event["PostToolUse"] for hook_span in (pre_span, post_span): - assert hook_span["root_span_id"] == task_span["span_id"] + # Hook spans are descendants of the task span, so they share a trace. + assert hook_span["root_span_id"] == task_span["root_span_id"] assert hook_span["input"]["tool_name"] == "Bash" assert pre_span["output"]["hookSpecificOutput"]["hookEventName"] == "PreToolUse" @@ -681,7 +686,9 @@ async def test_bundled_subagent_creates_task_span(memory_logger): assert subagent_spans, "Expected at least one subagent task span" assert any(s.get("metadata", {}).get("task_id") for s in subagent_spans) for subagent_span in subagent_spans: - assert subagent_span["root_span_id"] == root_task_span["span_id"] + # Subagent spans are descendants of the root task span, so they share a + # trace; the root task ``span_id`` appears in ``span_parents`` below. + assert subagent_span["root_span_id"] == root_task_span["root_span_id"] parents = set(subagent_span["span_parents"]) tool_use_id = subagent_span.get("metadata", {}).get("tool_use_id") matching_tool_span = next( diff --git a/py/src/braintrust/integrations/huggingface_hub/test_huggingface_hub.py b/py/src/braintrust/integrations/huggingface_hub/test_huggingface_hub.py index 3350f9f6..49f42252 100644 --- a/py/src/braintrust/integrations/huggingface_hub/test_huggingface_hub.py +++ b/py/src/braintrust/integrations/huggingface_hub/test_huggingface_hub.py @@ -249,7 +249,6 @@ def test_wrap_huggingface_hub_chat_completion_sync(memory_logger): # With no parent span on the stack, the LLM span is its own root and has # no ``span_parents``. 
assert not span.get("span_parents") - assert span["span_id"] == span["root_span_id"] # The user's ``provider=`` kwarg overrides the default "huggingface" # identity so the span reflects the actual routing target. assert span["metadata"]["provider"] == CHAT_PROVIDER @@ -317,7 +316,6 @@ def test_wrap_huggingface_hub_chat_completion_streaming(memory_logger): # when the iterator is exhausted, with no parent on the stack the span is # still its own root. assert not span.get("span_parents") - assert span["span_id"] == span["root_span_id"] assert span["metadata"]["provider"] == CHAT_PROVIDER # Aggregated output is ``{"choices": [{"index", "message": {...}, "finish_reason"?}]}``. @@ -476,7 +474,6 @@ async def _run(): span = spans[0] assert span["span_attributes"]["name"] == "huggingface.chat_completion" assert not span.get("span_parents") - assert span["span_id"] == span["root_span_id"] assert span["metadata"]["provider"] == CHAT_PROVIDER diff --git a/py/src/braintrust/integrations/langchain/test_callbacks.py b/py/src/braintrust/integrations/langchain/test_callbacks.py index e05a5775..ca71016f 100644 --- a/py/src/braintrust/integrations/langchain/test_callbacks.py +++ b/py/src/braintrust/integrations/langchain/test_callbacks.py @@ -62,7 +62,10 @@ def test_llm_calls(logger_memory_logger): spans = memory_logger.pop() assert len(spans) == 3 + # ``root_span_id`` is the root span's own span_id (the parent reference for + # its children); ``trace_root_id`` is the trace shared by every span. 
root_span_id = spans[0]["span_id"] + trace_root_id = spans[0]["root_span_id"] assert_matches_object( spans, @@ -81,7 +84,7 @@ def test_llm_calls(logger_memory_logger): }, "metadata": {"tags": []}, "span_id": root_span_id, - "root_span_id": root_span_id, + "root_span_id": trace_root_id, }, { "span_attributes": {"name": "ChatPromptTemplate"}, @@ -97,7 +100,7 @@ def test_llm_calls(logger_memory_logger): ] }, "metadata": {"tags": ["seq:step:1"]}, - "root_span_id": root_span_id, + "root_span_id": trace_root_id, "span_parents": [root_span_id], }, { @@ -144,7 +147,7 @@ def test_llm_calls(logger_memory_logger): "tags": ["seq:step:2"], "model": "gpt-4o-mini-2024-07-18", }, - "root_span_id": root_span_id, + "root_span_id": trace_root_id, "span_parents": [root_span_id], }, ], @@ -171,6 +174,7 @@ def test_chain_with_memory(logger_memory_logger): assert len(spans) == 3 root_span_id = spans[0]["span_id"] + trace_root_id = spans[0]["root_span_id"] assert_matches_object( spans, @@ -189,7 +193,7 @@ def test_chain_with_memory(logger_memory_logger): }, "metadata": {"tags": ["test"]}, "span_id": root_span_id, - "root_span_id": root_span_id, + "root_span_id": trace_root_id, }, { "span_attributes": {"name": "ChatPromptTemplate"}, @@ -205,7 +209,7 @@ def test_chain_with_memory(logger_memory_logger): ] }, "metadata": {"tags": ["seq:step:1", "test"]}, - "root_span_id": root_span_id, + "root_span_id": trace_root_id, "span_parents": [root_span_id], }, { @@ -252,7 +256,7 @@ def test_chain_with_memory(logger_memory_logger): "tags": ["seq:step:2", "test"], "model": "gpt-4o-mini-2024-07-18", }, - "root_span_id": root_span_id, + "root_span_id": trace_root_id, "span_parents": [root_span_id], }, ], @@ -301,13 +305,14 @@ def calculator(input: CalculatorInput) -> str: spans = memory_logger.pop() root_span_id = spans[0]["span_id"] + trace_root_id = spans[0]["root_span_id"] assert_matches_object( spans, [ { "span_id": root_span_id, - "root_span_id": root_span_id, + "root_span_id": trace_root_id, 
"span_attributes": { "name": "ChatOpenAI", "type": "llm", @@ -640,13 +645,13 @@ def test_chain_null_values(logger_memory_logger): flush() spans = memory_logger.pop() - root_span_id = spans[0]["span_id"] + trace_root_id = spans[0]["root_span_id"] assert_matches_object( spans, [ { - "root_span_id": root_span_id, + "root_span_id": trace_root_id, "span_attributes": { "name": "TestChain", "type": "task", @@ -721,7 +726,10 @@ def task_fn(input, hooks): # Find the root eval span root_eval_span = [s for s in spans if s.get("span_attributes", {}).get("name") == "test-consecutive-eval"][0] + # ``root_eval_span_id`` is the eval root's own span_id (the parent reference + # for its children); ``trace_root_id`` is the trace shared by every span. root_eval_span_id = root_eval_span["span_id"] + trace_root_id = root_eval_span["root_span_id"] # Find the eval dataset record spans (direct children of root eval span) eval_record_spans = [ @@ -751,7 +759,7 @@ def task_fn(input, hooks): [ { "span_id": root_eval_span_id, - "root_span_id": root_eval_span_id, + "root_span_id": trace_root_id, "span_attributes": { "name": "test-consecutive-eval", "type": "eval", @@ -765,7 +773,7 @@ def task_fn(input, hooks): [eval_record_1], [ { - "root_span_id": root_eval_span_id, + "root_span_id": trace_root_id, "span_parents": [root_eval_span_id], "span_attributes": { "name": "eval", @@ -781,7 +789,7 @@ def task_fn(input, hooks): [eval_record_2], [ { - "root_span_id": root_eval_span_id, + "root_span_id": trace_root_id, "span_parents": [root_eval_span_id], "span_attributes": { "name": "eval", @@ -797,7 +805,7 @@ def task_fn(input, hooks): [task_1_span], [ { - "root_span_id": root_eval_span_id, + "root_span_id": trace_root_id, "span_parents": [eval_record_1["span_id"]], "span_attributes": { "name": "task", @@ -813,7 +821,7 @@ def task_fn(input, hooks): [task_2_span], [ { - "root_span_id": root_eval_span_id, + "root_span_id": trace_root_id, "span_parents": [eval_record_2["span_id"]], "span_attributes": { 
"name": "task", diff --git a/py/src/braintrust/integrations/langchain/test_context.py b/py/src/braintrust/integrations/langchain/test_context.py index 37cf552a..b776eee4 100644 --- a/py/src/braintrust/integrations/langchain/test_context.py +++ b/py/src/braintrust/integrations/langchain/test_context.py @@ -63,7 +63,10 @@ def test_global_handler(logger_memory_logger): spans = memory_logger.pop() assert len(spans) > 0 + # ``root_span_id`` is the root span's own span_id (the parent reference for + # its children); ``trace_root_id`` is the trace shared by every span. root_span_id = spans[0]["span_id"] + trace_root_id = spans[0]["root_span_id"] # Spans would be empty if the handler was not registered, let's make sure it logged what we expect assert_matches_object( @@ -83,7 +86,7 @@ def test_global_handler(logger_memory_logger): }, "metadata": {"tags": []}, "span_id": root_span_id, - "root_span_id": root_span_id, + "root_span_id": trace_root_id, }, { "span_attributes": {"name": "ChatPromptTemplate"}, @@ -99,7 +102,7 @@ def test_global_handler(logger_memory_logger): ] }, "metadata": {"tags": ["seq:step:1"]}, - "root_span_id": root_span_id, + "root_span_id": trace_root_id, "span_parents": [root_span_id], }, { @@ -146,7 +149,7 @@ def test_global_handler(logger_memory_logger): "tags": ["seq:step:2"], "model": "gpt-4o-mini-2024-07-18", }, - "root_span_id": root_span_id, + "root_span_id": trace_root_id, "span_parents": [root_span_id], }, ], diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py index 460f9232..8941b6b9 100644 --- a/py/src/braintrust/logger.py +++ b/py/src/braintrust/logger.py @@ -71,6 +71,18 @@ from .prompt_cache.lru_cache import LRUCache from .prompt_cache.parameters_cache import ParametersCache from .prompt_cache.prompt_cache import PromptCache +from .propagation import ( + BAGGAGE_HEADER, + BRAINTRUST_PARENT_KEY, + TRACEPARENT_HEADER, + TRACESTATE_HEADER, + PropagatedState, + format_traceparent, + get_header, + merge_baggage, + parse_baggage, 
+ parse_traceparent, +) from .queue import DEFAULT_QUEUE_SIZE, LogQueue from .serializable_data_class import SerializableDataClass from .span_identifier_v3 import SpanComponentsV3, SpanObjectTypeV3 @@ -146,9 +158,14 @@ class ParametersRef(TypedDict, total=False): def _get_exporter(): - """Return the active exporter (e.g. the version of SpanComponentsv*)""" - use_v4 = BraintrustEnv.OTEL_COMPAT.get(False) - return SpanComponentsV4 if use_v4 else SpanComponentsV3 + """Return the active exporter (e.g. the version of SpanComponentsv*). + + The export version is coupled to the active ID format: hex IDs (the default) + serialize as V4, legacy UUID IDs serialize as V3. These must move together -- + serializing hex IDs via V3 would lose the compact encoding and risk + corrupting hex values that happen to parse as UUIDs. + """ + return SpanComponentsV3 if BraintrustEnv.LEGACY_IDS else SpanComponentsV4 class Exportable(ABC): @@ -196,7 +213,7 @@ def start_span( span_attributes: SpanAttributes | Mapping[str, Any] | None = None, start_time: float | None = None, set_current: bool | None = None, - parent: str | None = None, + parent: str | dict | None = None, **event: Any, ) -> "Span": """Create a new span. This is useful if you want to log more detailed trace information beyond the scope of a single log event. Data logged over several calls to `Span.log` will be merged into one logical row. @@ -224,6 +241,15 @@ def export(self) -> str: :returns: Serialized representation of this span's identifiers. """ + @abstractmethod + def inject(self, carrier: dict | None = None) -> dict: + """ + Inject W3C trace-context headers (`traceparent` and `baggage`) for this span into a carrier dict, for distributed tracing across service boundaries. + + :param carrier: Optional existing carrier (e.g. outbound HTTP headers) to mutate. A new dict is created if omitted. + :returns: The carrier dict with propagation headers injected. 
+ """ + @abstractmethod def link(self) -> str: """ @@ -330,7 +356,7 @@ def start_span( span_attributes: SpanAttributes | Mapping[str, Any] | None = None, start_time: float | None = None, set_current: bool | None = None, - parent: str | None = None, + parent: str | dict | None = None, **event: Any, ): return self @@ -341,6 +367,9 @@ def end(self, end_time: float | None = None) -> float: def export(self): return "" + def inject(self, carrier: dict | None = None) -> dict: + return carrier if carrier is not None else {} + def link(self) -> str: return NOOP_SPAN_PERMALINK @@ -397,7 +426,7 @@ def __init__(self): "braintrust_current_logger", default=None ) self._local_logger: Logger | None = None - self.current_parent: contextvars.ContextVar[str | None] = contextvars.ContextVar( + self.current_parent: contextvars.ContextVar[str | dict | None] = contextvars.ContextVar( "braintrust_current_parent", default=None ) self.current_span: contextvars.ContextVar[Span] = contextvars.ContextVar( @@ -2342,12 +2371,14 @@ def current_span() -> Span: @contextlib.contextmanager -def parent_context(parent: str | None, state: BraintrustState | None = None): +def parent_context(parent: "str | dict | None", state: BraintrustState | None = None): """ Context manager to temporarily set the parent context for spans. Args: - parent: The parent string to set during the context + parent: The parent to set during the context. May be an exported slug + string or an opaque W3C trace-context dict (from + `extract_trace_context`). state: Optional BraintrustState to use. If not provided, uses the global state. Example: @@ -2363,32 +2394,287 @@ def parent_context(parent: str | None, state: BraintrustState | None = None): state.current_parent.reset(token) -def get_span_parent_object( - parent: str | None = None, state: BraintrustState | None = None -) -> "SpanComponentsV4 | Logger | Experiment | Span": - """Mainly for internal use. Return the parent object for starting a span in a global context. 
- Applies precedence: current span > propagated parent string > experiment > logger.""" +def _get_span_parent_object_and_propagated_state( + parent: "str | dict | None" = None, state: BraintrustState | None = None +) -> "tuple[SpanComponentsV4 | Logger | Experiment | Span, PropagatedState | None]": + """Resolve the parent object and any forwarded W3C state in a single pass. + + Same precedence as `get_span_parent_object`, but also returns the W3C state + (tracestate + raw traceparent flags) recovered while normalizing the + `parent` argument, so callers don't have to re-run `_normalize_parent` (which + would re-parse baggage and re-resolve the active logger/experiment, + potentially disagreeing if state changed between calls). The state is only + meaningful when a parent slug was resolved; otherwise it is None.""" if state is None: state = _state span = current_span() if span != NOOP_SPAN: - return span + return span, None - parent = parent or state.current_parent.get() - if parent: - return SpanComponentsV4.from_str(parent) + parent = parent if parent is not None else state.current_parent.get() + parent_slug, propagated_state = _normalize_parent(parent) + if parent_slug: + return SpanComponentsV4.from_str(parent_slug), propagated_state experiment = current_experiment() if experiment: - return experiment + return experiment, None logger = current_logger() if logger: - return logger + return logger, None - return NOOP_SPAN + return NOOP_SPAN, None + + +def get_span_parent_object( + parent: "str | dict | None" = None, state: BraintrustState | None = None +) -> "SpanComponentsV4 | Logger | Experiment | Span": + """Mainly for internal use. Return the parent object for starting a span in a global context. + Applies precedence: current span > propagated parent > experiment > logger. 
+ + `parent` may be an exported slug string or an opaque W3C trace-context dict + (from `extract_trace_context`).""" + + parent_obj, _propagated_state = _get_span_parent_object_and_propagated_state(parent, state) + return parent_obj + + +def _current_braintrust_parent(state: BraintrustState | None = None) -> str | None: + """Return the Braintrust parent string for the current logger/experiment, if any. + + Used as the fallback Braintrust parent on receive, when an inbound request + carries trace identity (`traceparent`) but no `braintrust.parent` baggage. + """ + if state is None: + state = _state + + experiment = current_experiment() + if experiment: + try: + components = SpanComponentsV4.from_str(experiment.export()) + return f"experiment_id:{components.object_id}" + except Exception: + return None + + logger = current_logger() + if logger: + try: + components = SpanComponentsV4.from_str(logger.export()) + if components.object_id: + return f"project_id:{components.object_id}" + meta = components.compute_object_metadata_args or {} + name = meta.get("project_name") + if name: + return f"project_name:{name}" + except Exception: + return None + + return None + + +def _braintrust_parent_to_components(braintrust_parent: str): + """Parse a `braintrust.parent` string into (object_type, object_id, compute_args). + + Accepts `project_id:{id}`, `project_name:{name}`, or `experiment_id:{id}`. + Returns None if the value is empty or malformed. 
+ """ + if not braintrust_parent: + return None + if braintrust_parent.startswith("project_id:"): + object_id = braintrust_parent[len("project_id:") :] + return (SpanObjectTypeV3.PROJECT_LOGS, object_id, None) if object_id else None + if braintrust_parent.startswith("project_name:"): + name = braintrust_parent[len("project_name:") :] + return (SpanObjectTypeV3.PROJECT_LOGS, None, {"project_name": name}) if name else None + if braintrust_parent.startswith("experiment_id:"): + object_id = braintrust_parent[len("experiment_id:") :] + return (SpanObjectTypeV3.EXPERIMENT, object_id, None) if object_id else None + return None + + +def _set_header(carrier: dict, name: str, value: str) -> None: + """Set a W3C trace-context header on a carrier, sending the lowercase name. + + Per the W3C Trace Context spec (§3.2.1 / §3.3.1), vendors SHOULD send these + header names in lowercase. ``name`` is always the canonical lowercase key. + A plain ``dict`` carrier is case-sensitive, so any pre-existing case-variant + (e.g. ``Baggage`` from a framework that title-cases headers) must be removed + first, otherwise the carrier would end up with two conflicting headers. + """ + lowered = name.lower() + for key in list(carrier.keys()): + if isinstance(key, str) and key != name and key.lower() == lowered: + del carrier[key] + carrier[name] = value + + +def _inject_into_carrier( + carrier: dict, + trace_id: str, + span_id: str, + braintrust_parent: str | None, + propagated_state: "PropagatedState | None" = None, +) -> None: + """Inject W3C trace-context headers into a carrier dict (in place). + + Emits `traceparent` from the hex trace/span ids, merges `braintrust.parent` + into existing `baggage` when known, and forwards any inbound W3C state + (`tracestate` plus the original `traceparent` trace-flags) carried in + `propagated_state`. Pre-existing, non-Braintrust baggage entries are + preserved. 
+ """ + # Re-emit the inbound trace-flags verbatim so the upstream sampling decision + # (and any future flag bits) is preserved; defaults to sampled when we + # originated the trace (no inbound flags). + trace_flags = propagated_state.trace_flags if propagated_state else None + traceparent = ( + format_traceparent(trace_id, span_id, trace_flags) if trace_flags else format_traceparent(trace_id, span_id) + ) + if traceparent is None: + # Ids aren't W3C-shaped (e.g. legacy UUID mode); nothing to propagate. + return + _set_header(carrier, TRACEPARENT_HEADER, traceparent) + + # Forward upstream tracestate (per W3C, only alongside a valid traceparent). + tracestate = propagated_state.tracestate if propagated_state else None + if tracestate: + _set_header(carrier, TRACESTATE_HEADER, tracestate) + + # Merge braintrust.parent into any existing baggage. Other vendors' members + # are forwarded byte-for-byte (see merge_baggage) so we never rewrite their + # percent-encoding. + existing = get_header(carrier, BAGGAGE_HEADER) + baggage_value = merge_baggage(existing, braintrust_parent) + if baggage_value is not None: + _set_header(carrier, BAGGAGE_HEADER, baggage_value) + + +def inject_trace_context(carrier: dict | None = None, span: "Span | None" = None) -> dict: + """Inject W3C trace-context headers for the current (or given) span into a carrier. + + This is the free-function form of `Span.inject`, and the send-side + counterpart of `extract_trace_context`. If no span is provided, the + currently-active span is used. Propagation is best-effort and never raises. + + :param carrier: Optional carrier dict (e.g. outbound HTTP headers) to mutate. + :param span: Optional span to inject. Defaults to the current span. + :returns: The carrier with propagation headers injected. 
+ """ + if carrier is None: + carrier = {} + span = span if span is not None else current_span() + try: + return span.inject(carrier) + except Exception as e: + logging.warning(f"Error injecting trace context: {e}") + return carrier + + +def extract_trace_context(headers: dict) -> dict | None: + """Extract an opaque W3C trace-context from inbound request headers. + + This is the receive-side counterpart of `Span.inject` / + `inject_trace_context`. The return value is an opaque propagation context + that can be passed as `parent=` to `start_span`: + + ctx = extract_trace_context(request.headers) + with start_span(name="handler", parent=ctx) as span: + ... + + Only the W3C Trace Context headers are interpreted (`traceparent`, `baggage`, + `tracestate`); header lookups are case-insensitive. If no valid `traceparent` + is present, returns None (the caller starts a fresh root span). The Braintrust + container the trace is routed under is resolved when the span is created: from + the `braintrust.parent` baggage entry, or else the currently-active + logger/experiment. + + Callers should treat the return value as opaque. + + :param headers: Inbound request headers (e.g. an HTTP framework's headers). + :returns: An opaque context for `start_span(parent=...)`, or None. + """ + if not headers: + return None + + traceparent = get_header(headers, TRACEPARENT_HEADER) + if not traceparent or parse_traceparent(traceparent) is None: + return None + + context = {TRACEPARENT_HEADER: traceparent} + baggage_value = get_header(headers, BAGGAGE_HEADER) + if baggage_value: + context[BAGGAGE_HEADER] = baggage_value + tracestate = get_header(headers, TRACESTATE_HEADER) + if tracestate: + context[TRACESTATE_HEADER] = tracestate + return context + + +def _resolve_w3c_parent(context: dict) -> "tuple[str | None, PropagatedState | None]": + """Resolve a W3C trace-context dict into (parent_slug, propagated_state). 
+ + Reads `traceparent` for trace identity and `braintrust.parent` from + `baggage` (falling back to the currently-active logger/experiment) for + routing, and builds an internal Braintrust parent slug. Captures the + `tracestate` and raw `traceparent` flags to forward onward. Returns + (None, None) if the context cannot be resolved into a usable parent (so the + caller falls back to local precedence / a fresh root). + """ + traceparent = get_header(context, TRACEPARENT_HEADER) + parsed = parse_traceparent(traceparent) if traceparent else None + if parsed is None: + return None, None + trace_id, span_id, trace_flags = parsed + + # Determine the Braintrust container: baggage -> current logger/experiment. + braintrust_parent = None + baggage_value = get_header(context, BAGGAGE_HEADER) + if baggage_value: + braintrust_parent = parse_baggage(baggage_value).get(BRAINTRUST_PARENT_KEY) + if not braintrust_parent: + braintrust_parent = _current_braintrust_parent() + if not braintrust_parent: + logging.warning( + "Received traceparent without a braintrust.parent and no active logger/experiment; " + "cannot route the trace. Starting a fresh local span instead." 
+ ) + return None, None + + parsed_parent = _braintrust_parent_to_components(braintrust_parent) + if parsed_parent is None: + logging.warning(f"Invalid braintrust.parent: {braintrust_parent!r}") + return None, None + object_type, object_id, compute_args = parsed_parent + + tracestate = get_header(context, TRACESTATE_HEADER) or None + + slug = SpanComponentsV4( + object_type=object_type, + object_id=object_id, + compute_object_metadata_args=compute_args, + row_id="bt-propagation", # non-empty to enable span_id/root_span_id + span_id=span_id, + root_span_id=trace_id, + ).to_str() + return slug, PropagatedState(tracestate=tracestate, trace_flags=trace_flags) + + +def _normalize_parent(parent: "str | dict | None") -> "tuple[str | None, PropagatedState | None]": + """Normalize a `parent` argument into (parent_slug, propagated_state). + + - dict -> interpreted as a W3C trace-context (from `extract_trace_context`) + - str -> an exported span slug (passed through unchanged) + - None -> no parent + + `propagated_state` carries any inbound W3C state (tracestate + raw flags) to + forward on the next `inject`; it is None when there is no inbound W3C context. + """ + if isinstance(parent, dict): + return _resolve_w3c_parent(parent) + return parent, None def _try_log_input(span, f_sig, f_args, f_kwargs): @@ -2574,7 +2860,7 @@ def start_span( span_attributes: SpanAttributes | Mapping[str, Any] | None = None, start_time: float | None = None, set_current: bool | None = None, - parent: str | None = None, + parent: str | dict | None = None, propagated_event: dict[str, Any] | None = None, state: BraintrustState | None = None, **event: Any, @@ -2589,12 +2875,17 @@ def start_span( if not state: state = _state - parent_obj = get_span_parent_object(parent, state) + # Resolve the parent object and any forwarded W3C state in one pass, so we + # don't re-normalize `parent` (which could disagree if the active + # logger/experiment changed between calls). 
+ parent_obj, propagated_state = _get_span_parent_object_and_propagated_state(parent, state) if isinstance(parent_obj, SpanComponentsV4): - if parent_obj.row_id and parent_obj.span_id and parent_obj.root_span_id: + if parent_obj.row_id and _parent_span_ids_usable(parent_obj.span_id, parent_obj.root_span_id): parent_span_ids = ParentSpanIds(span_id=parent_obj.span_id, root_span_id=parent_obj.root_span_id) else: + # No row id (object-only slug): there is no span to link to, so start + # a fresh root, keeping the object routing from the slug. parent_span_ids = None return SpanImpl( parent_object_type=parent_obj.object_type, @@ -2607,6 +2898,7 @@ def start_span( start_time=start_time, set_current=set_current, propagated_event=coalesce(propagated_event, parent_obj.propagated_event), + propagated_state=propagated_state, event=event, state=state, lookup_span_parent=False, @@ -3649,17 +3941,35 @@ def permalink(slug: str, org_name: str | None = None, app_url: str | None = None return _get_error_link() +def _parent_span_ids_usable(span_id: str | None, root_span_id: str | None) -> bool: + """Return True if a parent slug carries span ids the child can link to. + + A child links to any parent slug that carries both a span id and a root span + id, regardless of whether those ids are hex (the default) or legacy UUID. + This keeps the deprecated `start_span(parent=exported_slug)` path backwards + compatible: a slug exported by an older (UUID) sender still links into a + trace on a newer (hex) receiver, and vice versa. The child's own freshly + generated span id stays in the active format; the backend supports traces + whose nodes mix id formats across a propagation boundary. Empty ids (no row) + are handled by the caller. 
+ """ + return bool(span_id) and bool(root_span_id) + + def _start_span_parent_args( - parent: str | None, + parent: "str | dict | None", parent_object_type: SpanObjectTypeV3, parent_object_id: LazyValue[str], parent_compute_object_metadata_args: dict[str, Any] | None, parent_span_ids: ParentSpanIds | None, propagated_event: dict[str, Any] | None, + propagated_state: "PropagatedState | None" = None, ) -> dict[str, Any]: - if parent: + # `parent` may be an exported slug string or an opaque W3C trace-context dict. + parent_slug, parent_propagated_state = _normalize_parent(parent) + if parent_slug: assert parent_span_ids is None, "Cannot specify both parent and parent_span_ids" - parent_components = SpanComponentsV4.from_str(parent) + parent_components = SpanComponentsV4.from_str(parent_slug) assert parent_object_type == parent_components.object_type, ( f"Mismatch between expected span parent object type {parent_object_type} and provided type {parent_components.object_type}" ) @@ -3674,17 +3984,23 @@ def compute_parent_object_id(): return parent_object_id.get() arg_parent_object_id = LazyValue(compute_parent_object_id, use_mutex=False) - if parent_components.row_id: + if parent_components.row_id and _parent_span_ids_usable( + parent_components.span_id, parent_components.root_span_id + ): arg_parent_span_ids = ParentSpanIds( span_id=parent_components.span_id, root_span_id=parent_components.root_span_id ) else: + # No row id (object-only slug): there is no span to link to, so start + # a fresh root, keeping the object routing from the slug. 
arg_parent_span_ids = None arg_propagated_event = coalesce(propagated_event, parent_components.propagated_event) + arg_propagated_state = coalesce(propagated_state, parent_propagated_state) else: arg_parent_object_id = parent_object_id arg_parent_span_ids = parent_span_ids arg_propagated_event = propagated_event + arg_propagated_state = propagated_state return dict( parent_object_type=parent_object_type, @@ -3692,6 +4008,7 @@ def compute_parent_object_id(): parent_compute_object_metadata_args=parent_compute_object_metadata_args, parent_span_ids=arg_parent_span_ids, propagated_event=arg_propagated_event, + propagated_state=arg_propagated_state, ) @@ -3886,7 +4203,7 @@ def start_span( span_attributes: SpanAttributes | Mapping[str, Any] | None = None, start_time: float | None = None, set_current: bool | None = None, - parent: str | None = None, + parent: str | dict | None = None, propagated_event: dict[str, Any] | None = None, **event: Any, ) -> Span: @@ -4033,7 +4350,7 @@ def _start_span_impl( span_attributes: SpanAttributes | Mapping[str, Any] | None = None, start_time: float | None = None, set_current: bool | None = None, - parent: str | None = None, + parent: str | dict | None = None, propagated_event: dict[str, Any] | None = None, lookup_span_parent: bool = True, **event: Any, @@ -4140,6 +4457,7 @@ def __init__( set_current: bool | None = None, event: dict[str, Any] | None = None, propagated_event: dict[str, Any] | None = None, + propagated_state: "PropagatedState | None" = None, span_id: str | None = None, root_span_id: str | None = None, state: BraintrustState | None = None, @@ -4171,6 +4489,13 @@ def __init__( if self.propagated_event: merge_dicts(event, self.propagated_event) + # Inbound W3C trace-context state (tracestate + raw traceparent flags) to + # forward on outbound propagation. 
Captured at the span that received it + # (via extract_trace_context) and inherited by all subspans, so that any + # inject() within the trace re-emits the upstream state unchanged, per the + # W3C Trace Context spec. Not interpreted. + self.propagated_state = propagated_state + caller_location = get_caller_location() if name is None: if not parent_span_ids: @@ -4326,7 +4651,7 @@ def start_span( span_attributes: SpanAttributes | Mapping[str, Any] | None = None, start_time: float | None = None, set_current: bool | None = None, - parent: str | None = None, + parent: str | dict | None = None, propagated_event: dict[str, Any] | None = None, **event: Any, ) -> Span: @@ -4348,6 +4673,7 @@ def start_span( parent_compute_object_metadata_args=self.parent_compute_object_metadata_args, parent_span_ids=parent_span_ids, propagated_event=coalesce(propagated_event, self.propagated_event), + propagated_state=self.propagated_state, ), name=name, type=type, @@ -4377,9 +4703,9 @@ def export(self) -> str: object_id = self.parent_object_id.get() compute_object_metadata_args = None - # Choose SpanComponents version based on BRAINTRUST_OTEL_COMPAT env var - use_v4 = BraintrustEnv.OTEL_COMPAT.get(False) - span_components_class = SpanComponentsV4 if use_v4 else SpanComponentsV3 + # Choose SpanComponents version based on the active ID format (hex -> V4, + # legacy UUID -> V3). Coupled via _get_exporter() so the two never desync. + span_components_class = _get_exporter() # Disable span cache since remote function spans won't be in the local cache self.state.span_cache.disable() @@ -4394,6 +4720,33 @@ def export(self) -> str: propagated_event=self.propagated_event, ).to_str() + def inject(self, carrier: dict | None = None) -> dict: + """Inject W3C trace-context headers for this span into a carrier dict. + + Adds ``traceparent`` (trace identity) and, when this span's Braintrust + parent is known, a ``baggage`` entry ``braintrust.parent=`` + (merged with any pre-existing baggage). 
Propagation is best-effort and + never raises; if the span's ids are not W3C-shaped hex (e.g. legacy UUID + mode), ``traceparent`` is omitted. + + :param carrier: Optional existing carrier (e.g. outbound HTTP headers) to + mutate and return. A new dict is created if not provided. + :returns: The carrier dict with propagation headers injected. + """ + if carrier is None: + carrier = {} + try: + _inject_into_carrier( + carrier, + trace_id=self.root_span_id, + span_id=self.span_id, + braintrust_parent=self._get_otel_parent(), + propagated_state=self.propagated_state, + ) + except Exception as e: # best-effort: never break the caller + logging.warning(f"Error injecting trace context: {e}") + return carrier + def link(self) -> str: parent_type, info = self._get_parent_info() if parent_type == SpanObjectTypeV3.PROJECT_LOGS: @@ -5315,7 +5668,7 @@ def start_span( span_attributes: SpanAttributes | Mapping[str, Any] | None = None, start_time: float | None = None, set_current: bool | None = None, - parent: str | None = None, + parent: str | dict | None = None, propagated_event: dict[str, Any] | None = None, span_id: str | None = None, root_span_id: str | None = None, @@ -5365,7 +5718,7 @@ def _start_span_impl( span_attributes: SpanAttributes | Mapping[str, Any] | None = None, start_time: float | None = None, set_current: bool | None = None, - parent: str | None = None, + parent: str | dict | None = None, propagated_event: dict[str, Any] | None = None, span_id: str | None = None, root_span_id: str | None = None, diff --git a/py/src/braintrust/propagation.py b/py/src/braintrust/propagation.py new file mode 100644 index 00000000..27742b01 --- /dev/null +++ b/py/src/braintrust/propagation.py @@ -0,0 +1,251 @@ +"""Native W3C Trace Context propagation for Braintrust. + +This module implements the propagation wire format described in the Braintrust +distributed-tracing spec, using pure Python with no dependency on +``opentelemetry``. 
It parses and serializes the W3C ``traceparent`` and +``baggage`` headers and the Braintrust ``braintrust.parent`` baggage entry. + +Trace identity (trace id + parent span id) is carried in ``traceparent``; the +Braintrust container the trace belongs to (project/experiment) is carried in +``baggage`` under the ``braintrust.parent`` key. +""" + +import logging +import re +from typing import NamedTuple +from urllib.parse import unquote + + +__all__ = [ + "TRACEPARENT_HEADER", + "TRACESTATE_HEADER", + "BAGGAGE_HEADER", + "BRAINTRUST_PARENT_KEY", + "DEFAULT_TRACE_FLAGS", + "ParsedTraceparent", + "PropagatedState", + "parse_traceparent", + "format_traceparent", + "parse_baggage", + "merge_baggage", + "get_header", +] + +log = logging.getLogger(__name__) + +TRACEPARENT_HEADER = "traceparent" +TRACESTATE_HEADER = "tracestate" +BAGGAGE_HEADER = "baggage" +BRAINTRUST_PARENT_KEY = "braintrust.parent" + +# Trace-flags byte we emit for traces we originate: sampled (low bit set). +DEFAULT_TRACE_FLAGS = "01" + + +class ParsedTraceparent(NamedTuple): + """Parsed W3C ``traceparent`` fields. + + Tuple-compatible: unpacks as ``(trace_id, span_id, trace_flags)``. + ``trace_flags`` is the raw 2-hex trace-flags byte (e.g. ``"01"`` sampled, + ``"00"`` not sampled), kept raw so any future flag bits survive a + parse -> format round trip without per-bit handling. + """ + + trace_id: str + span_id: str + trace_flags: str + + +class PropagatedState(NamedTuple): + """Inbound W3C trace-context state that Braintrust forwards but never interprets. + + Captured at the span created from inbound headers (via + ``extract_trace_context``) and inherited by every subspan, so that any + ``inject()`` within the trace re-emits the upstream state unchanged, per the + W3C Trace Context spec. + + - ``tracestate``: the W3C ``tracestate`` header (opaque vendor state). + - ``trace_flags``: the raw 2-hex ``traceparent`` trace-flags byte. Stored raw + so future flag bits are preserved without per-bit handling. 
+ """ + + tracestate: str | None = None + trace_flags: str | None = None + + +# W3C traceparent: version-traceid-parentid-flags, version 00, lowercase hex. +_TRACEPARENT_RE = re.compile(r"^00-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$") +_ZERO_TRACE_ID = "0" * 32 +_ZERO_SPAN_ID = "0" * 16 + +# W3C baggage size guidance: cap parsing work on absurdly large headers. +_MAX_BAGGAGE_LENGTH = 8192 + + +def get_header(headers, name): + """Case-insensitive header lookup. + + Some frameworks normalize header names to title case (e.g. ``Traceparent``) + while the W3C keys are lowercase. Returns the first matching value or None. + """ + if not headers: + return None + # Fast path: exact (lowercase) match. + value = headers.get(name) + if value is not None: + return value + lowered = name.lower() + for key, val in headers.items(): + if isinstance(key, str) and key.lower() == lowered: + return val + return None + + +def parse_traceparent(value): + """Parse a W3C ``traceparent`` value into a :class:`ParsedTraceparent`. + + Returns a ``(trace_id, span_id, trace_flags)`` named tuple, where + ``trace_flags`` is the raw 2-hex trace-flags byte. Returns None for any + malformed value (bad version, wrong length, non-hex, or all-zero ids). Never + raises. + """ + if not value or not isinstance(value, str): + return None + match = _TRACEPARENT_RE.match(value.strip()) + if not match: + return None + trace_id, span_id, flags = match.group(1), match.group(2), match.group(3) + if trace_id == _ZERO_TRACE_ID or span_id == _ZERO_SPAN_ID: + return None + return ParsedTraceparent(trace_id, span_id, flags) + + +def format_traceparent(trace_id, span_id, trace_flags=DEFAULT_TRACE_FLAGS): + """Serialize a W3C ``traceparent`` value from hex trace/span ids. + + ``trace_flags`` is the raw 2-hex trace-flags byte to emit; it is forwarded + verbatim so any upstream/future flag bits survive. Falls back to + ``DEFAULT_TRACE_FLAGS`` (sampled) when not a valid 2-hex byte. 
Returns None + if the ids are not valid W3C-shaped hex (so callers can omit the header + rather than emit something malformed). + """ + if not _is_hex(trace_id, 32) or trace_id == _ZERO_TRACE_ID: + return None + if not _is_hex(span_id, 16) or span_id == _ZERO_SPAN_ID: + return None + flags = trace_flags if _is_hex(trace_flags, 2) else DEFAULT_TRACE_FLAGS + return f"00-{trace_id}-{span_id}-{flags}" + + +def parse_baggage(value): + """Parse a W3C ``baggage`` header into an ordered dict of key -> value. + + Tolerates malformed/oversized input by skipping bad entries; never raises. + Property metadata (after ';') is ignored. Keys and values are percent-decoded. + """ + result = {} + if not value or not isinstance(value, str): + return result + if len(value) > _MAX_BAGGAGE_LENGTH: + # Oversized header: best-effort, only look at a bounded prefix. + value = value[:_MAX_BAGGAGE_LENGTH] + for member in value.split(","): + member = member.strip() + if not member or "=" not in member: + continue + # Strip any ';'-delimited properties. + member = member.split(";", 1)[0] + key, _, val = member.partition("=") + key = _percent_decode(key.strip()) + if not key: + continue + result[key] = _percent_decode(val.strip()) + return result + + +def merge_baggage(existing, braintrust_parent): + """Merge a ``braintrust.parent`` value into an existing ``baggage`` header. + + This preserves every other vendor's baggage member byte-for-byte: their raw + ``key=value`` substrings (properties included) are forwarded exactly as + received rather than decoded and re-encoded. Decoding then re-encoding would + silently rewrite another vendor's percent-encoding (e.g. ``path=a%2Fb`` -> + ``path=a/b``), so we keep Braintrust a transparent relay. Whitespace around + list members is insignificant per W3C and is trimmed. + + Only the ``braintrust.parent`` member is (re)serialized, by us, from the + ``braintrust_parent`` argument. 
Any pre-existing ``braintrust.parent`` + member in ``existing`` is dropped in favor of the supplied value. + + Returns the merged header value, or None if there is nothing to emit (so + callers omit the header rather than emit an empty one). + """ + members = [] + if existing and isinstance(existing, str): + capped = existing[:_MAX_BAGGAGE_LENGTH] if len(existing) > _MAX_BAGGAGE_LENGTH else existing + for raw_member in capped.split(","): + member = raw_member.strip() + if not member or "=" not in member: + continue + # Identify the key (ignoring ';'-delimited properties) only to skip + # any inbound braintrust.parent; everything else is forwarded raw. + key_part = member.split(";", 1)[0].partition("=")[0] + key = _percent_decode(key_part.strip()) + if key == BRAINTRUST_PARENT_KEY: + continue + members.append(member) + + if braintrust_parent: + encoded_key = _percent_encode(BRAINTRUST_PARENT_KEY) + encoded_val = _percent_encode(str(braintrust_parent)) + members.append(f"{encoded_key}={encoded_val}") + + if not members: + return None + return ",".join(members) + + +def _is_hex(value, length): + if not isinstance(value, str) or len(value) != length: + return False + try: + int(value, 16) + except ValueError: + return False + return all(c in "0123456789abcdef" for c in value) + + +# Per W3C Baggage, member values are percent-encoded. We decode inbound values +# with a full RFC3986 ``unquote`` so we interoperate with standard encoders +# (e.g. OpenTelemetry's propagator percent-encodes ``:`` as ``%3A``). On emit we +# percent-encode only our own ``braintrust.parent`` member; byte-for-byte +# pass-through of *other* vendors' baggage is handled by :func:`merge_baggage`, +# which forwards their raw member strings unchanged rather than round-tripping +# them through this codec. 
+# +# Encoded set is kept minimal: the characters that would otherwise break the +# baggage grammar (member/list delimiters and the property separator) plus +# ``%`` so the encoding is self-describing. +_PERCENT_ENCODE = { + ",": "%2C", + ";": "%3B", + "=": "%3D", + " ": "%20", + "%": "%25", +} + + +def _percent_encode(value): + out = [] + for ch in value: + out.append(_PERCENT_ENCODE.get(ch, ch)) + return "".join(out) + + +def _percent_decode(value): + if "%" not in value: + return value + try: + return unquote(value) + except Exception: + return value diff --git a/py/src/braintrust/span_identifier_v4.py b/py/src/braintrust/span_identifier_v4.py index c881ef49..a8a67ff9 100644 --- a/py/src/braintrust/span_identifier_v4.py +++ b/py/src/braintrust/span_identifier_v4.py @@ -226,10 +226,12 @@ def export(self) -> str: @staticmethod def _from_json_obj(json_obj: dict) -> "SpanComponentsV4": - kwargs = { - **json_obj, - "object_type": SpanObjectTypeV3(json_obj["object_type"]), - } + # Only consume known fields. Unknown keys (e.g. fields added by a newer + # SDK) are ignored rather than passed to the constructor, so that adding + # new optional fields is forward-compatible across SDK versions. 
+ known_fields = {f.name for f in dataclasses.fields(SpanComponentsV4)} + kwargs = {k: v for k, v in json_obj.items() if k in known_fields} + kwargs["object_type"] = SpanObjectTypeV3(json_obj["object_type"]) return SpanComponentsV4(**kwargs) diff --git a/py/src/braintrust/test_context.py b/py/src/braintrust/test_context.py index e6b4e4f6..8ee55f7b 100644 --- a/py/src/braintrust/test_context.py +++ b/py/src/braintrust/test_context.py @@ -1228,8 +1228,10 @@ def test_nested_spans_same_thread(test_logger, with_memory_logger): child_log = next(l for l in logs if l["span_attributes"]["name"] == "child") grandchild_log = next(l for l in logs if l["span_attributes"]["name"] == "grandchild") - # Verify parent chain - assert root_log["span_id"] == root_log["root_span_id"], "Root is root" + # Verify parent chain. The root span has no parents and defines the trace's + # root_span_id (format-agnostic; under legacy UUID mode span_id also equals + # root_span_id, but that does not hold for hex IDs). + assert not root_log.get("span_parents"), "Root has no parents" assert child_log["root_span_id"] == root_log["root_span_id"], "Child same root" assert grandchild_log["root_span_id"] == root_log["root_span_id"], "Grandchild same root" assert root_log["span_id"] in child_log.get("span_parents", []), "Child parent is root" diff --git a/py/src/braintrust/test_env.py b/py/src/braintrust/test_env.py index a4b750be..ee9ce4b8 100644 --- a/py/src/braintrust/test_env.py +++ b/py/src/braintrust/test_env.py @@ -1,6 +1,14 @@ import pytest -from .env import BraintrustEnv, EnvParser, EnvVar, parse_bool, parse_float, parse_int, parse_string +from .env import ( + BraintrustEnv, + EnvParser, + EnvVar, + parse_bool, + parse_float, + parse_int, + parse_string, +) class TestEnvParsers: @@ -138,3 +146,31 @@ def test_otel_compat_uses_shared_bool_parser(self, monkeypatch): monkeypatch.setenv("BRAINTRUST_OTEL_COMPAT", "false") assert BraintrustEnv.OTEL_COMPAT.get(True) is False + + +class TestIdConfig: + # 
LEGACY_IDS is lazily resolved from the current environment on each + # access, like the other settings, so tests just set env vars and read it. + + def test_hex_ids_default(self, monkeypatch): + monkeypatch.delenv("BRAINTRUST_OTEL_COMPAT", raising=False) + monkeypatch.delenv("BRAINTRUST_LEGACY_IDS", raising=False) + assert BraintrustEnv.LEGACY_IDS is False + + def test_legacy_uuid_opt_out(self, monkeypatch): + monkeypatch.delenv("BRAINTRUST_OTEL_COMPAT", raising=False) + monkeypatch.setenv("BRAINTRUST_LEGACY_IDS", "true") + assert BraintrustEnv.LEGACY_IDS is True + + def test_otel_compat_forces_hex(self, monkeypatch): + # OTEL_COMPAT implies hex IDs regardless of LEGACY_IDS being unset. + monkeypatch.setenv("BRAINTRUST_OTEL_COMPAT", "true") + monkeypatch.delenv("BRAINTRUST_LEGACY_IDS", raising=False) + assert BraintrustEnv.LEGACY_IDS is False + + def test_conflicting_flags_otel_wins(self, monkeypatch): + # OTEL_COMPAT wins over LEGACY_IDS: legacy is disabled (hex IDs) + # rather than raising. 
+ monkeypatch.setenv("BRAINTRUST_OTEL_COMPAT", "true") + monkeypatch.setenv("BRAINTRUST_LEGACY_IDS", "true") + assert BraintrustEnv.LEGACY_IDS is False diff --git a/py/src/braintrust/test_id_gen.py b/py/src/braintrust/test_id_gen.py index b5633979..68bc10c9 100644 --- a/py/src/braintrust/test_id_gen.py +++ b/py/src/braintrust/test_id_gen.py @@ -8,14 +8,18 @@ @pytest.fixture(autouse=True) def reset_id_generator_state(): """Reset ID generator state and environment variables before each test""" - original_env = os.getenv("BRAINTRUST_OTEL_COMPAT") + original_otel = os.getenv("BRAINTRUST_OTEL_COMPAT") + original_legacy = os.getenv("BRAINTRUST_LEGACY_IDS") try: yield finally: os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) - if original_env: - os.environ["BRAINTRUST_OTEL_COMPAT"] = original_env + os.environ.pop("BRAINTRUST_LEGACY_IDS", None) + if original_otel: + os.environ["BRAINTRUST_OTEL_COMPAT"] = original_otel + if original_legacy: + os.environ["BRAINTRUST_LEGACY_IDS"] = original_legacy def test_uuid_generator(): @@ -55,17 +59,21 @@ def test_otel_id_generator(): def test_id_get_env_var(reset_id_generator_state): + # By default (no env vars) the generator produces OTEL-compatible hex IDs. + # BRAINTRUST_OTEL_COMPAT always implies hex; BRAINTRUST_LEGACY_IDS opts + # back into UUIDs (only when OTEL_COMPAT is off). 
cases = [ - (None, lambda _id: uuid.UUID(_id)), + (None, lambda _id: _assert_is_hex(_id)), ("true", lambda _id: _assert_is_hex(_id)), ("True", lambda _id: _assert_is_hex(_id)), ("TRUE", lambda _id: _assert_is_hex(_id)), - ("false", lambda _id: uuid.UUID(_id)), - ("False", lambda _id: uuid.UUID(_id)), + ("false", lambda _id: _assert_is_hex(_id)), + ("False", lambda _id: _assert_is_hex(_id)), ] for env_var_value, assert_func in cases: os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ.pop("BRAINTRUST_LEGACY_IDS", None) if env_var_value is not None: os.environ["BRAINTRUST_OTEL_COMPAT"] = env_var_value generator = id_gen.get_id_generator() @@ -73,6 +81,32 @@ def test_id_get_env_var(reset_id_generator_state): assert_func(generator.get_trace_id()) +def test_id_get_env_var_legacy_uuid(reset_id_generator_state): + # BRAINTRUST_LEGACY_IDS opts back into UUID IDs. + cases = [ + ("true", lambda _id: uuid.UUID(_id)), + ("True", lambda _id: uuid.UUID(_id)), + ("false", lambda _id: _assert_is_hex(_id)), + ] + + for env_var_value, assert_func in cases: + os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ.pop("BRAINTRUST_LEGACY_IDS", None) + os.environ["BRAINTRUST_LEGACY_IDS"] = env_var_value + generator = id_gen.get_id_generator() + assert_func(generator.get_span_id()) + assert_func(generator.get_trace_id()) + + +def test_otel_compat_wins_over_legacy_uuid(reset_id_generator_state): + # When both are set, OTEL_COMPAT wins: hex IDs are used (no raise). 
+ os.environ["BRAINTRUST_OTEL_COMPAT"] = "true" + os.environ["BRAINTRUST_LEGACY_IDS"] = "true" + generator = id_gen.get_id_generator() + assert generator.share_root_span_id() is False + _assert_is_hex(generator.get_span_id()) + + def _is_hex(s): return all(c in "0123456789abcdef" for c in s.lower()) diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py index 43c162fc..1f3149e5 100644 --- a/py/src/braintrust/test_logger.py +++ b/py/src/braintrust/test_logger.py @@ -2845,16 +2845,19 @@ def reset_id_generator_state(): """Reset ID generator state and environment variables before each test""" logger._state._reset_id_generator() logger._state._reset_context_manager() - original_env = os.getenv("BRAINTRUST_OTEL_COMPAT") + original_otel = os.getenv("BRAINTRUST_OTEL_COMPAT") + original_legacy = os.getenv("BRAINTRUST_LEGACY_IDS") try: yield finally: logger._state._reset_id_generator() logger._state._reset_context_manager() - if "BRAINTRUST_OTEL_COMPAT" in os.environ: - del os.environ["BRAINTRUST_OTEL_COMPAT"] - if original_env: - os.environ["BRAINTRUST_OTEL_COMPAT"] = original_env + os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ.pop("BRAINTRUST_LEGACY_IDS", None) + if original_otel: + os.environ["BRAINTRUST_OTEL_COMPAT"] = original_otel + if original_legacy: + os.environ["BRAINTRUST_LEGACY_IDS"] = original_legacy def test_otel_compatible_span_export_import(): @@ -2930,21 +2933,22 @@ def test_span_with_otel_ids_export_import(reset_id_generator_state): def test_span_with_uuid_ids_share_root_span_id(reset_id_generator_state): - """Test that UUID generators share span_id as root_span_id for backwards compatibility.""" + """Test that legacy UUID generators share span_id as root_span_id for backwards compatibility.""" import os - # Ensure UUID generator is used (default behavior) - if "BRAINTRUST_OTEL_COMPAT" in os.environ: - del os.environ["BRAINTRUST_OTEL_COMPAT"] + # Opt into legacy UUID IDs (hex IDs are the default). 
+ os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ["BRAINTRUST_LEGACY_IDS"] = "true" + logger._state._reset_id_generator() init_test_logger(__name__) - # Test that UUID generator should share root_span_id + # Test that the legacy UUID generator shares root_span_id generator = get_id_generator() assert generator.share_root_span_id() == True with logger.start_span(name="test") as span: - # Test that UUID spans should share span_id and root_span_id for backwards compatibility + # Test that UUID spans share span_id and root_span_id for backwards compatibility assert span.span_id == span.root_span_id @@ -3135,15 +3139,28 @@ def test_update_span_includes_span_id_and_root_span_id_from_export(with_memory_l assert updated_log["metadata"] == {"foo": "bar"} -def test_get_exporter_returns_v3_by_default(): - """Test that _get_exporter() returns SpanComponentsV3 when OTEL_COMPAT is not set.""" - with preserve_env_vars("BRAINTRUST_OTEL_COMPAT"): +def test_get_exporter_returns_v4_by_default(): + """Test that _get_exporter() returns SpanComponentsV4 by default (no env vars).""" + with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_LEGACY_IDS"): + os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ.pop("BRAINTRUST_LEGACY_IDS", None) + from braintrust.logger import _get_exporter + from braintrust.span_identifier_v4 import SpanComponentsV4 + + exporter = _get_exporter() + assert exporter == SpanComponentsV4, "Should return V4 by default" + + +def test_get_exporter_returns_v3_when_legacy_uuid(): + """Test that _get_exporter() returns SpanComponentsV3 in legacy UUID mode.""" + with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_LEGACY_IDS"): os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ["BRAINTRUST_LEGACY_IDS"] = "true" from braintrust.logger import _get_exporter from braintrust.span_identifier_v3 import SpanComponentsV3 exporter = _get_exporter() - assert exporter == SpanComponentsV3, "Should return V3 by default" + assert exporter == 
SpanComponentsV3, "Should return V3 in legacy UUID mode" def test_get_exporter_returns_v4_when_otel_enabled(): @@ -3157,10 +3174,25 @@ def test_get_exporter_returns_v4_when_otel_enabled(): assert exporter == SpanComponentsV4, "Should return V4 when OTEL_COMPAT=true" -def test_experiment_export_respects_otel_compat_default(): - """Test that Experiment.export() uses V3 by default.""" - with preserve_env_vars("BRAINTRUST_OTEL_COMPAT"): +def test_experiment_export_uses_v4_by_default(): + """Test that Experiment.export() uses V4 by default.""" + with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_LEGACY_IDS"): + os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ.pop("BRAINTRUST_LEGACY_IDS", None) + experiment = init_test_exp("test-exp") + exported = experiment.export() + + from braintrust.span_identifier_v4 import SpanComponentsV4 + + version = SpanComponentsV4.get_version(exported) + assert version == 4, f"Expected V4 encoding (version=4), got version={version}" + + +def test_experiment_export_uses_v3_in_legacy_mode(): + """Test that Experiment.export() uses V3 in legacy UUID mode.""" + with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_LEGACY_IDS"): os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ["BRAINTRUST_LEGACY_IDS"] = "true" experiment = init_test_exp("test-exp") exported = experiment.export() @@ -3183,10 +3215,25 @@ def test_experiment_export_respects_otel_compat_enabled(): assert version == 4, f"Expected V4 encoding (version=4), got version={version}" -def test_logger_export_respects_otel_compat_default(): - """Test that Logger.export() uses V3 by default.""" - with preserve_env_vars("BRAINTRUST_OTEL_COMPAT"): +def test_logger_export_uses_v4_by_default(): + """Test that Logger.export() uses V4 by default.""" + with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_LEGACY_IDS"): + os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ.pop("BRAINTRUST_LEGACY_IDS", None) + test_logger = init_test_logger(__name__) 
+ exported = test_logger.export() + + from braintrust.span_identifier_v4 import SpanComponentsV4 + + version = SpanComponentsV4.get_version(exported) + assert version == 4, f"Expected V4 encoding (version=4), got version={version}" + + +def test_logger_export_uses_v3_in_legacy_mode(): + """Test that Logger.export() uses V3 in legacy UUID mode.""" + with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_LEGACY_IDS"): os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ["BRAINTRUST_LEGACY_IDS"] = "true" test_logger = init_test_logger(__name__) exported = test_logger.export() diff --git a/py/src/braintrust/test_otel.py b/py/src/braintrust/test_otel.py index 2a8abc8a..65d116ab 100644 --- a/py/src/braintrust/test_otel.py +++ b/py/src/braintrust/test_otel.py @@ -859,3 +859,71 @@ def fields(self): assert result is not None assert isinstance(result, str) assert len(result) > 0 + + +# --------------------------------------------------------------------------- # +# Cross-paradigm: native W3C propagation <-> OpenTelemetry propagation +# --------------------------------------------------------------------------- # + + +def test_native_inject_extracted_by_otel(): + """Native Span.inject() headers are parseable by OTel's W3C propagator.""" + if not _check_otel_installed(): + pytest.skip("OpenTelemetry SDK not fully installed, skipping test") + + from braintrust.logger import _internal_with_memory_background_logger + from braintrust.test_helpers import init_test_logger + from opentelemetry import trace + from opentelemetry.propagate import extract + + with _internal_with_memory_background_logger(): + logger = init_test_logger("cross-paradigm-native-to-otel") + with logger.start_span(name="native_a") as span: + carrier = span.inject({}) + native_trace_id = span.root_span_id + native_span_id = span.span_id + + # OTel extracts trace identity from the native-produced W3C headers. 
+ ctx = extract(carrier) + otel_span = trace.get_current_span(ctx) + sc = otel_span.get_span_context() + assert format(sc.trace_id, "032x") == native_trace_id + assert format(sc.span_id, "016x") == native_span_id + + +def test_otel_inject_extracted_by_native(): + """OTel-injected W3C headers (+ braintrust.parent baggage) are parseable natively.""" + if not _check_otel_installed(): + pytest.skip("OpenTelemetry SDK not fully installed, skipping test") + + from braintrust.logger import _internal_with_memory_background_logger, extract_trace_context + from braintrust.test_helpers import init_test_logger + from opentelemetry import baggage, context, trace + from opentelemetry.propagate import inject + from opentelemetry.trace import NonRecordingSpan, SpanContext, TraceFlags + + trace_id = 0x4BF92F3577B34DA6A3CE929D0E0E4736 + span_id = 0x00F067AA0BA902B7 + span_context = SpanContext( + trace_id=trace_id, + span_id=span_id, + is_remote=False, + trace_flags=TraceFlags(TraceFlags.SAMPLED), + ) + ctx = trace.set_span_in_context(NonRecordingSpan(span_context)) + ctx = baggage.set_baggage("braintrust.parent", "project_id:abc123", context=ctx) + + token = context.attach(ctx) + try: + carrier = {} + inject(carrier) + finally: + context.detach(token) + + parent = extract_trace_context(carrier) + assert parent is not None + with _internal_with_memory_background_logger(): + logger = init_test_logger("otel-to-native") + with logger.start_span(name="native", parent=parent) as span: + assert span.root_span_id == format(trace_id, "032x") + assert span.span_parents == [format(span_id, "016x")] diff --git a/py/src/braintrust/test_propagation.py b/py/src/braintrust/test_propagation.py new file mode 100644 index 00000000..1b61a8a2 --- /dev/null +++ b/py/src/braintrust/test_propagation.py @@ -0,0 +1,708 @@ +"""Tests for native W3C trace-context propagation. 
+ +These tests cover the Braintrust distributed-tracing spec's test matrix using +the pure-Python propagation path (no opentelemetry dependency), so they run in +the `test_core` nox session. +""" + +import re + +import pytest +from braintrust.logger import ( + _internal_with_memory_background_logger, + extract_trace_context, + inject_trace_context, +) +from braintrust.propagation import ( + BAGGAGE_HEADER, + BRAINTRUST_PARENT_KEY, + TRACEPARENT_HEADER, + TRACESTATE_HEADER, + format_traceparent, + get_header, + merge_baggage, + parse_baggage, + parse_traceparent, +) +from braintrust.span_identifier_v4 import SpanObjectTypeV3 +from braintrust.test_helpers import init_test_logger + + +TRACEPARENT_RE = re.compile(r"^00-[0-9a-f]{32}-[0-9a-f]{16}-[0-9a-f]{2}$") + +VALID_TRACE_ID = "4bf92f3577b34da6a3ce929d0e0e4736" +VALID_SPAN_ID = "00f067aa0ba902b7" +VALID_TRACEPARENT = f"00-{VALID_TRACE_ID}-{VALID_SPAN_ID}-01" + + +# --------------------------------------------------------------------------- # +# Primitives: traceparent / baggage parse + format +# --------------------------------------------------------------------------- # + + +class TestTraceparent: + def test_parse_valid(self): + assert parse_traceparent(VALID_TRACEPARENT) == (VALID_TRACE_ID, VALID_SPAN_ID, "01") + + def test_parse_strips_whitespace(self): + assert parse_traceparent(f" {VALID_TRACEPARENT} ") == (VALID_TRACE_ID, VALID_SPAN_ID, "01") + + @pytest.mark.parametrize( + "value", + [ + "", + None, + "invalid", + "00-tooshort-00f067aa0ba902b7-01", + f"00-{VALID_TRACE_ID}-00f067aa-01", # short span id + f"99-{VALID_TRACE_ID}-{VALID_SPAN_ID}-01", # bad version + f"00-{'0' * 32}-{VALID_SPAN_ID}-01", # zero trace id + f"00-{VALID_TRACE_ID}-{'0' * 16}-01", # zero span id + f"00-{VALID_TRACE_ID.upper()}-{VALID_SPAN_ID}-01", # uppercase hex + ], + ) + def test_parse_invalid(self, value): + assert parse_traceparent(value) is None + + def test_format_round_trip(self): + tp = format_traceparent(VALID_TRACE_ID, 
VALID_SPAN_ID) + assert TRACEPARENT_RE.match(tp) + assert parse_traceparent(tp) == (VALID_TRACE_ID, VALID_SPAN_ID, "01") + + def test_format_rejects_non_hex(self): + assert format_traceparent("not-hex", VALID_SPAN_ID) is None + assert format_traceparent(VALID_TRACE_ID, "00000000-0000") is None + assert format_traceparent("0" * 32, VALID_SPAN_ID) is None + + def test_parse_reports_trace_flags(self): + # The raw trace-flags byte must be recoverable so it can be carried + # through extract -> inject. A not-sampled (`-00`) inbound trace must not + # be silently upgraded to sampled. + assert parse_traceparent(f"00-{VALID_TRACE_ID}-{VALID_SPAN_ID}-01").trace_flags == "01" + assert parse_traceparent(f"00-{VALID_TRACE_ID}-{VALID_SPAN_ID}-00").trace_flags == "00" + + def test_format_preserves_flags_round_trip(self): + # Re-emitting an inbound not-sampled trace must stay `-00`, not flip to + # `-01`. Without carrying the flags through, a mid-chain service would + # override the upstream sampling decision. + parsed = parse_traceparent(f"00-{VALID_TRACE_ID}-{VALID_SPAN_ID}-00") + tp = format_traceparent(VALID_TRACE_ID, VALID_SPAN_ID, parsed.trace_flags) + assert tp.endswith("-00") + assert parse_traceparent(tp).trace_flags == "00" + + def test_format_defaults_to_sampled(self): + # With no flags supplied (a trace we originate), default to sampled. + assert format_traceparent(VALID_TRACE_ID, VALID_SPAN_ID).endswith("-01") + + def test_format_falls_back_on_bad_flags(self): + # A malformed flags value falls back to the sampled default rather than + # emitting a malformed traceparent. 
+ assert format_traceparent(VALID_TRACE_ID, VALID_SPAN_ID, "zz").endswith("-01") + + +class TestBaggage: + def test_parse_simple(self): + assert parse_baggage("braintrust.parent=project_id:abc") == {"braintrust.parent": "project_id:abc"} + + def test_parse_preserves_unrelated_keys(self): + parsed = parse_baggage("foo=bar,braintrust.parent=project_id:abc,baz=qux") + assert parsed["foo"] == "bar" + assert parsed["baz"] == "qux" + assert parsed["braintrust.parent"] == "project_id:abc" + + def test_parse_ignores_properties(self): + assert parse_baggage("k=v;prop=1") == {"k": "v"} + + @pytest.mark.parametrize("value", ["", None, "no-equals", ",,,"]) + def test_parse_malformed_does_not_raise(self, value): + assert parse_baggage(value) == {} + + def test_parse_oversized_does_not_raise(self): + big = "x=" + ("a" * 100000) + # Must not raise; returns something bounded. + assert isinstance(parse_baggage(big), dict) + + def test_parse_decodes_standard_encoder_values(self): + # Per the W3C Baggage spec, values are percent-encoded, and standard + # encoders (e.g. OpenTelemetry's propagator) percent-encode `:` as + # `%3A`. We must fully decode inbound values to interoperate, otherwise + # `braintrust.parent=project_id%3Aabc` is not recognized downstream. + assert parse_baggage(f"{BRAINTRUST_PARENT_KEY}=project_id%3Aabc123") == { + BRAINTRUST_PARENT_KEY: "project_id:abc123" + } + + +class TestMergeBaggage: + def test_adds_braintrust_parent_when_no_existing(self): + merged = merge_baggage(None, "project_id:abc") + assert parse_baggage(merged) == {BRAINTRUST_PARENT_KEY: "project_id:abc"} + + def test_none_parent_and_no_existing_returns_none(self): + assert merge_baggage(None, None) is None + assert merge_baggage("", None) is None + + def test_preserves_unrelated_baggage_byte_for_byte(self): + # An upstream vendor may percent-encode octets outside the small set + # Braintrust itself encodes (e.g. `/` as `%2F`). 
Forwarding such baggage + # must be a transparent relay: the raw member is forwarded unchanged, so + # we never silently rewrite another vendor's value (`path=a%2Fb` must not + # become `path=a/b`). + merged = merge_baggage("path=a%2Fb,user=alice", "project_id:abc") + assert "path=a%2Fb" in merged + assert "user=alice" in merged + assert f"{BRAINTRUST_PARENT_KEY}=project_id:abc" in merged + + def test_does_not_decode_unowned_percent_sequences(self): + # `%41` is the percent-encoding of `A`. A transparent relay must not + # collapse `a%41b` to `aAb`; the inbound wire form is forwarded unchanged. + merged = merge_baggage("k=a%41b", None) + assert merged == "k=a%41b" + + @pytest.mark.parametrize( + "value", + [ + "a%2Fb", # `/` outside our encode set + "x%3Ay", # `:` (what OTel encodes) + "c%2Cd", # encoded comma + "a%3Db", # encoded `=` + "%C3%A9", # multi-byte UTF-8 (é) already percent-encoded + "%2520", # a literal `%20` the upstream double-encoded + ], + ) + def test_unowned_value_encodings_pass_through_verbatim(self, value): + # Whatever percent-encoding an upstream vendor chose for its own value + # must survive byte-for-byte; we are a relay for keys we do not own. + merged = merge_baggage(f"vendor={value}", None) + assert merged == f"vendor={value}" + + def test_multiple_unowned_members_pass_through_verbatim(self): + # Several unrelated members, each with a different encoding, are all + # forwarded unchanged and in order, with braintrust.parent appended. + inbound = "p1=a%2Fb,p2=x%3Ay,p3=c%2Cd" + merged = merge_baggage(inbound, "project_id:p") + assert merged == f"{inbound},{BRAINTRUST_PARENT_KEY}=project_id:p" + + def test_preserves_member_properties(self): + # W3C baggage members may carry `;`-delimited properties. Unlike + # parse_baggage (which drops them), the relay forwards the full member + # verbatim, properties included. 
+ merged = merge_baggage("k=v;meta=1;ttl=30,vendor=y", None) + assert merged == "k=v;meta=1;ttl=30,vendor=y" + + def test_empty_value_member_is_preserved(self): + merged = merge_baggage("k=,vendor=y", None) + assert merged == "k=,vendor=y" + + def test_optional_whitespace_is_trimmed(self): + # W3C treats whitespace around list members as insignificant, so + # trimming it on forward is lossless. Pin the behavior so it is not + # mistaken for a pass-through bug. + merged = merge_baggage(" a=1 , b=2 ", "project_id:p") + assert merged == f"a=1,b=2,{BRAINTRUST_PARENT_KEY}=project_id:p" + + def test_replaces_existing_braintrust_parent(self): + # A stale inbound braintrust.parent must be dropped in favor of the + # value we supply, not duplicated. + merged = merge_baggage( + f"{BRAINTRUST_PARENT_KEY}=project_id:old,vendor=x", + "project_id:new", + ) + parsed = parse_baggage(merged) + assert parsed[BRAINTRUST_PARENT_KEY] == "project_id:new" + assert parsed["vendor"] == "x" + # Only one braintrust.parent member is emitted. + assert merged.count(f"{BRAINTRUST_PARENT_KEY}=") == 1 + + def test_drops_existing_braintrust_parent_when_no_new_value(self): + # If we have no braintrust.parent to add, an inbound one is still + # consumed (it is ours to own) rather than forwarded raw. + merged = merge_baggage(f"{BRAINTRUST_PARENT_KEY}=project_id:old,vendor=x", None) + assert merged == "vendor=x" + + def test_encodes_braintrust_parent_with_reserved_chars(self): + # Braintrust owns its braintrust.parent value, which can carry an + # arbitrary project_name containing reserved characters. That value must + # be encoded on emit and decode back cleanly. 
+ merged = merge_baggage(None, "project_name:a,b c") + assert parse_baggage(merged) == {BRAINTRUST_PARENT_KEY: "project_name:a,b c"} + + def test_skips_malformed_existing_members(self): + merged = merge_baggage("garbage,,k=v,no-equals", "project_id:abc") + parsed = parse_baggage(merged) + assert parsed["k"] == "v" + assert parsed[BRAINTRUST_PARENT_KEY] == "project_id:abc" + + +def test_get_header_case_insensitive(): + headers = {"TraceParent": VALID_TRACEPARENT, "BAGGAGE": "foo=bar"} + assert get_header(headers, "traceparent") == VALID_TRACEPARENT + assert get_header(headers, "baggage") == "foo=bar" + assert get_header(headers, "missing") is None + + +# --------------------------------------------------------------------------- # +# Send: header injection +# --------------------------------------------------------------------------- # + + +@pytest.fixture +def memory_and_logger(): + with _internal_with_memory_background_logger() as mem: + logger = init_test_logger("propagation-test") + yield mem, logger + + +class TestInject: + def test_traceparent_well_formed_and_matches_span(self, memory_and_logger): + _mem, logger = memory_and_logger + with logger.start_span(name="svc_a") as span: + carrier = span.inject({}) + + tp = carrier[TRACEPARENT_HEADER] + assert TRACEPARENT_RE.match(tp) + trace_id, span_id, _flags = parse_traceparent(tp) + assert trace_id == span.root_span_id + assert span_id == span.span_id + + def test_baggage_contains_braintrust_parent(self, memory_and_logger): + _mem, logger = memory_and_logger + with logger.start_span(name="svc_a") as span: + carrier = span.inject({}) + + parsed = parse_baggage(carrier[BAGGAGE_HEADER]) + assert parsed[BRAINTRUST_PARENT_KEY] == "project_name:propagation-test" + + def test_preexisting_baggage_preserved(self, memory_and_logger): + _mem, logger = memory_and_logger + with logger.start_span(name="svc_a") as span: + carrier = span.inject({BAGGAGE_HEADER: "user=alice,team=eng"}) + + parsed = 
parse_baggage(carrier[BAGGAGE_HEADER]) + assert parsed["user"] == "alice" + assert parsed["team"] == "eng" + assert parsed[BRAINTRUST_PARENT_KEY] == "project_name:propagation-test" + + def test_title_cased_baggage_emits_single_lowercase_header(self, memory_and_logger): + # Per W3C (§3.3.1) the header name SHOULD be sent lowercase. A carrier + # that arrives with a title-cased `Baggage` (e.g. from a framework that + # normalizes header casing) must be rewritten to a single lowercase + # `baggage` key, not left with two conflicting case-variants. + _mem, logger = memory_and_logger + with logger.start_span(name="svc_a") as span: + carrier = span.inject({"Baggage": "user=alice"}) + + baggage_keys = [k for k in carrier if k.lower() == BAGGAGE_HEADER] + assert baggage_keys == [BAGGAGE_HEADER] + parsed = parse_baggage(carrier[BAGGAGE_HEADER]) + assert parsed["user"] == "alice" + assert parsed[BRAINTRUST_PARENT_KEY] == "project_name:propagation-test" + + def test_title_cased_traceparent_emits_single_lowercase_header(self, memory_and_logger): + # A pre-existing title-cased `Traceparent` must be replaced by a single + # lowercase `traceparent` (W3C §3.2.1), with no stale variant remaining. 
+ _mem, logger = memory_and_logger + with logger.start_span(name="svc_a") as span: + carrier = span.inject({"Traceparent": "stale"}) + + traceparent_keys = [k for k in carrier if k.lower() == TRACEPARENT_HEADER] + assert traceparent_keys == [TRACEPARENT_HEADER] + tp = parse_traceparent(carrier[TRACEPARENT_HEADER]) + assert (tp.trace_id, tp.span_id) == (span.root_span_id, span.span_id) + + def test_never_emits_x_bt_parent(self, memory_and_logger): + _mem, logger = memory_and_logger + with logger.start_span(name="svc_a") as span: + carrier = span.inject({}) + assert "x-bt-parent" not in {k.lower() for k in carrier} + + def test_no_braintrust_parent_injects_traceparent_without_baggage(self): + # When the Braintrust parent is unknown but the span has W3C-shaped hex + # ids, traceparent is still injected and braintrust.parent is absent from + # baggage (rather than emitted empty). + from braintrust.logger import _inject_into_carrier + + carrier = {} + _inject_into_carrier( + carrier, + trace_id="4bf92f3577b34da6a3ce929d0e0e4736", + span_id="00f067aa0ba902b7", + braintrust_parent=None, + ) + assert TRACEPARENT_RE.match(carrier[TRACEPARENT_HEADER]) + # No braintrust parent and no pre-existing baggage -> no baggage header. + assert BAGGAGE_HEADER not in carrier + + def test_no_braintrust_parent_preserves_existing_baggage_without_bt_key(self): + # With an unknown Braintrust parent, pre-existing baggage is preserved + # but no (empty) braintrust.parent entry is added. 
+ from braintrust.logger import _inject_into_carrier + + carrier = {BAGGAGE_HEADER: "user=alice"} + _inject_into_carrier( + carrier, + trace_id="4bf92f3577b34da6a3ce929d0e0e4736", + span_id="00f067aa0ba902b7", + braintrust_parent=None, + ) + parsed = parse_baggage(carrier[BAGGAGE_HEADER]) + assert parsed["user"] == "alice" + assert BRAINTRUST_PARENT_KEY not in parsed + + def test_inject_trace_context_free_function(self, memory_and_logger): + _mem, logger = memory_and_logger + with logger.start_span(name="svc_a") as span: + carrier = inject_trace_context() + tp = parse_traceparent(carrier[TRACEPARENT_HEADER]) + assert (tp.trace_id, tp.span_id) == (span.root_span_id, span.span_id) + + def test_inject_no_current_span_is_safe(self): + # No active span -> NOOP span -> no traceparent, no raise. + carrier = inject_trace_context({}) + assert TRACEPARENT_HEADER not in carrier + + +# --------------------------------------------------------------------------- # +# Receive: header extraction +# --------------------------------------------------------------------------- # + + +class TestExtract: + # extract_trace_context returns an opaque W3C-headers dict; behavior is + # asserted by passing it into start_span and inspecting the resulting span. 
+ + def test_traceparent_with_baggage_parent(self, memory_and_logger): + _mem, logger = memory_and_logger + ctx = extract_trace_context( + { + "traceparent": VALID_TRACEPARENT, + "baggage": f"{BRAINTRUST_PARENT_KEY}=project_id:abc123", + } + ) + with logger.start_span(name="h", parent=ctx) as span: + assert span.root_span_id == VALID_TRACE_ID + assert span.span_parents == [VALID_SPAN_ID] + + def test_traceparent_baggage_with_unrelated_keys(self, memory_and_logger): + _mem, logger = memory_and_logger + ctx = extract_trace_context( + { + "traceparent": VALID_TRACEPARENT, + "baggage": f"user=alice,{BRAINTRUST_PARENT_KEY}=project_id:abc,team=eng", + } + ) + with logger.start_span(name="h", parent=ctx) as span: + assert span.root_span_id == VALID_TRACE_ID + assert span.span_parents == [VALID_SPAN_ID] + + def test_traceparent_no_baggage_uses_current_logger(self, memory_and_logger): + # No braintrust.parent in baggage -> route under the active logger. + _mem, logger = memory_and_logger + ctx = extract_trace_context({"traceparent": VALID_TRACEPARENT}) + assert ctx is not None # context is still produced; routing resolved at start_span + with logger.start_span(name="h", parent=ctx) as span: + assert span.root_span_id == VALID_TRACE_ID + assert span.span_parents == [VALID_SPAN_ID] + + def test_no_headers_returns_none(self): + assert extract_trace_context({}) is None + assert extract_trace_context(None) is None + + def test_malformed_traceparent_returns_none(self): + # No valid traceparent -> no context (fresh root downstream). 
+ assert ( + extract_trace_context( + { + "traceparent": "garbage", + "baggage": f"{BRAINTRUST_PARENT_KEY}=project_id:abc", + } + ) + is None + ) + + def test_case_insensitive_headers(self, memory_and_logger): + _mem, logger = memory_and_logger + ctx = extract_trace_context( + { + "TraceParent": VALID_TRACEPARENT, + "Baggage": f"{BRAINTRUST_PARENT_KEY}=project_id:abc", + } + ) + with logger.start_span(name="h", parent=ctx) as span: + assert span.root_span_id == VALID_TRACE_ID + assert span.span_parents == [VALID_SPAN_ID] + + def test_extract_returns_opaque_dict(self): + ctx = extract_trace_context( + { + "traceparent": VALID_TRACEPARENT, + "baggage": f"{BRAINTRUST_PARENT_KEY}=project_id:abc", + "tracestate": "congo=t61", + } + ) + assert isinstance(ctx, dict) + assert ctx[TRACEPARENT_HEADER] == VALID_TRACEPARENT + + def test_no_parent_and_no_logger_starts_fresh_root(self, memory_and_logger): + # Valid traceparent but no braintrust.parent and (here) routing falls to + # the active logger; with linkage when a logger is present we still link. + # When neither baggage nor logger can route, start_span must not raise. + ctx = extract_trace_context({"traceparent": VALID_TRACEPARENT}) + # No active logger context here would yield a fresh root; with the + # fixture's logger active, it links. Either way, no exception. 
+ _mem, logger = memory_and_logger + with logger.start_span(name="h", parent=ctx) as span: + assert span.span_id is not None + + +# --------------------------------------------------------------------------- # +# Round trip + cross-service linking +# --------------------------------------------------------------------------- # + + +def test_round_trip_inject_extract(memory_and_logger): + _mem, logger = memory_and_logger + with logger.start_span(name="svc_a") as span_a: + carrier = span_a.inject({}) + a_root, a_span = span_a.root_span_id, span_a.span_id + + parent = extract_trace_context(carrier) + with logger.start_span(name="svc_b", parent=parent) as span_b: + assert span_b.root_span_id == a_root + assert span_b.span_parents == [a_span] + + +def test_cross_service_linking(memory_and_logger): + _mem, logger = memory_and_logger + with logger.start_span(name="svc_a") as span_a: + carrier = span_a.inject({}) + a_root, a_span = span_a.root_span_id, span_a.span_id + + parent = extract_trace_context(carrier) + with logger.start_span(name="svc_b", parent=parent) as span_b: + assert span_b.root_span_id == a_root + assert span_b.span_parents == [a_span] + + +def test_inject_does_not_break_span_emission_without_parent(): + # Inject with an unknown braintrust parent must not drop the span. + with _internal_with_memory_background_logger() as mem: + logger = init_test_logger("emit-test") + with logger.start_span(name="svc_a") as span: + span.inject({}) + span.log(output="hello") + logger.flush() + spans = mem.pop() + assert any(s.get("output") == "hello" for s in spans) + + +def test_legacy_export_slug_round_trips_with_hex_ids(memory_and_logger): + # The pre-existing span.export() + start_span(parent=slug) pattern must keep + # working with the new default hex IDs (8-byte span id, 16-byte trace id). 
+ _mem, logger = memory_and_logger + with logger.start_span(name="parent") as parent: + slug = parent.export() + p_root, p_span = parent.root_span_id, parent.span_id + + # Sanity: ids are OTEL-shaped hex. + assert len(p_span) == 16 + assert len(p_root) == 32 + + with logger.start_span(name="child", parent=slug) as child: + assert child.root_span_id == p_root + assert child.span_parents == [p_span] + + +def test_inject_noops_in_legacy_uuid_mode(): + # In legacy UUID mode, span ids aren't W3C-shaped, so inject must not write + # traceparent/baggage and must leave pre-existing headers untouched. + import os + + from braintrust.test_helpers import preserve_env_vars + + with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_LEGACY_IDS"): + os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ["BRAINTRUST_LEGACY_IDS"] = "true" + with _internal_with_memory_background_logger(): + logger = init_test_logger("legacy-inject") + with logger.start_span(name="p") as span: + # Legacy spans use UUID ids (share root == span). + assert len(span.span_id) == 36 + carrier = span.inject({"existing": "header"}) + + assert carrier == {"existing": "header"} + assert TRACEPARENT_HEADER not in carrier + assert BAGGAGE_HEADER not in carrier + + +def test_legacy_parent_slug_linked_in_hex_mode(): + # Back-compat: in hex mode (default), a parent slug carrying legacy UUID ids + # is still a usable parent. The child links to the slug's span/root ids even + # though they are a different format from the child's own (hex) span id. This + # keeps `start_span(parent=)` working across an SDK upgrade where an + # older (UUID) sender's slug reaches a newer (hex) receiver. 
+ import uuid + + from braintrust.span_identifier_v3 import SpanComponentsV3 + + p_span = str(uuid.uuid4()) + p_root = str(uuid.uuid4()) + legacy_slug = SpanComponentsV3( + object_type=SpanObjectTypeV3.PROJECT_LOGS, + object_id="legacy-proj", + row_id=str(uuid.uuid4()), + span_id=p_span, + root_span_id=p_root, + ).to_str() + + with _internal_with_memory_background_logger(): + logger = init_test_logger("legacy-proj") + with logger.start_span(name="child", parent=legacy_slug) as child: + # Links to the slug's UUID ids; the child's own span id stays hex. + assert child.root_span_id == p_root + assert child.span_parents == [p_span] + assert len(child.span_id) == 16 + + +def test_legacy_parent_slug_linked_in_hex_mode_toplevel_start_span(): + # Same as above, but through the module-level `start_span`, which resolves + # the parent slug independently of `Logger.start_span`. + import uuid + + from braintrust.logger import start_span + from braintrust.span_identifier_v3 import SpanComponentsV3 + + p_span = str(uuid.uuid4()) + p_root = str(uuid.uuid4()) + legacy_slug = SpanComponentsV3( + object_type=SpanObjectTypeV3.PROJECT_LOGS, + object_id="legacy-proj", + row_id=str(uuid.uuid4()), + span_id=p_span, + root_span_id=p_root, + ).to_str() + + with _internal_with_memory_background_logger(): + init_test_logger("legacy-proj") + with start_span(name="child", parent=legacy_slug) as child: + assert child.root_span_id == p_root + assert child.span_parents == [p_span] + assert len(child.span_id) == 16 + + +def test_legacy_parent_slug_linked_in_legacy_mode(): + # In legacy UUID mode, a parent slug carrying UUID ids links to the slug's + # span/root ids (same format). 
+ import os + import uuid + + from braintrust.span_identifier_v3 import SpanComponentsV3 + from braintrust.test_helpers import preserve_env_vars + + p_span = str(uuid.uuid4()) + p_root = str(uuid.uuid4()) + legacy_slug = SpanComponentsV3( + object_type=SpanObjectTypeV3.PROJECT_LOGS, + object_id="legacy-proj", + row_id=str(uuid.uuid4()), + span_id=p_span, + root_span_id=p_root, + ).to_str() + + with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_LEGACY_IDS"): + os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ["BRAINTRUST_LEGACY_IDS"] = "true" + with _internal_with_memory_background_logger(): + logger = init_test_logger("legacy-proj") + with logger.start_span(name="child", parent=legacy_slug) as child: + assert child.root_span_id == p_root + assert child.span_parents == [p_span] + + +def test_hex_parent_slug_linked_in_legacy_mode(): + # Back-compat (reverse direction): in legacy UUID mode, a parent slug + # carrying hex ids still links to the slug's span/root ids. The child's own + # span id stays in the active (UUID) format. + import os + + from braintrust.span_identifier_v4 import SpanComponentsV4 + from braintrust.test_helpers import preserve_env_vars + + p_span = "00f067aa0ba902b7" # 8-byte hex + p_root = "4bf92f3577b34da6a3ce929d0e0e4736" # 16-byte hex + hex_slug = SpanComponentsV4( + object_type=SpanObjectTypeV3.PROJECT_LOGS, + object_id="legacy-proj", + row_id="bt-row", + span_id=p_span, + root_span_id=p_root, + ).to_str() + + with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_LEGACY_IDS"): + os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ["BRAINTRUST_LEGACY_IDS"] = "true" + with _internal_with_memory_background_logger(): + logger = init_test_logger("legacy-proj") + with logger.start_span(name="child", parent=hex_slug) as child: + # Links to the slug's hex ids; the child's own span id stays UUID. 
+ assert child.root_span_id == p_root + assert child.span_parents == [p_span] + assert len(child.span_id) == 36 + + +# --------------------------------------------------------------------------- # +# tracestate pass-through (W3C: forward upstream vendor state) +# --------------------------------------------------------------------------- # + +UPSTREAM_TRACESTATE = "congo=t61rcWkgMzE,rojo=00f067aa0ba902b7" + + +class TestTracestate: + def test_extract_then_inject_forwards_tracestate(self, memory_and_logger): + # Pass-through: a span started from inbound headers carrying tracestate + # forwards that tracestate unchanged when it later injects. + _mem, logger = memory_and_logger + inbound = { + "traceparent": VALID_TRACEPARENT, + "baggage": f"{BRAINTRUST_PARENT_KEY}=project_id:abc", + "tracestate": UPSTREAM_TRACESTATE, + } + parent = extract_trace_context(inbound) + with logger.start_span(name="mid", parent=parent) as span: + outbound = span.inject({}) + assert outbound[TRACESTATE_HEADER] == UPSTREAM_TRACESTATE + assert TRACEPARENT_RE.match(outbound[TRACEPARENT_HEADER]) + + def test_no_tracestate_emitted_when_none_inbound(self, memory_and_logger): + # A trace we originate (or that arrives without tracestate) emits none. + _mem, logger = memory_and_logger + with logger.start_span(name="root") as span: + outbound = span.inject({}) + assert TRACESTATE_HEADER not in outbound + + def test_extract_then_inject_preserves_unsampled_flag(self, memory_and_logger): + # A span started from inbound headers carrying a not-sampled (`-00`) + # traceparent must re-emit `-00` when it injects, not upgrade it to `-01`. + # Otherwise a mid-chain Braintrust service overrides the upstream + # sampling decision for everything downstream. 
+ _mem, logger = memory_and_logger + unsampled = f"00-{VALID_TRACE_ID}-{VALID_SPAN_ID}-00" + inbound = { + "traceparent": unsampled, + "baggage": f"{BRAINTRUST_PARENT_KEY}=project_id:abc", + } + parent = extract_trace_context(inbound) + with logger.start_span(name="mid", parent=parent) as span: + outbound = span.inject({}) + assert outbound[TRACEPARENT_HEADER].endswith("-00") + + def test_extract_then_inject_preserves_sampled_flag(self, memory_and_logger): + # The sampled (`-01`) case must also round-trip unchanged. + _mem, logger = memory_and_logger + inbound = { + "traceparent": VALID_TRACEPARENT, # ...-01 + "baggage": f"{BRAINTRUST_PARENT_KEY}=project_id:abc", + } + parent = extract_trace_context(inbound) + with logger.start_span(name="mid", parent=parent) as span: + outbound = span.inject({}) + assert outbound[TRACEPARENT_HEADER].endswith("-01") diff --git a/py/src/braintrust/test_span_components.py b/py/src/braintrust/test_span_components.py index 51b89db2..1d654cb5 100644 --- a/py/src/braintrust/test_span_components.py +++ b/py/src/braintrust/test_span_components.py @@ -3,6 +3,7 @@ Tests serialization, deserialization, OTEL compatibility, and backward compatibility. """ +import os from uuid import uuid4 import pytest @@ -248,6 +249,16 @@ def test_invalid_object_type(self): object_id="test-id", ) + def test_unknown_json_fields_ignored(self): + """Forward-compat: unknown JSON fields (e.g. 
added by a newer SDK) are ignored, not errored.""" + obj = { + "object_type": SpanObjectTypeV3.PROJECT_LOGS.value, + "object_id": "p", + "future_field": "x", + } + comp = SpanComponentsV4._from_json_obj(obj) + assert comp.object_id == "p" + def test_missing_required_fields(self): """Test that missing required fields raise errors.""" with pytest.raises(AssertionError): @@ -329,76 +340,53 @@ def test_object_id_fields_without_object_id(self): class TestExportFormatSelection: - """Test that span export format is selected based on BRAINTRUST_OTEL_COMPAT environment variable.""" + """Test that span export format is coupled to the active ID format. - def test_export_format_based_on_env_variable(self): - """Test that export format changes based on BRAINTRUST_OTEL_COMPAT environment variable.""" - import os + Hex IDs (the default) serialize as V4; legacy UUID IDs (opted into via + BRAINTRUST_LEGACY_IDS) serialize as V3. + """ - from braintrust.test_helpers import init_test_logger + def test_export_format_based_on_env_variable(self): + """Test that export format follows the legacy-UUID opt-out flag.""" + from braintrust.test_helpers import init_test_logger, preserve_env_vars - # Test with OTEL_COMPAT=false (should use V3) - original_env = os.environ.get("BRAINTRUST_OTEL_COMPAT") - try: - os.environ["BRAINTRUST_OTEL_COMPAT"] = "false" + with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_LEGACY_IDS"): + # Legacy UUID mode should use V3. 
+ os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ["BRAINTRUST_LEGACY_IDS"] = "true" - # Initialize test logger and create a span l = init_test_logger("test_export_v3") with l.start_span(name="test_span") as span: export_v3_mode = span.export() - # Verify it can be parsed by V3 - parsed_as_v3 = SpanComponentsV3.from_str(export_v3_mode) - assert parsed_as_v3 is not None + assert SpanComponentsV4.get_version(export_v3_mode) == 3 - # Test with OTEL_COMPAT=true (should use V4) - os.environ["BRAINTRUST_OTEL_COMPAT"] = "true" + # Default (hex) mode should use V4. + os.environ.pop("BRAINTRUST_LEGACY_IDS", None) - # Initialize test logger and create a span l = init_test_logger("test_export_v4") with l.start_span(name="test_span") as span: export_v4_mode = span.export() - # Verify it can be parsed by V4 - parsed_as_v4 = SpanComponentsV4.from_str(export_v4_mode) - assert parsed_as_v4 is not None - - # Both should be parseable by V4 (backward compatibility) - v4_from_v3 = SpanComponentsV4.from_str(export_v3_mode) - v4_from_v4 = SpanComponentsV4.from_str(export_v4_mode) - assert v4_from_v3 is not None - assert v4_from_v4 is not None - - finally: - # Clean up environment - if original_env is not None: - os.environ["BRAINTRUST_OTEL_COMPAT"] = original_env - elif "BRAINTRUST_OTEL_COMPAT" in os.environ: - del os.environ["BRAINTRUST_OTEL_COMPAT"] - - def test_export_uses_v3_by_default(self): - """Test that export uses V3 format by default when BRAINTRUST_OTEL_COMPAT is not set.""" - import os - - from braintrust.test_helpers import init_test_logger - - # Ensure environment variable is not set - original_env = os.environ.get("BRAINTRUST_OTEL_COMPAT") - try: - if "BRAINTRUST_OTEL_COMPAT" in os.environ: - del os.environ["BRAINTRUST_OTEL_COMPAT"] - - # Initialize test logger and create a span - l = init_test_logger("test_default_v3") + assert SpanComponentsV4.get_version(export_v4_mode) == 4 + + # Both should be parseable by V4 (backward compatibility). 
+ assert SpanComponentsV4.from_str(export_v3_mode) is not None + assert SpanComponentsV4.from_str(export_v4_mode) is not None + + def test_export_uses_v4_by_default(self): + """Test that export uses V4 format by default (no env vars set).""" + from braintrust.test_helpers import init_test_logger, preserve_env_vars + + with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_LEGACY_IDS"): + os.environ.pop("BRAINTRUST_OTEL_COMPAT", None) + os.environ.pop("BRAINTRUST_LEGACY_IDS", None) + + l = init_test_logger("test_default_v4") with l.start_span(name="test_span") as span: export_default = span.export() - # Should be parseable by V3 since V3 is the default - parsed_as_v3 = SpanComponentsV3.from_str(export_default) - assert parsed_as_v3 is not None - assert parsed_as_v3.object_type is not None - - finally: - # Restore environment - if original_env is not None: - os.environ["BRAINTRUST_OTEL_COMPAT"] = original_env + assert SpanComponentsV4.get_version(export_default) == 4 + parsed = SpanComponentsV4.from_str(export_default) + assert parsed is not None + assert parsed.object_type is not None