Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions py/src/braintrust/env.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
import logging
import math
import os
import shlex
Expand All @@ -8,6 +9,9 @@
from typing import TypeVar, cast


_logger = logging.getLogger(__name__)


T = TypeVar("T")
EnvValue = bool | float | int | str
_Parser = Callable[[str], EnvValue | None]
Expand Down Expand Up @@ -150,6 +154,47 @@ def _parse_dotenv_contents(self, contents: str) -> EnvValue | None:
return None


_warned_legacy_uuid_conflict = False


def _resolve_use_legacy_uuid_ids() -> bool:
    """Decide whether span/trace IDs should use the legacy UUID format.

    By default the SDK produces OpenTelemetry-compatible hex IDs (16-byte
    trace id / 8-byte span id) with V4 span-component export. Setting
    BRAINTRUST_LEGACY_IDS opts back into UUID IDs with V3 export.

    BRAINTRUST_OTEL_COMPAT (which selects the OpenTelemetry context manager)
    requires hex IDs and therefore takes precedence: when it is enabled this
    function returns False regardless of BRAINTRUST_LEGACY_IDS. If both are
    set, a warning is logged; the module-level flag
    ``_warned_legacy_uuid_conflict`` keeps that warning from repeating on
    later calls, since both env vars are re-read on every call.

    Returns:
        The BRAINTRUST_LEGACY_IDS value (default False) when
        BRAINTRUST_OTEL_COMPAT is not enabled, otherwise False.
    """
    global _warned_legacy_uuid_conflict

    # Both variables are read on every call, legacy first, so environment
    # changes made after import are picked up.
    wants_legacy = EnvVar("BRAINTRUST_LEGACY_IDS", EnvParser.BOOL).get(False)
    otel_compat = EnvVar("BRAINTRUST_OTEL_COMPAT", EnvParser.BOOL).get(False)

    if not otel_compat:
        return wants_legacy

    # OTEL compat is on: legacy IDs are overridden. Warn about the conflict
    # only the first time it is observed.
    if wants_legacy and not _warned_legacy_uuid_conflict:
        _warned_legacy_uuid_conflict = True
        _logger.warning(
            "BRAINTRUST_LEGACY_IDS is ignored because BRAINTRUST_OTEL_COMPAT "
            "requires OpenTelemetry-compatible hex span IDs. Using hex IDs."
        )
    return False


class _LegacyUuidIdsField:
    """Descriptor that computes the legacy-UUID-IDs setting on every read.

    Only ``__get__`` is defined, so the attribute is evaluated lazily at
    access time instead of being cached at import time. Changes to the
    relevant environment variables (e.g. in tests) are therefore visible on
    the next access.
    """

    def __get__(self, instance: object, owner: type | None = None) -> bool:
        """Return the current setting; ``instance`` and ``owner`` are unused."""
        use_legacy_ids = _resolve_use_legacy_uuid_ids()
        return use_legacy_ids


class BraintrustEnv:
API_KEY = EnvVar("BRAINTRUST_API_KEY", EnvParser.STRING)
HTTP_TIMEOUT = EnvVar("BRAINTRUST_HTTP_TIMEOUT", EnvParser.FLOAT)
Expand All @@ -163,3 +208,6 @@ class BraintrustEnv:
ALL_PUBLISH_PAYLOADS_DIR = EnvVar("BRAINTRUST_ALL_PUBLISH_PAYLOADS_DIR", EnvParser.STRING)
DISABLE_ATEXIT_FLUSH = EnvVar("BRAINTRUST_DISABLE_ATEXIT_FLUSH", EnvParser.BOOL)
OTEL_COMPAT = EnvVar("BRAINTRUST_OTEL_COMPAT", EnvParser.BOOL)
# Opt out of the default OpenTelemetry-compatible hex span/trace IDs and use
# legacy UUID-based IDs (and V3 span-component export) instead.
LEGACY_IDS = _LegacyUuidIdsField()
6 changes: 4 additions & 2 deletions py/src/braintrust/id_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ def get_id_generator():

This eliminates global state and makes tests parallelizable.
Each caller gets their own generator instance.

Defaults to OpenTelemetry-compatible hex IDs. Set BRAINTRUST_LEGACY_IDS
to opt back into legacy UUID-based IDs.
"""
use_otel = BraintrustEnv.OTEL_COMPAT.get(False)
return OTELIDGenerator() if use_otel else UUIDGenerator()
return UUIDGenerator() if BraintrustEnv.LEGACY_IDS else OTELIDGenerator()


class IDGenerator(ABC):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,13 +210,16 @@ async def calculator_handler(args):
assert tool_span["output"] is not None
assert any(parent_id in llm_span_ids for parent_id in tool_span["span_parents"])

root_span_id = task_span["span_id"]
# Descendants share the task's trace (``root_span_id``); direct children
# reference the task's ``span_id`` in ``span_parents``.
task_root_span_id = task_span["root_span_id"]
task_span_id = task_span["span_id"]
for llm_span in llm_spans:
assert llm_span["root_span_id"] == root_span_id
assert root_span_id in llm_span["span_parents"]
assert llm_span["root_span_id"] == task_root_span_id
assert task_span_id in llm_span["span_parents"]

for tool_span in tool_spans:
assert tool_span["root_span_id"] == root_span_id
assert tool_span["root_span_id"] == task_root_span_id
assert any(parent_id in llm_span_ids for parent_id in tool_span["span_parents"])


Expand Down Expand Up @@ -454,7 +457,8 @@ async def user_prompt_hook(input_data: Any, tool_use_id: str | None, context: An

hook_span = function_spans[0]
assert task_span["input"] == prompt
assert hook_span["root_span_id"] == task_span["span_id"]
# The hook span is a descendant of the task span, so they share a trace.
assert hook_span["root_span_id"] == task_span["root_span_id"]
assert hook_span["input"]["hook_event_name"] == "UserPromptSubmit"
assert hook_span["input"]["prompt"] == prompt
assert hook_span["output"]["hookSpecificOutput"]["hookEventName"] == "UserPromptSubmit"
Expand Down Expand Up @@ -546,7 +550,8 @@ async def post_tool_hook(input_data: Any, tool_use_id: str | None, context: Any)
post_span = hook_span_by_event["PostToolUse"]

for hook_span in (pre_span, post_span):
assert hook_span["root_span_id"] == task_span["span_id"]
# Hook spans are descendants of the task span, so they share a trace.
assert hook_span["root_span_id"] == task_span["root_span_id"]
assert hook_span["input"]["tool_name"] == "Bash"

assert pre_span["output"]["hookSpecificOutput"]["hookEventName"] == "PreToolUse"
Expand Down Expand Up @@ -681,7 +686,9 @@ async def test_bundled_subagent_creates_task_span(memory_logger):
assert subagent_spans, "Expected at least one subagent task span"
assert any(s.get("metadata", {}).get("task_id") for s in subagent_spans)
for subagent_span in subagent_spans:
assert subagent_span["root_span_id"] == root_task_span["span_id"]
# Subagent spans are descendants of the root task span, so they share a
# trace; the root task ``span_id`` appears in ``span_parents`` below.
assert subagent_span["root_span_id"] == root_task_span["root_span_id"]
parents = set(subagent_span["span_parents"])
tool_use_id = subagent_span.get("metadata", {}).get("tool_use_id")
matching_tool_span = next(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,6 @@ def test_wrap_huggingface_hub_chat_completion_sync(memory_logger):
# With no parent span on the stack, the LLM span is its own root and has
# no ``span_parents``.
assert not span.get("span_parents")
assert span["span_id"] == span["root_span_id"]
# The user's ``provider=`` kwarg overrides the default "huggingface"
# identity so the span reflects the actual routing target.
assert span["metadata"]["provider"] == CHAT_PROVIDER
Expand Down Expand Up @@ -317,7 +316,6 @@ def test_wrap_huggingface_hub_chat_completion_streaming(memory_logger):
# when the iterator is exhausted, with no parent on the stack the span is
# still its own root.
assert not span.get("span_parents")
assert span["span_id"] == span["root_span_id"]
assert span["metadata"]["provider"] == CHAT_PROVIDER

# Aggregated output is ``{"choices": [{"index", "message": {...}, "finish_reason"?}]}``.
Expand Down Expand Up @@ -476,7 +474,6 @@ async def _run():
span = spans[0]
assert span["span_attributes"]["name"] == "huggingface.chat_completion"
assert not span.get("span_parents")
assert span["span_id"] == span["root_span_id"]
assert span["metadata"]["provider"] == CHAT_PROVIDER


Expand Down
36 changes: 22 additions & 14 deletions py/src/braintrust/integrations/langchain/test_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ def test_llm_calls(logger_memory_logger):
spans = memory_logger.pop()
assert len(spans) == 3

# ``root_span_id`` is the root span's own span_id (the parent reference for
# its children); ``trace_root_id`` is the trace shared by every span.
root_span_id = spans[0]["span_id"]
trace_root_id = spans[0]["root_span_id"]

assert_matches_object(
spans,
Expand All @@ -81,7 +84,7 @@ def test_llm_calls(logger_memory_logger):
},
"metadata": {"tags": []},
"span_id": root_span_id,
"root_span_id": root_span_id,
"root_span_id": trace_root_id,
},
{
"span_attributes": {"name": "ChatPromptTemplate"},
Expand All @@ -97,7 +100,7 @@ def test_llm_calls(logger_memory_logger):
]
},
"metadata": {"tags": ["seq:step:1"]},
"root_span_id": root_span_id,
"root_span_id": trace_root_id,
"span_parents": [root_span_id],
},
{
Expand Down Expand Up @@ -144,7 +147,7 @@ def test_llm_calls(logger_memory_logger):
"tags": ["seq:step:2"],
"model": "gpt-4o-mini-2024-07-18",
},
"root_span_id": root_span_id,
"root_span_id": trace_root_id,
"span_parents": [root_span_id],
},
],
Expand All @@ -171,6 +174,7 @@ def test_chain_with_memory(logger_memory_logger):
assert len(spans) == 3

root_span_id = spans[0]["span_id"]
trace_root_id = spans[0]["root_span_id"]

assert_matches_object(
spans,
Expand All @@ -189,7 +193,7 @@ def test_chain_with_memory(logger_memory_logger):
},
"metadata": {"tags": ["test"]},
"span_id": root_span_id,
"root_span_id": root_span_id,
"root_span_id": trace_root_id,
},
{
"span_attributes": {"name": "ChatPromptTemplate"},
Expand All @@ -205,7 +209,7 @@ def test_chain_with_memory(logger_memory_logger):
]
},
"metadata": {"tags": ["seq:step:1", "test"]},
"root_span_id": root_span_id,
"root_span_id": trace_root_id,
"span_parents": [root_span_id],
},
{
Expand Down Expand Up @@ -252,7 +256,7 @@ def test_chain_with_memory(logger_memory_logger):
"tags": ["seq:step:2", "test"],
"model": "gpt-4o-mini-2024-07-18",
},
"root_span_id": root_span_id,
"root_span_id": trace_root_id,
"span_parents": [root_span_id],
},
],
Expand Down Expand Up @@ -301,13 +305,14 @@ def calculator(input: CalculatorInput) -> str:

spans = memory_logger.pop()
root_span_id = spans[0]["span_id"]
trace_root_id = spans[0]["root_span_id"]

assert_matches_object(
spans,
[
{
"span_id": root_span_id,
"root_span_id": root_span_id,
"root_span_id": trace_root_id,
"span_attributes": {
"name": "ChatOpenAI",
"type": "llm",
Expand Down Expand Up @@ -640,13 +645,13 @@ def test_chain_null_values(logger_memory_logger):
flush()

spans = memory_logger.pop()
root_span_id = spans[0]["span_id"]
trace_root_id = spans[0]["root_span_id"]

assert_matches_object(
spans,
[
{
"root_span_id": root_span_id,
"root_span_id": trace_root_id,
"span_attributes": {
"name": "TestChain",
"type": "task",
Expand Down Expand Up @@ -721,7 +726,10 @@ def task_fn(input, hooks):

# Find the root eval span
root_eval_span = [s for s in spans if s.get("span_attributes", {}).get("name") == "test-consecutive-eval"][0]
# ``root_eval_span_id`` is the eval root's own span_id (the parent reference
# for its children); ``trace_root_id`` is the trace shared by every span.
root_eval_span_id = root_eval_span["span_id"]
trace_root_id = root_eval_span["root_span_id"]

# Find the eval dataset record spans (direct children of root eval span)
eval_record_spans = [
Expand Down Expand Up @@ -751,7 +759,7 @@ def task_fn(input, hooks):
[
{
"span_id": root_eval_span_id,
"root_span_id": root_eval_span_id,
"root_span_id": trace_root_id,
"span_attributes": {
"name": "test-consecutive-eval",
"type": "eval",
Expand All @@ -765,7 +773,7 @@ def task_fn(input, hooks):
[eval_record_1],
[
{
"root_span_id": root_eval_span_id,
"root_span_id": trace_root_id,
"span_parents": [root_eval_span_id],
"span_attributes": {
"name": "eval",
Expand All @@ -781,7 +789,7 @@ def task_fn(input, hooks):
[eval_record_2],
[
{
"root_span_id": root_eval_span_id,
"root_span_id": trace_root_id,
"span_parents": [root_eval_span_id],
"span_attributes": {
"name": "eval",
Expand All @@ -797,7 +805,7 @@ def task_fn(input, hooks):
[task_1_span],
[
{
"root_span_id": root_eval_span_id,
"root_span_id": trace_root_id,
"span_parents": [eval_record_1["span_id"]],
"span_attributes": {
"name": "task",
Expand All @@ -813,7 +821,7 @@ def task_fn(input, hooks):
[task_2_span],
[
{
"root_span_id": root_eval_span_id,
"root_span_id": trace_root_id,
"span_parents": [eval_record_2["span_id"]],
"span_attributes": {
"name": "task",
Expand Down
9 changes: 6 additions & 3 deletions py/src/braintrust/integrations/langchain/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ def test_global_handler(logger_memory_logger):
spans = memory_logger.pop()
assert len(spans) > 0

# ``root_span_id`` is the root span's own span_id (the parent reference for
# its children); ``trace_root_id`` is the trace shared by every span.
root_span_id = spans[0]["span_id"]
trace_root_id = spans[0]["root_span_id"]

# Spans would be empty if the handler was not registered, let's make sure it logged what we expect
assert_matches_object(
Expand All @@ -83,7 +86,7 @@ def test_global_handler(logger_memory_logger):
},
"metadata": {"tags": []},
"span_id": root_span_id,
"root_span_id": root_span_id,
"root_span_id": trace_root_id,
},
{
"span_attributes": {"name": "ChatPromptTemplate"},
Expand All @@ -99,7 +102,7 @@ def test_global_handler(logger_memory_logger):
]
},
"metadata": {"tags": ["seq:step:1"]},
"root_span_id": root_span_id,
"root_span_id": trace_root_id,
"span_parents": [root_span_id],
},
{
Expand Down Expand Up @@ -146,7 +149,7 @@ def test_global_handler(logger_memory_logger):
"tags": ["seq:step:2"],
"model": "gpt-4o-mini-2024-07-18",
},
"root_span_id": root_span_id,
"root_span_id": trace_root_id,
"span_parents": [root_span_id],
},
],
Expand Down
Loading
Loading