diff --git a/src/strands/models/_defaults.py b/src/strands/models/_defaults.py new file mode 100644 index 000000000..e463b8ef6 --- /dev/null +++ b/src/strands/models/_defaults.py @@ -0,0 +1,177 @@ +"""Default model metadata lookup tables. + +Provides context window limits for known model IDs across all providers. +Values sourced from provider documentation and +https://github.com/BerriAI/litellm/blob/litellm_internal_staging/model_prices_and_context_window.json + +Applied to providers with well-known, fixed model IDs: Bedrock, Anthropic, OpenAI, +OpenAI Responses, Gemini, and Mistral. Providers that use local/custom model IDs +(Ollama, LlamaCpp, SageMaker) or proxy to other providers with their own prefixed +ID format (LiteLLM) are excluded — their context windows depend on deployment config, +not a static table. +""" + +import logging +from collections.abc import Mapping +from typing import TypeVar + +logger = logging.getLogger(__name__) + +_C = TypeVar("_C", bound=Mapping[str, object]) + +# Context window limits (in tokens) for known model IDs. +# +# Best-effort lookup table — unknown models return None and callers +# fall back gracefully (e.g. proactive compression is disabled). +# Users can always override with an explicit context_window_limit in their model config. +# +# For Bedrock models with cross-region prefixes (e.g. us., eu., global.), +# get_context_window_limit strips the prefix before lookup so only the base model ID is needed here. +_CONTEXT_WINDOW_LIMITS: dict[str, int] = { + # Anthropic (direct API) + "claude-sonnet-4-6": 1_000_000, + "claude-sonnet-4-20250514": 1_000_000, + "claude-sonnet-4-5": 200_000, + "claude-sonnet-4-5-20250929": 200_000, + "claude-opus-4-6": 1_000_000, + "claude-opus-4-6-20260205": 1_000_000, + "claude-opus-4-7": 1_000_000, + "claude-opus-4-7-20260416": 1_000_000, + "claude-opus-4-5": 200_000, + "claude-opus-4-5-20251101": 200_000, + "claude-opus-4-20250514": 200_000, + "claude-opus-4-1": 200_000, + "claude-opus-4-1-20250805": 200_000, + "claude-haiku-4-5": 200_000, + "claude-haiku-4-5-20251001": 200_000, + "claude-3-7-sonnet-20250219": 200_000, + "claude-3-5-sonnet-20241022": 200_000, + "claude-3-5-sonnet-20240620": 200_000, + "claude-3-5-haiku-20241022": 200_000, + "claude-3-opus-20240229": 200_000, + "claude-3-haiku-20240307": 200_000, + # Bedrock Anthropic (base model IDs — cross-region prefixes stripped by get_context_window_limit) + "anthropic.claude-sonnet-4-6": 1_000_000, + "anthropic.claude-sonnet-4-20250514-v1:0": 1_000_000, + "anthropic.claude-sonnet-4-5-20250929-v1:0": 200_000, + "anthropic.claude-opus-4-6-v1": 1_000_000, + "anthropic.claude-opus-4-7": 1_000_000, + "anthropic.claude-opus-4-5-20251101-v1:0": 200_000, + "anthropic.claude-opus-4-20250514-v1:0": 200_000, + "anthropic.claude-opus-4-1-20250805-v1:0": 200_000, + "anthropic.claude-haiku-4-5-20251001-v1:0": 200_000, + "anthropic.claude-haiku-4-5@20251001": 200_000, + "anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000, + "anthropic.claude-3-7-sonnet-20240620-v1:0": 200_000, + "anthropic.claude-3-5-sonnet-20241022-v2:0": 200_000, + "anthropic.claude-3-5-sonnet-20240620-v1:0": 200_000, + "anthropic.claude-3-5-haiku-20241022-v1:0": 200_000, + "anthropic.claude-3-opus-20240229-v1:0": 200_000, + "anthropic.claude-3-haiku-20240307-v1:0": 200_000, + "anthropic.claude-3-sonnet-20240229-v1:0": 200_000, + "anthropic.claude-mythos-preview": 1_000_000, + # Bedrock Amazon Nova + "amazon.nova-pro-v1:0": 300_000, + "amazon.nova-lite-v1:0": 300_000, + "amazon.nova-micro-v1:0": 128_000, + "amazon.nova-premier-v1:0": 1_000_000, + "amazon.nova-2-lite-v1:0": 1_000_000, + "amazon.nova-2-pro-preview-20251202-v1:0": 1_000_000, + # OpenAI + "gpt-5.5": 1_050_000, + "gpt-5.5-pro": 1_050_000, + "gpt-5.4": 1_050_000, + "gpt-5.4-pro": 1_050_000, + "gpt-5.4-mini": 272_000, + "gpt-5.4-nano": 272_000, + "gpt-5.2": 272_000, + "gpt-5.2-pro": 272_000, + "gpt-5.1": 272_000, + "gpt-5": 272_000, + "gpt-5-mini": 272_000, + "gpt-5-nano": 272_000, + "gpt-5-pro": 128_000, + "gpt-4.1": 1_047_576, + "gpt-4.1-mini": 1_047_576, + "gpt-4.1-nano": 1_047_576, + "gpt-4o": 128_000, + "gpt-4o-mini": 128_000, + "gpt-4-turbo": 128_000, + "o3": 200_000, + "o3-mini": 200_000, + "o3-pro": 200_000, + "o4-mini": 200_000, + "o1": 200_000, + # Google Gemini + "gemini-2.5-flash": 1_048_576, + "gemini-2.5-flash-lite": 1_048_576, + "gemini-2.5-pro": 1_048_576, + "gemini-2.0-flash": 1_048_576, + "gemini-2.0-flash-lite": 1_048_576, + "gemini-3-pro-preview": 1_048_576, + "gemini-3-flash-preview": 1_048_576, + "gemini-3.1-pro-preview": 1_048_576, + "gemini-3.1-flash-lite-preview": 1_048_576, + # Mistral + "mistral-large-latest": 262_144, + "mistral-large-2512": 262_144, + "mistral-large-3": 262_144, + "mistral-medium-latest": 131_072, + "mistral-medium-2505": 131_072, + "mistral-small-latest": 131_072, + "mistral-small-3-2-2506": 131_072, +} + + +def get_context_window_limit(model_id: str) -> int | None: + """Look up the context window limit for a model ID. + + For Bedrock cross-region model IDs (e.g. ``us.anthropic.claude-sonnet-4-6``), + the region prefix is stripped as a fallback if the direct lookup fails. + + Args: + model_id: The model ID to look up. + + Returns: + The context window limit in tokens, or None if not found. + """ + direct = _CONTEXT_WINDOW_LIMITS.get(model_id) + if direct is not None: + return direct + + # Fallback: strip prefix before first dot and retry (handles cross-region prefixes) + dot_index = model_id.find(".") + if dot_index != -1: + stripped = model_id[dot_index + 1 :] + result = _CONTEXT_WINDOW_LIMITS.get(stripped) + if result is not None: + logger.debug( + "model_id=<%s>, stripped_id=<%s> | resolved context window limit via prefix strip", model_id, stripped + ) + return result + + return None + + +def resolve_config_metadata(config: _C, model_id: str) -> _C: + """Resolve model metadata fields on a config dict from built-in lookup tables. + + When ``context_window_limit`` is not explicitly set, looks it up from the built-in table. + Explicit values pass through unchanged. Returns a new dict only when resolution adds a field; + otherwise returns the original config to avoid unnecessary allocation. + + Args: + config: The stored model config dict. + model_id: The model ID to look up. + + Returns: + The config with resolved metadata, or the original config if nothing to resolve. + """ + if "context_window_limit" in config: + return config + + limit = get_context_window_limit(model_id) + if limit is None: + return config + + return {**config, "context_window_limit": limit} # type: ignore[return-value] diff --git a/src/strands/models/anthropic.py b/src/strands/models/anthropic.py index 54fdaaf00..ece7cd8d1 100644 --- a/src/strands/models/anthropic.py +++ b/src/strands/models/anthropic.py @@ -20,6 +20,7 @@ from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException from ..types.streaming import StreamEvent from ..types.tools import ToolChoice, ToolChoiceToolDict, ToolSpec +from ._defaults import resolve_config_metadata from ._validation import _has_location_source, validate_config_keys from .model import BaseModelConfig, Model @@ -95,7 +96,7 @@ def get_config(self) -> AnthropicConfig: Returns: The Anthropic model configuration. """ - return self.config + return resolve_config_metadata(self.config, self.config["model_id"]) def _format_request_message_content(self, content: ContentBlock) -> dict[str, Any]: """Format an Anthropic content block. diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py index d535bbc51..baa2807c4 100644 --- a/src/strands/models/bedrock.py +++ b/src/strands/models/bedrock.py @@ -31,6 +31,7 @@ ) from ..types.streaming import CitationsDelta, StreamEvent from ..types.tools import ToolChoice, ToolSpec +from ._defaults import resolve_config_metadata from ._strict_schema import ensure_strict_json_schema from ._validation import validate_config_keys from .model import BaseModelConfig, CacheConfig, Model @@ -217,7 +218,7 @@ def get_config(self) -> BedrockConfig: Returns: The Bedrock model configuration. """ - return self.config + return resolve_config_metadata(self.config, self.config.get("model_id", "")) def _format_request( self, diff --git a/src/strands/models/gemini.py b/src/strands/models/gemini.py index 892dce52d..65b925c6d 100644 --- a/src/strands/models/gemini.py +++ b/src/strands/models/gemini.py @@ -19,6 +19,7 @@ from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException, ProviderTokenCountError from ..types.streaming import StreamEvent from ..types.tools import ToolChoice, ToolSpec +from ._defaults import resolve_config_metadata from ._validation import _has_location_source, validate_config_keys from .model import BaseModelConfig, Model @@ -115,7 +116,7 @@ def get_config(self) -> GeminiConfig: Returns: The Gemini model configuration. """ - return self.config + return resolve_config_metadata(self.config, self.config["model_id"]) def _get_client(self) -> genai.Client: """Get a Gemini client for making requests. diff --git a/src/strands/models/mistral.py b/src/strands/models/mistral.py index c4a23b244..2ae00cef9 100644 --- a/src/strands/models/mistral.py +++ b/src/strands/models/mistral.py @@ -17,6 +17,7 @@ from ..types.exceptions import ModelThrottledException from ..types.streaming import StopReason, StreamEvent from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse +from ._defaults import resolve_config_metadata from ._validation import _has_location_source, validate_config_keys, warn_on_tool_choice_not_supported from .model import BaseModelConfig, Model @@ -114,7 +115,7 @@ def get_config(self) -> MistralConfig: Returns: The Mistral model configuration. """ - return self.config + return resolve_config_metadata(self.config, self.config["model_id"]) def _format_request_message_content(self, content: ContentBlock) -> str | dict[str, Any]: """Format a Mistral content block. diff --git a/src/strands/models/openai.py b/src/strands/models/openai.py index ea16c7713..94d4b0b90 100644 --- a/src/strands/models/openai.py +++ b/src/strands/models/openai.py @@ -21,6 +21,7 @@ from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException from ..types.streaming import StreamEvent from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse +from ._defaults import resolve_config_metadata from ._openai_bedrock import BedrockMantleConfig, resolve_bedrock_client_args from ._validation import _has_location_source, validate_config_keys from .model import BaseModelConfig, Model @@ -150,7 +151,9 @@ def get_config(self) -> OpenAIConfig: Returns: The OpenAI model configuration. """ - return cast(OpenAIModel.OpenAIConfig, self.config) + return cast( + OpenAIModel.OpenAIConfig, resolve_config_metadata(self.config, str(self.config.get("model_id", ""))) + ) @classmethod def format_request_message_content(cls, content: ContentBlock, **kwargs: Any) -> dict[str, Any]: diff --git a/src/strands/models/openai_responses.py b/src/strands/models/openai_responses.py index 4aff07ccd..a78cef73a 100644 --- a/src/strands/models/openai_responses.py +++ b/src/strands/models/openai_responses.py @@ -58,6 +58,7 @@ from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException # noqa: E402 from ..types.streaming import StreamEvent # noqa: E402 from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse # noqa: E402 +from ._defaults import resolve_config_metadata # noqa: E402 from ._openai_bedrock import BedrockMantleConfig, resolve_bedrock_client_args # noqa: E402 from ._validation import validate_config_keys # noqa: E402 from .model import BaseModelConfig, Model # noqa: E402 @@ -210,7 +211,10 @@ def get_config(self) -> OpenAIResponsesConfig: Returns: The OpenAI Responses API model configuration. """ - return cast(OpenAIResponsesModel.OpenAIResponsesConfig, self.config) + return cast( + OpenAIResponsesModel.OpenAIResponsesConfig, + resolve_config_metadata(self.config, str(self.config.get("model_id", ""))), + ) @override async def count_tokens( diff --git a/tests/strands/models/test_anthropic.py b/tests/strands/models/test_anthropic.py index 8e004dbb7..abb56a441 100644 --- a/tests/strands/models/test_anthropic.py +++ b/tests/strands/models/test_anthropic.py @@ -82,6 +82,30 @@ def test__init__model_configs(anthropic_client, model_id, max_tokens): assert tru_temperature == exp_temperature +def test__init__auto_populates_context_window_limit(anthropic_client): + _ = anthropic_client + + model = AnthropicModel(model_id="claude-sonnet-4-20250514", max_tokens=1) + + assert model.get_config().get("context_window_limit") == 1_000_000 + + +def test__init__explicit_context_window_limit_not_overridden(anthropic_client): + _ = anthropic_client + + model = AnthropicModel(model_id="claude-sonnet-4-20250514", max_tokens=1, context_window_limit=100_000) + + assert model.get_config().get("context_window_limit") == 100_000 + + +def test__init__unknown_model_no_context_window_limit(anthropic_client): + _ = anthropic_client + + model = AnthropicModel(model_id="unknown-model", max_tokens=1) + + assert model.get_config().get("context_window_limit") is None + + def test_update_config(model, model_id): model.update_config(model_id=model_id) diff --git a/tests/strands/models/test_bedrock.py b/tests/strands/models/test_bedrock.py index a80ca091e..e42fc8e1f 100644 --- a/tests/strands/models/test_bedrock.py +++ b/tests/strands/models/test_bedrock.py @@ -296,6 +296,46 @@ def test__init__context_window_limit(bedrock_client): assert model.context_window_limit == 200_000 +def test__init__auto_populates_context_window_limit(bedrock_client): + _ = bedrock_client + + model = BedrockModel(model_id="anthropic.claude-sonnet-4-20250514-v1:0") + + assert model.get_config().get("context_window_limit") == 1_000_000 + + +def test__init__auto_populates_context_window_limit_cross_region(bedrock_client): + _ = bedrock_client + + model = BedrockModel(model_id="us.anthropic.claude-sonnet-4-6") + + assert model.get_config().get("context_window_limit") == 1_000_000 + + +def test__init__auto_populates_context_window_limit_default_model(bedrock_client): + _ = bedrock_client + + model = BedrockModel() + + assert model.get_config().get("context_window_limit") == 1_000_000 + + +def test__init__explicit_context_window_limit_not_overridden(bedrock_client): + _ = bedrock_client + + model = BedrockModel(model_id="anthropic.claude-sonnet-4-20250514-v1:0", context_window_limit=100_000) + + assert model.get_config().get("context_window_limit") == 100_000 + + +def test__init__unknown_model_no_context_window_limit(bedrock_client): + _ = bedrock_client + + model = BedrockModel(model_id="unknown.model-v1:0") + + assert model.get_config().get("context_window_limit") is None + + def test_update_config(model, model_id): model.update_config(model_id=model_id) diff --git a/tests/strands/models/test_defaults.py b/tests/strands/models/test_defaults.py new file mode 100644 index 000000000..94c602fc1 --- /dev/null +++ b/tests/strands/models/test_defaults.py @@ -0,0 +1,76 @@ +"""Tests for model metadata lookup tables.""" + +from strands.models._defaults import get_context_window_limit, resolve_config_metadata + + +class TestGetContextWindowLimit: + """Tests for get_context_window_limit.""" + + def test_known_anthropic_direct_api(self): + assert get_context_window_limit("claude-sonnet-4-6") == 1_000_000 + assert get_context_window_limit("claude-opus-4-6") == 1_000_000 + assert get_context_window_limit("claude-opus-4-5") == 200_000 + assert get_context_window_limit("claude-haiku-4-5") == 200_000 + + def test_known_bedrock_anthropic(self): + assert get_context_window_limit("anthropic.claude-sonnet-4-6") == 1_000_000 + assert get_context_window_limit("anthropic.claude-haiku-4-5-20251001-v1:0") == 200_000 + + def test_known_bedrock_nova(self): + assert get_context_window_limit("amazon.nova-pro-v1:0") == 300_000 + assert get_context_window_limit("amazon.nova-micro-v1:0") == 128_000 + + def test_known_openai(self): + assert get_context_window_limit("gpt-5.4") == 1_050_000 + assert get_context_window_limit("gpt-4o") == 128_000 + assert get_context_window_limit("o3") == 200_000 + assert get_context_window_limit("o4-mini") == 200_000 + + def test_known_gemini(self): + assert get_context_window_limit("gemini-2.5-flash") == 1_048_576 + assert get_context_window_limit("gemini-2.5-pro") == 1_048_576 + + def test_strips_bedrock_cross_region_prefix(self): + assert get_context_window_limit("us.anthropic.claude-sonnet-4-6") == 1_000_000 + assert get_context_window_limit("global.anthropic.claude-sonnet-4-6") == 1_000_000 + assert get_context_window_limit("eu.anthropic.claude-sonnet-4-6") == 1_000_000 + assert get_context_window_limit("ap.anthropic.claude-sonnet-4-6") == 1_000_000 + + def test_strips_any_prefix_as_fallback(self): + # Any prefix before the first dot is stripped if direct lookup fails + assert get_context_window_limit("custom.anthropic.claude-sonnet-4-6") == 1_000_000 + + def test_unknown_model_returns_none(self): + assert get_context_window_limit("unknown-model-xyz") is None + assert get_context_window_limit("foo.unknown-model-xyz") is None + + +class TestResolveConfigMetadata: + """Tests for resolve_config_metadata.""" + + def test_resolves_context_window_limit(self): + config: dict = {"model_id": "claude-sonnet-4-6"} + result = resolve_config_metadata(config, "claude-sonnet-4-6") + assert result["context_window_limit"] == 1_000_000 + + def test_preserves_explicit_context_window_limit(self): + config: dict = {"model_id": "claude-sonnet-4-6", "context_window_limit": 100_000} + result = resolve_config_metadata(config, "claude-sonnet-4-6") + assert result["context_window_limit"] == 100_000 + + def test_returns_original_config_when_explicit(self): + config: dict = {"model_id": "claude-sonnet-4-6", "context_window_limit": 100_000} + result = resolve_config_metadata(config, "claude-sonnet-4-6") + assert result is config + + def test_returns_original_config_when_unknown_model(self): + config: dict = {"model_id": "unknown-model"} + result = resolve_config_metadata(config, "unknown-model") + assert result is config + assert "context_window_limit" not in result + + def test_returns_new_dict_when_resolved(self): + config: dict = {"model_id": "claude-sonnet-4-6"} + result = resolve_config_metadata(config, "claude-sonnet-4-6") + assert result is not config + assert "context_window_limit" not in config diff --git a/tests/strands/models/test_gemini.py b/tests/strands/models/test_gemini.py index fe6936ccc..91a55d899 100644 --- a/tests/strands/models/test_gemini.py +++ b/tests/strands/models/test_gemini.py @@ -79,6 +79,30 @@ def test__init__context_window_limit(gemini_client): assert model.context_window_limit == 1_048_576 +def test__init__auto_populates_context_window_limit(gemini_client): + _ = gemini_client + + model = GeminiModel(model_id="gemini-2.5-flash") + + assert model.get_config().get("context_window_limit") == 1_048_576 + + +def test__init__explicit_context_window_limit_not_overridden(gemini_client): + _ = gemini_client + + model = GeminiModel(model_id="gemini-2.5-flash", context_window_limit=500_000) + + assert model.get_config().get("context_window_limit") == 500_000 + + +def test__init__unknown_model_no_context_window_limit(gemini_client): + _ = gemini_client + + model = GeminiModel(model_id="unknown-model") + + assert model.get_config().get("context_window_limit") is None + + def test_update_config(model, model_id): model.update_config(model_id=model_id) diff --git a/tests/strands/models/test_mistral.py b/tests/strands/models/test_mistral.py index 57189748e..dd2728785 100644 --- a/tests/strands/models/test_mistral.py +++ b/tests/strands/models/test_mistral.py @@ -80,6 +80,30 @@ def test__init__model_configs(mistral_client, model_id, max_tokens): assert actual_temperature == exp_temperature +def test__init__auto_populates_context_window_limit(mistral_client): + _ = mistral_client + + model = MistralModel(model_id="mistral-large-latest", max_tokens=1) + + assert model.get_config().get("context_window_limit") == 262_144 + + +def test__init__explicit_context_window_limit_not_overridden(mistral_client): + _ = mistral_client + + model = MistralModel(model_id="mistral-large-latest", max_tokens=1, context_window_limit=100_000) + + assert model.get_config().get("context_window_limit") == 100_000 + + +def test__init__unknown_model_no_context_window_limit(mistral_client): + _ = mistral_client + + model = MistralModel(model_id="unknown-model", max_tokens=1) + + assert model.get_config().get("context_window_limit") is None + + def test_update_config(model, model_id): model.update_config(model_id=model_id) diff --git a/tests/strands/models/test_openai.py b/tests/strands/models/test_openai.py index b43915b07..613acd163 100644 --- a/tests/strands/models/test_openai.py +++ b/tests/strands/models/test_openai.py @@ -99,6 +99,30 @@ def test__init__context_window_limit(openai_client): assert model.context_window_limit == 128_000 +def test__init__auto_populates_context_window_limit(openai_client): + _ = openai_client + + model = OpenAIModel(model_id="gpt-4o") + + assert model.get_config().get("context_window_limit") == 128_000 + + +def test__init__explicit_context_window_limit_not_overridden(openai_client): + _ = openai_client + + model = OpenAIModel(model_id="gpt-4o", context_window_limit=50_000) + + assert model.get_config().get("context_window_limit") == 50_000 + + +def test__init__unknown_model_no_context_window_limit(openai_client): + _ = openai_client + + model = OpenAIModel(model_id="unknown-model") + + assert model.get_config().get("context_window_limit") is None + + @pytest.mark.parametrize( "content, exp_result", [ diff --git a/tests/strands/models/test_openai_responses.py b/tests/strands/models/test_openai_responses.py index b35d2d0de..97ee9e305 100644 --- a/tests/strands/models/test_openai_responses.py +++ b/tests/strands/models/test_openai_responses.py @@ -71,11 +71,29 @@ def test__init__(model_id): model = OpenAIResponsesModel(model_id=model_id, params={"max_output_tokens": 100}) tru_config = model.get_config() - exp_config = {"model_id": "gpt-4o", "params": {"max_output_tokens": 100}} + exp_config = {"model_id": "gpt-4o", "params": {"max_output_tokens": 100}, "context_window_limit": 128_000} assert tru_config == exp_config +def test__init__auto_populates_context_window_limit(): + model = OpenAIResponsesModel(model_id="gpt-4o") + + assert model.get_config().get("context_window_limit") == 128_000 + + +def test__init__explicit_context_window_limit_not_overridden(): + model = OpenAIResponsesModel(model_id="gpt-4o", context_window_limit=50_000) + + assert model.get_config().get("context_window_limit") == 50_000 + + +def test__init__unknown_model_no_context_window_limit(): + model = OpenAIResponsesModel(model_id="unknown-model") + + assert model.get_config().get("context_window_limit") is None + + def test_update_config(model, model_id): model.update_config(model_id=model_id)