diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index 3071d53cd..821d907ca 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -18,6 +18,15 @@ pub enum AuthHeader { Bearer, /// Custom header name (e.g. `x-api-key` for Anthropic). Custom(&'static str), + /// Do not inject any auth header on outgoing requests. The upstream + /// is expected to authenticate itself — used when the configured + /// `default_base_url` (or operator-supplied base-URL override) points + /// at a translating bridge / proxy that holds operator-side + /// credentials in its own pod and ignores caller-supplied auth. + /// Currently used by the `aws-bedrock` profile, where `SigV4` signing + /// is deferred to a follow-up PR; today the only supported shape is + /// a bridge-fronted upstream. + None, } // --------------------------------------------------------------------------- @@ -69,6 +78,8 @@ const ANTHROPIC_PROTOCOLS: &[&str] = &["anthropic_messages", "model_discovery"]; /// base-URL-override escape hatch path. const VERTEX_AI_PROTOCOLS: &[&str] = &["anthropic_messages", "model_discovery"]; +const AWS_BEDROCK_PROTOCOLS: &[&str] = &["aws_bedrock_invoke", "aws_bedrock_invoke_stream"]; + static OPENAI_PROFILE: InferenceProviderProfile = InferenceProviderProfile { provider_type: "openai", default_base_url: "https://api.openai.com/v1", @@ -155,6 +166,37 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { passthrough_headers: &["x-model-id"], }; +// AWS Bedrock — registered as bridge-fronted (no router-side auth +// injection). Real AWS Bedrock requires `SigV4` signing of every request, +// which is deferred to a follow-up PR (see #1704 thread). Until then, +// operators point `BEDROCK_BASE_URL` at a translating bridge or +// Bedrock-compatible proxy that handles auth in its own pod. 
The router +// passes Bedrock InvokeModel requests through opaquely; the L7 patterns +// `/model/{modelId}/invoke` and `/model/{modelId}/invoke-with-response-stream` +// are wired up in `crates/openshell-sandbox/src/l7/inference.rs`. +// +// Note: `default_base_url` is intentionally an empty string. Without +// `BEDROCK_BASE_URL` config, route resolution rejects the provider +// rather than silently forwarding prompts to real AWS Bedrock with +// `auth: None` (which would fail upstream and risks operator +// surprise). Once the `SigV4` follow-up lands, the default can revert +// to `https://bedrock-runtime.us-east-1.amazonaws.com`. +static AWS_BEDROCK_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "aws-bedrock", + default_base_url: "", + protocols: AWS_BEDROCK_PROTOCOLS, + // No single API key for Bedrock — `SigV4` takes four credentials + // (access key id, secret, session token, region) and signs requests + // rather than injecting a header. Until the `SigV4` follow-up lands + // the router-side auth shape is `None` and no credential lookup is + // required at route time. + credential_key_names: &[], + base_url_config_keys: &["BEDROCK_BASE_URL"], + auth: AuthHeader::None, + default_headers: &[], + passthrough_headers: &[], +}; + /// Canonicalize an inference provider type string to a well-known identifier. 
/// /// Returns `Some(canonical_name)` for recognized inference providers, @@ -167,6 +209,7 @@ pub fn normalize_inference_provider_type(input: &str) -> Option<&'static str> { "openai" => Some("openai"), "anthropic" => Some("anthropic"), "nvidia" => Some("nvidia"), + "aws-bedrock" => Some("aws-bedrock"), "google-vertex-ai" | "vertex" | "vertex-ai" | "google-vertex" | "gcp-vertex" => { Some("google-vertex-ai") } @@ -184,6 +227,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf "anthropic" => Some(&ANTHROPIC_PROFILE), "nvidia" => Some(&NVIDIA_PROFILE), "google-vertex-ai" => Some(&VERTEX_AI_PROFILE), + "aws-bedrock" => Some(&AWS_BEDROCK_PROFILE), _ => None, } } @@ -303,7 +347,31 @@ mod tests { assert!(profile_for("openai").is_some()); assert!(profile_for("anthropic").is_some()); assert!(profile_for("nvidia").is_some()); + assert!(profile_for("aws-bedrock").is_some()); assert!(profile_for("OpenAI").is_some()); // case insensitive + assert!(profile_for("AWS-Bedrock").is_some()); // case insensitive + } + + #[test] + fn aws_bedrock_uses_no_auth_header() { + let (auth, headers) = auth_for_provider_type("aws-bedrock"); + assert_eq!(auth, AuthHeader::None); + assert!(headers.is_empty()); + } + + #[test] + fn aws_bedrock_profile_has_no_credential_keys() { + let profile = profile_for("aws-bedrock").expect("profile registered"); + // No router-side credential lookup until the `SigV4` follow-up. 
+ assert!(profile.credential_key_names.is_empty()); + assert_eq!(profile.base_url_config_keys, &["BEDROCK_BASE_URL"]); + } + + #[test] + fn aws_bedrock_protocols_are_bedrock_specific() { + let profile = profile_for("aws-bedrock").expect("profile registered"); + assert!(profile.protocols.contains(&"aws_bedrock_invoke")); + assert!(profile.protocols.contains(&"aws_bedrock_invoke_stream")); } #[test] diff --git a/crates/openshell-providers/src/profiles.rs b/crates/openshell-providers/src/profiles.rs index 624ee0711..5dca23763 100644 --- a/crates/openshell-providers/src/profiles.rs +++ b/crates/openshell-providers/src/profiles.rs @@ -19,6 +19,7 @@ use std::sync::OnceLock; const PATH_TEMPLATE_CREDENTIAL_PLACEHOLDER: &str = "{credential}"; const BUILT_IN_PROFILE_YAMLS: &[&str] = &[ + include_str!("../../../providers/aws-bedrock.yaml"), include_str!("../../../providers/claude-code.yaml"), include_str!("../../../providers/codex.yaml"), include_str!("../../../providers/copilot.yaml"), diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs index 9eb63c88b..2bae7f869 100644 --- a/crates/openshell-router/src/backend.rs +++ b/crates/openshell-router/src/backend.rs @@ -199,6 +199,30 @@ fn prepare_backend_request( body: bytes::Bytes, stream_response: bool, ) -> Result<(reqwest::RequestBuilder, String), RouterError> { + // For AWS Bedrock routes the model id is encoded in the URL path + // (`/model/{modelId}/invoke[-with-response-stream]`), not in the + // JSON body. The caller's path can carry any model id; rewrite it + // to the operator-configured `route.model` so a sandbox cannot + // pick a different upstream model than what `inference set` + // configured. If the path is not a recognized Bedrock shape on a + // Bedrock route, reject the request rather than forwarding + // verbatim. 
+ let rewritten_path: String; + let path = if route_is_bedrock(route) { + match rewrite_bedrock_path(route, path) { + Some(p) => { + rewritten_path = p; + rewritten_path.as_str() + } + None => { + return Err(RouterError::Internal(format!( + "AWS Bedrock route received non-Bedrock path '{path}'; expected /model//invoke[-with-response-stream]" + ))); + } + } + } else { + path + }; let url = build_provider_url(route, &route.model, path, stream_response); let headers = sanitize_request_headers(route, headers); @@ -216,6 +240,13 @@ fn prepare_backend_request( AuthHeader::Custom(header_name) => { builder = builder.header(*header_name, &route.api_key); } + AuthHeader::None => { + // Bridge-fronted upstream: no router-side auth injection. + // The configured `endpoint` is expected to be a translating + // bridge / proxy whose own pod holds operator-side + // credentials. Used today by the `aws-bedrock` profile + // (SigV4 signing is a separate follow-up). + } } for (name, value) in &headers { builder = builder.header(name.as_str(), value.as_str()); @@ -252,6 +283,14 @@ fn prepare_backend_request( // in the body; strip it so Vertex AI does not reject the // request with "Extra inputs are not permitted". obj.remove("model"); + } else if route_is_bedrock(route) { + // AWS Bedrock InvokeModel encodes the model in the URL + // path; the request body is the raw provider-specific + // payload (e.g. an Anthropic Messages body for Claude + // models, a Mistral payload for Mistral models). The + // body must not be mutated — injecting a "model" field + // here would either be silently ignored or rejected as + // an unexpected key by the upstream / bridge. } else { obj.insert( "model".to_string(), @@ -775,6 +814,66 @@ fn build_backend_url(endpoint: &str, path: &str) -> String { format!("{base}{path}") } +/// Check whether a route targets an AWS Bedrock `InvokeModel` endpoint. +/// +/// Returns true when any of the route's protocols is one of the Bedrock +/// invocation protocols. 
Used to gate Bedrock-specific request shaping +/// (path-segment rewriting, skipped body-model injection) in +/// [`prepare_backend_request`]. +fn route_is_bedrock(route: &ResolvedRoute) -> bool { + route + .protocols + .iter() + .any(|p| p == "aws_bedrock_invoke" || p == "aws_bedrock_invoke_stream") +} + +/// Parse a Bedrock invocation path into its `(model_id, action_suffix)` +/// components. +/// +/// Recognized shapes (caller's path on the way into the router): +/// - `/model/<model_id>/invoke` → action `/invoke` +/// - `/model/<model_id>/invoke-with-response-stream` → action +/// `/invoke-with-response-stream` +/// +/// `<model_id>` must be non-empty and contain no `/`. A trailing query +/// string is stripped before matching. Returns `None` when the path +/// does not match either shape — the caller treats that as a malformed +/// request and rejects rather than forwarding verbatim. +fn parse_bedrock_invocation_path(path: &str) -> Option<(&str, &'static str)> { + let path_only = path.split('?').next().unwrap_or(path); + let rest = path_only.strip_prefix("/model/")?; + let slash_at = rest.find('/')?; + if slash_at == 0 { + return None; + } + let model_id = &rest[..slash_at]; + let suffix = &rest[slash_at..]; + let action: &'static str = match suffix { + "/invoke" => "/invoke", + "/invoke-with-response-stream" => "/invoke-with-response-stream", + _ => return None, + }; + Some((model_id, action)) +} + +/// Rewrite a Bedrock invocation path so the model segment is the +/// operator-configured `route.model` rather than whatever the caller +/// supplied. Returns the rewritten path on success, or `None` when the +/// inbound path is not a recognized Bedrock invocation shape. 
+/// +/// Why rewrite rather than reject: the inbound L7 pattern detector +/// already accepts only `/model/{x}/invoke[-with-response-stream]` +/// shapes for Bedrock routes, so substituting the model segment (vs. +/// rejecting a mismatch) lets sandbox code that hardcodes a different +/// model continue to work, while still guaranteeing the operator's +/// chosen model is what reaches the upstream. Note that any query +/// string on the inbound path is dropped: the parser strips it and +/// only the model segment and action are re-emitted. +fn rewrite_bedrock_path(route: &ResolvedRoute, path: &str) -> Option<String> { + let (_caller_model, action) = parse_bedrock_invocation_path(path)?; + Some(format!("/model/{}{}", route.model, action)) +} + /// Check whether a route targets a Vertex AI Anthropic rawPredict endpoint. /// /// The predicate is purely structural — it tests `model_in_path`, @@ -800,10 +899,13 @@ fn is_vertex_anthropic_rawpredict_route(route: &ResolvedRoute) -> bool { mod tests { use super::{ ValidationFailure, ValidationFailureKind, build_backend_url, build_provider_url, - verify_backend_endpoint, + parse_bedrock_invocation_path, prepare_backend_request, rewrite_bedrock_path, + route_is_bedrock, verify_backend_endpoint, }; + use crate::RouterError; use crate::config::{DEFAULT_ROUTE_TIMEOUT, ResolvedRoute}; use openshell_core::inference::AuthHeader; + use std::time::Duration; use wiremock::matchers::{body_partial_json, header, method, path}; use wiremock::{Mock, MockServer, ResponseTemplate}; @@ -1670,7 +1772,7 @@ mod tests { ); let headers = vec![("content-type".to_string(), "application/json".to_string())]; - let (builder, _url) = super::prepare_backend_request( + let (builder, _url) = prepare_backend_request( &client, &route, "POST", @@ -1741,7 +1843,7 @@ mod tests { ); let headers = vec![("content-type".to_string(), "application/json".to_string())]; - let (builder, _url) = super::prepare_backend_request( + let (builder, _url) = prepare_backend_request( &client, &route, "POST", @@ -1863,7 +1965,7 
@@ mod tests { ); let headers = vec![("content-type".to_string(), "application/json".to_string())]; - let (builder, _url) = super::prepare_backend_request( + let (builder, _url) = prepare_backend_request( &client, &route, "POST", @@ -1925,7 +2027,7 @@ mod tests { ); let headers = vec![("content-type".to_string(), "application/json".to_string())]; - let (builder, _url) = super::prepare_backend_request( + let (builder, _url) = prepare_backend_request( &client, &route, "POST", @@ -1989,7 +2091,7 @@ mod tests { ); let headers = vec![("content-type".to_string(), "application/json".to_string())]; - let (builder, _url) = super::prepare_backend_request( + let (builder, _url) = prepare_backend_request( &client, &route, "POST", @@ -2022,4 +2124,277 @@ mod tests { "Vertex Gemini route must still rewrite the model field, got: {received_body}" ); } + + // ============================================================ + // AWS Bedrock route shaping (path rewriting + body preservation) + // ============================================================ + + /// `parse_bedrock_invocation_path` rejects malformed paths. 
+ #[test] + fn parse_bedrock_invocation_path_rejects_malformed() { + // Empty model id: `/model//invoke` + assert!(parse_bedrock_invocation_path("/model//invoke").is_none()); + // Multi-segment model id: `/model/a/b/invoke` + assert!(parse_bedrock_invocation_path("/model/a/b/invoke").is_none()); + // Unknown action: `/model/foo/converse` + assert!(parse_bedrock_invocation_path("/model/foo/converse").is_none()); + // Wrong prefix: `/v1/messages` + assert!(parse_bedrock_invocation_path("/v1/messages").is_none()); + // Missing slash before action + assert!(parse_bedrock_invocation_path("/model/foo").is_none()); + } + + #[test] + fn parse_bedrock_invocation_path_accepts_invoke() { + let parsed = parse_bedrock_invocation_path( + "/model/anthropic.claude-3-5-sonnet-20241022-v2:0/invoke", + ); + assert_eq!( + parsed, + Some(("anthropic.claude-3-5-sonnet-20241022-v2:0", "/invoke")) + ); + } + + #[test] + fn parse_bedrock_invocation_path_accepts_invoke_with_response_stream() { + let parsed = parse_bedrock_invocation_path( + "/model/anthropic.claude-opus-4-7/invoke-with-response-stream", + ); + assert_eq!( + parsed, + Some(("anthropic.claude-opus-4-7", "/invoke-with-response-stream")) + ); + } + + #[test] + fn parse_bedrock_invocation_path_strips_query_string() { + let parsed = + parse_bedrock_invocation_path("/model/anthropic.claude-opus-4-7/invoke?trace=1"); + assert_eq!(parsed, Some(("anthropic.claude-opus-4-7", "/invoke"))); + } + + /// `route_is_bedrock` matches both Bedrock protocol variants. 
+ #[test] + fn route_is_bedrock_matches_invoke_protocols() { + let invoke_only = test_route( + "https://example.com", + &["aws_bedrock_invoke"], + AuthHeader::None, + ); + assert!(route_is_bedrock(&invoke_only)); + + let stream_only = test_route( + "https://example.com", + &["aws_bedrock_invoke_stream"], + AuthHeader::None, + ); + assert!(route_is_bedrock(&stream_only)); + + let both = test_route( + "https://example.com", + &["aws_bedrock_invoke", "aws_bedrock_invoke_stream"], + AuthHeader::None, + ); + assert!(route_is_bedrock(&both)); + + let openai = test_route( + "https://example.com", + &["openai_chat_completions"], + AuthHeader::Bearer, + ); + assert!(!route_is_bedrock(&openai)); + } + + /// `rewrite_bedrock_path` swaps caller's model segment for the + /// route-configured model on both invoke variants. + #[test] + fn rewrite_bedrock_path_substitutes_operator_model() { + let mut route = test_route( + "https://bedrock-bridge.example", + &["aws_bedrock_invoke", "aws_bedrock_invoke_stream"], + AuthHeader::None, + ); + route.model = "anthropic.claude-opus-4-7".to_string(); + + let rewritten = rewrite_bedrock_path(&route, "/model/some-other-model/invoke"); + assert_eq!( + rewritten, + Some("/model/anthropic.claude-opus-4-7/invoke".to_string()) + ); + + let rewritten_stream = rewrite_bedrock_path(&route, "/model/x/invoke-with-response-stream"); + assert_eq!( + rewritten_stream, + Some("/model/anthropic.claude-opus-4-7/invoke-with-response-stream".to_string()) + ); + } + + #[test] + fn rewrite_bedrock_path_returns_none_for_non_bedrock_path() { + let route = test_route( + "https://bedrock-bridge.example", + &["aws_bedrock_invoke"], + AuthHeader::None, + ); + assert_eq!(rewrite_bedrock_path(&route, "/v1/messages"), None); + assert_eq!(rewrite_bedrock_path(&route, "/model//invoke"), None); + assert_eq!(rewrite_bedrock_path(&route, "/model/a/b/invoke"), None); + } + + /// End-to-end: an inbound Bedrock request that names a different + /// model in the path arrives at 
the upstream/bridge with the + /// operator's model, and the body is unchanged (no `"model"` + /// injection). + #[tokio::test] + async fn bedrock_route_rewrites_model_in_path_and_preserves_body() { + let mock_server = MockServer::start().await; + let mut route = test_route( + &mock_server.uri(), + &["aws_bedrock_invoke", "aws_bedrock_invoke_stream"], + AuthHeader::None, + ); + route.model = "anthropic.claude-opus-4-7".to_string(); + + // The mock asserts the upstream sees the operator's model in + // the path, NOT the caller's model. + Mock::given(method("POST")) + .and(path("/model/anthropic.claude-opus-4-7/invoke")) + // Caller body has a "model" key; we expect it to pass + // through unchanged. This mock matches on method and path + // only (no body matcher), so any body is accepted here; + // the assertion below pins the body. + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({"ok": true}))) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(5)) + .build() + .expect("client"); + + // Caller-supplied body — we deliberately include a "model" + // field naming a DIFFERENT model than the operator's, to + // verify the router does not inject route.model on top of + // it. The body should pass through verbatim because Bedrock + // encodes the model in the path. + let caller_body = serde_json::json!({ + "model": "caller-supplied-model-name", + "messages": [{"role": "user", "content": "hi"}], + }); + + let (builder, url) = prepare_backend_request( + &client, + &route, + "POST", + "/model/some-other-model/invoke", + &[], + bytes::Bytes::from(caller_body.to_string()), + false, + ) + .expect("prepare should succeed"); + + // URL should target the operator's model, not the caller's. 
+ assert!( + url.ends_with("/model/anthropic.claude-opus-4-7/invoke"), + "URL must use operator model, got: {url}" + ); + + let resp = builder.send().await.expect("send"); + assert_eq!(resp.status(), 200); + + // Inspect what wiremock actually received. + let received = mock_server.received_requests().await.expect("requests"); + assert_eq!(received.len(), 1); + let req = &received[0]; + let received_body: serde_json::Value = + serde_json::from_slice(&req.body).expect("json body"); + // Caller's model name should pass through (NOT replaced by + // route.model). This proves the body is untouched. + assert_eq!( + received_body.get("model").and_then(|v| v.as_str()), + Some("caller-supplied-model-name"), + "Bedrock route must NOT rewrite body model, got: {received_body}" + ); + assert!( + received_body.get("messages").is_some(), + "messages field should pass through unchanged" + ); + } + + /// Streaming variant: the same path-rewrite + body-preservation + /// contract applies to invoke-with-response-stream. 
+ #[tokio::test] + async fn bedrock_route_streaming_rewrites_model_in_path() { + let mock_server = MockServer::start().await; + let mut route = test_route( + &mock_server.uri(), + &["aws_bedrock_invoke", "aws_bedrock_invoke_stream"], + AuthHeader::None, + ); + route.model = "anthropic.claude-opus-4-7".to_string(); + + Mock::given(method("POST")) + .and(path( + "/model/anthropic.claude-opus-4-7/invoke-with-response-stream", + )) + .respond_with(ResponseTemplate::new(200).set_body_string("event: ok\n\n")) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(5)) + .build() + .expect("client"); + + let (builder, url) = prepare_backend_request( + &client, + &route, + "POST", + "/model/another-model/invoke-with-response-stream", + &[], + bytes::Bytes::from(r#"{"messages":[]}"#), + true, + ) + .expect("prepare should succeed"); + + assert!( + url.ends_with("/model/anthropic.claude-opus-4-7/invoke-with-response-stream"), + "Streaming URL must use operator model, got: {url}" + ); + + let resp = builder.send().await.expect("send"); + assert_eq!(resp.status(), 200); + } + + /// Defense-in-depth: a Bedrock route receiving a non-Bedrock path + /// is rejected rather than forwarded. The L7 pattern detector + /// upstream of the router should never produce this combination, + /// but if it ever did, we must not silently forward. 
+ #[test] + fn bedrock_route_rejects_non_bedrock_path() { + let client = reqwest::Client::new(); + let route = test_route( + "https://bedrock-bridge.example", + &["aws_bedrock_invoke"], + AuthHeader::None, + ); + let result = prepare_backend_request( + &client, + &route, + "POST", + "/v1/messages", + &[], + bytes::Bytes::from(r"{}"), + false, + ); + match result { + Err(RouterError::Internal(msg)) => { + assert!( + msg.contains("Bedrock") && msg.contains("/v1/messages"), + "error must name the offending path, got: {msg}" + ); + } + other => panic!("expected RouterError::Internal, got {other:?}"), + } + } } diff --git a/crates/openshell-sandbox/src/l7/inference.rs b/crates/openshell-sandbox/src/l7/inference.rs index ec789ef95..10b58cc36 100644 --- a/crates/openshell-sandbox/src/l7/inference.rs +++ b/crates/openshell-sandbox/src/l7/inference.rs @@ -60,7 +60,7 @@ impl InferenceApiPattern { } } -/// Default patterns for known inference APIs (`OpenAI`, Anthropic). +/// Default patterns for known inference APIs (`OpenAI`, Anthropic, AWS Bedrock). pub fn default_patterns() -> Vec { vec![ InferenceApiPattern { @@ -114,10 +114,42 @@ pub fn default_patterns() -> Vec { kind: "models_get".to_string(), framing: ResponseFraming::Buffered, }, + // AWS Bedrock InvokeModel + InvokeModelWithResponseStream. The `*` + // segment is the Bedrock model id (e.g. `anthropic.claude-opus-4-7`). + // + // Framing differs between the two endpoints. InvokeModel returns ONE + // JSON object the client decodes whole — it must be served buffered + // with an accurate `Content-Length`, otherwise the streaming proxy's + // size-cap or idle-timeout failure mode would append an SSE error + // event to bytes the caller decodes as one JSON object, silently + // corrupting it. InvokeModelWithResponseStream returns an + // AWS event-stream of binary chunks and must go through the + // streaming path so chunks reach the agent incrementally. 
+ InferenceApiPattern { + method: "POST".to_string(), + path_glob: "/model/*/invoke".to_string(), + protocol: "aws_bedrock_invoke".to_string(), + kind: "messages".to_string(), + framing: ResponseFraming::Buffered, + }, + InferenceApiPattern { + method: "POST".to_string(), + path_glob: "/model/*/invoke-with-response-stream".to_string(), + protocol: "aws_bedrock_invoke_stream".to_string(), + kind: "messages".to_string(), + framing: ResponseFraming::Streaming, + }, ] } /// Check if an HTTP request matches a known inference API pattern. +/// +/// Path globs support two wildcard shapes (one per pattern, not both): +/// - **Trailing `/*`**: `/v1/models/*` matches `/v1/models` and any +/// `/v1/models/<rest>` (one or many path segments). +/// - **Middle `/*/`**: `/model/*/invoke` matches `/model/<segment>/invoke` +/// for a single non-empty segment that contains no `/`. Used for +/// AWS Bedrock's `/model/{modelId}/invoke[-with-response-stream]`. pub fn detect_inference_pattern<'a>( method: &str, path: &str, @@ -137,6 +169,21 @@ pub fn detect_inference_pattern<'a>( .is_some_and(|suffix| suffix.starts_with('/')); } + if let Some((before, after)) = p.path_glob.split_once("/*/") { + let Some(rest) = path_only.strip_prefix(before) else { + return false; + }; + let Some(rest) = rest.strip_prefix('/') else { + return false; + }; + // rest must look like `<segment>/<after>` where `<segment>` is non-empty + // and contains no `/` (single path segment). + let Some(slash_at) = rest.find('/') else { + return false; + }; + return slash_at > 0 && rest[slash_at + 1..] 
== *after; + } + path_only == p.path_glob }) } @@ -543,6 +590,103 @@ mod tests { } } + #[test] + fn detect_aws_bedrock_invoke() { + let patterns = default_patterns(); + let result = + detect_inference_pattern("POST", "/model/anthropic.claude-opus-4-7/invoke", &patterns); + assert!(result.is_some()); + assert_eq!(result.unwrap().protocol, "aws_bedrock_invoke"); + assert_eq!(result.unwrap().kind, "messages"); + } + + #[test] + fn detect_aws_bedrock_invoke_stream() { + let patterns = default_patterns(); + let result = detect_inference_pattern( + "POST", + "/model/anthropic.claude-opus-4-7/invoke-with-response-stream", + &patterns, + ); + assert!(result.is_some()); + assert_eq!(result.unwrap().protocol, "aws_bedrock_invoke_stream"); + } + + #[test] + fn aws_bedrock_invoke_with_query_string() { + let patterns = default_patterns(); + let result = detect_inference_pattern("POST", "/model/foo.bar/invoke?trace=1", &patterns); + assert!(result.is_some()); + assert_eq!(result.unwrap().protocol, "aws_bedrock_invoke"); + } + + #[test] + fn aws_bedrock_rejects_empty_model_id() { + let patterns = default_patterns(); + // `/model//invoke` — empty wildcard segment is not a valid Bedrock id. + assert!(detect_inference_pattern("POST", "/model//invoke", &patterns).is_none()); + } + + #[test] + fn aws_bedrock_rejects_multi_segment_model_id() { + let patterns = default_patterns(); + // The `*` matches a single path segment only; multi-segment ids must + // not match (would be a path-traversal liability otherwise). 
+ assert!(detect_inference_pattern("POST", "/model/foo/bar/invoke", &patterns).is_none()); + } + + #[test] + fn aws_bedrock_rejects_get() { + let patterns = default_patterns(); + assert!( + detect_inference_pattern("GET", "/model/anthropic.claude-opus-4-7/invoke", &patterns) + .is_none() + ); + } + + #[test] + fn aws_bedrock_rejects_unknown_action() { + let patterns = default_patterns(); + assert!(detect_inference_pattern("POST", "/model/foo/converse", &patterns).is_none()); + } + + /// `InvokeModel` returns one JSON object — must be served buffered. + /// Sending it through the streaming proxy would risk truncation or an + /// appended SSE error event corrupting the JSON body the caller decodes. + #[test] + fn aws_bedrock_invoke_is_buffered() { + let patterns = default_patterns(); + let invoke = + detect_inference_pattern("POST", "/model/anthropic.claude-opus-4-7/invoke", &patterns) + .expect("InvokeModel pattern must match"); + assert_eq!(invoke.protocol, "aws_bedrock_invoke"); + assert!( + invoke.is_buffered(), + "InvokeModel must be Buffered (one JSON object, accurate Content-Length); \ + streaming would risk corrupting the response" + ); + } + + /// `InvokeModelWithResponseStream` returns an AWS event-stream of + /// binary chunks — must go through the streaming proxy so chunks + /// reach the agent incrementally. 
+ #[test] + fn aws_bedrock_invoke_stream_is_streaming() { + let patterns = default_patterns(); + let stream = detect_inference_pattern( + "POST", + "/model/anthropic.claude-opus-4-7/invoke-with-response-stream", + &patterns, + ) + .expect("InvokeModelWithResponseStream pattern must match"); + assert_eq!(stream.protocol, "aws_bedrock_invoke_stream"); + assert!( + !stream.is_buffered(), + "InvokeModelWithResponseStream must be Streaming so the AWS \ + event-stream chunks reach the agent incrementally" + ); + } + #[test] fn parse_simple_post_request() { let body = b"{\"hello\":true}"; diff --git a/crates/openshell-server/src/grpc/provider.rs b/crates/openshell-server/src/grpc/provider.rs index 4552fceae..10c10868e 100644 --- a/crates/openshell-server/src/grpc/provider.rs +++ b/crates/openshell-server/src/grpc/provider.rs @@ -1821,6 +1821,7 @@ mod tests { assert_eq!( ids, vec![ + "aws-bedrock", "claude-code", "codex", "copilot", diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs index 13496cd99..519039c75 100644 --- a/crates/openshell-server/src/inference.rs +++ b/crates/openshell-server/src/inference.rs @@ -620,26 +620,33 @@ fn resolve_provider_route( let profile = openshell_core::inference::profile_for(&provider_type).ok_or_else(|| { Status::invalid_argument(format!( "provider '{name}' has unsupported type '{raw_provider_type}' for cluster inference \ - (supported: openai, anthropic, nvidia, google-vertex-ai)", + (supported: openai, anthropic, nvidia, google-vertex-ai, aws-bedrock)", name = provider.object_name() )) })?; - let api_key = find_provider_api_key( - provider, - profile.credential_key_names, - if provider_type == "google-vertex-ai" { - CredentialLookup::PreferredOnly - } else { - CredentialLookup::PreferredThenAny - }, - ) - .ok_or_else(|| { - Status::invalid_argument(format!( - "provider '{name}' has no usable API key credential", - name = provider.object_name() - )) - })?; + // Profiles with `auth: None` are 
bridge-fronted — the upstream + // authenticates itself, so the router doesn't need a credential at + // route-resolution time. Today this is `aws-bedrock`. + let api_key = if matches!(profile.auth, openshell_core::inference::AuthHeader::None) { + String::new() + } else { + find_provider_api_key( + provider, + profile.credential_key_names, + if provider_type == "google-vertex-ai" { + CredentialLookup::PreferredOnly + } else { + CredentialLookup::PreferredThenAny + }, + ) + .ok_or_else(|| { + Status::invalid_argument(format!( + "provider '{name}' has no usable API key credential", + name = provider.object_name() + )) + })? + }; // Vertex AI requires a model-aware URL; delegate to specialised resolver. if provider_type == "google-vertex-ai" { @@ -1059,6 +1066,139 @@ mod tests { assert_eq!(config.model_id, "gpt-4.1"); } + #[tokio::test] + async fn upsert_cluster_route_succeeds_for_aws_bedrock_with_bridge_url() { + // aws-bedrock is registered with `auth: AuthHeader::None` (the + // bridge-fronted shape) so route resolution does NOT require a + // real API key — but `provider create` still requires a + // non-empty credentials map at the gRPC layer, so operators + // pass a placeholder credential per the docs. The router + // ignores it on the outbound path. + // + // The other half of the contract is `BEDROCK_BASE_URL`: with + // `default_base_url: ""` in the core profile, providers + // without it fail route resolution rather than silently + // forwarding prompts to AWS Bedrock with no usable auth. This + // test pins down the success path. 
+ let store = test_store().await; + + let provider = Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "provider-bedrock-bridge".to_string(), + name: "bedrock-bridge".to_string(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + r#type: "aws-bedrock".to_string(), + // Placeholder credential — the router ignores it because + // auth: None skips header injection. Mirrors the + // doc-recommended `--credential AWS_ACCESS_KEY_ID=unused-bridge-fronted-shape`. + credentials: std::iter::once(( + "AWS_ACCESS_KEY_ID".to_string(), + "unused-bridge-fronted-shape".to_string(), + )) + .collect(), + config: std::iter::once(( + "BEDROCK_BASE_URL".to_string(), + "http://bedrock-bridge.demo.svc.cluster.local:8080".to_string(), + )) + .collect(), + credential_expires_at_ms: std::collections::HashMap::new(), + }; + store + .put_message(&provider) + .await + .expect("provider should persist"); + + let upserted = upsert_cluster_inference_route( + &store, + CLUSTER_INFERENCE_ROUTE_NAME, + "bedrock-bridge", + "anthropic.claude-3-5-sonnet-20241022-v2:0", + 0, + false, + ) + .await + .expect("upsert should succeed for aws-bedrock provider"); + + assert_eq!(upserted.route.object_name(), CLUSTER_INFERENCE_ROUTE_NAME); + let config = upserted.route.config.as_ref().expect("config"); + assert_eq!(config.provider_name, "bedrock-bridge"); + assert_eq!(config.model_id, "anthropic.claude-3-5-sonnet-20241022-v2:0"); + + // Verify the resolved route metadata reflects bridge-fronted + // auth (empty api_key + provider_type = "aws-bedrock"). Note + // the api_key is empty even though the provider has a + // credential — auth: None skips api-key lookup entirely. 
+ let managed = resolve_route_by_name(&store, CLUSTER_INFERENCE_ROUTE_NAME) + .await + .expect("route should resolve") + .expect("managed route should exist"); + assert_eq!(managed.provider_type, "aws-bedrock"); + assert_eq!( + managed.base_url, + "http://bedrock-bridge.demo.svc.cluster.local:8080" + ); + assert_eq!(managed.api_key, ""); + } + + #[tokio::test] + async fn upsert_cluster_route_rejects_aws_bedrock_without_bedrock_base_url() { + // The companion to upsert_cluster_route_succeeds_for_aws_bedrock_with_bridge_url: + // an aws-bedrock provider without BEDROCK_BASE_URL must be + // rejected at route resolution. This pins down the safety + // contract johntmyers asked for — until the SigV4 follow-up + // lands, the router must NOT silently forward prompts to AWS + // with auth: None. + // + // Mechanism: AWS_BEDROCK_PROFILE.default_base_url is "". When + // the provider has no BEDROCK_BASE_URL config, base_url + // resolves to empty, triggering the existing + // empty-base_url check in resolve_provider_route. + let store = test_store().await; + + let provider = Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "provider-bedrock-misconfigured".to_string(), + name: "bedrock-misconfigured".to_string(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + r#type: "aws-bedrock".to_string(), + credentials: std::iter::once(( + "AWS_ACCESS_KEY_ID".to_string(), + "unused-bridge-fronted-shape".to_string(), + )) + .collect(), + // Intentionally no BEDROCK_BASE_URL. 
+ config: std::collections::HashMap::new(), + credential_expires_at_ms: std::collections::HashMap::new(), + }; + store + .put_message(&provider) + .await + .expect("provider should persist"); + + let err = upsert_cluster_inference_route( + &store, + CLUSTER_INFERENCE_ROUTE_NAME, + "bedrock-misconfigured", + "anthropic.claude-3-5-sonnet-20241022-v2:0", + 0, + false, + ) + .await + .expect_err("upsert should reject aws-bedrock provider without BEDROCK_BASE_URL"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!( + err.message().contains("empty base_url"), + "error should name the missing base_url, got: {}", + err.message() + ); + } + #[tokio::test] async fn resolve_managed_route_returns_none_when_missing() { let store = test_store().await; diff --git a/docs/sandboxes/inference-routing.mdx b/docs/sandboxes/inference-routing.mdx index 0a4e9d726..e9ec37d85 100644 --- a/docs/sandboxes/inference-routing.mdx +++ b/docs/sandboxes/inference-routing.mdx @@ -24,9 +24,9 @@ If code calls an external inference host directly, OpenShell evaluates that traf | Property | Detail | |---|---| | Credentials | No sandbox API keys needed. Credentials come from the configured provider record. The router strips caller-supplied `Authorization` before forwarding the request. | -| Header forwarding | `inference.local` forwards only a per-provider header allowlist. OpenAI routes allow `openai-organization` and `x-model-id`. Anthropic routes allow `anthropic-version` and `anthropic-beta`. Vertex Claude rawPredict routes strip `anthropic-beta` and do not forward `anthropic-version` as a header because the router injects `anthropic_version` into the Vertex request body. NVIDIA routes allow `x-model-id`. All other caller headers are stripped. | +| Header forwarding | `inference.local` forwards only a per-provider header allowlist. OpenAI routes allow `openai-organization` and `x-model-id`. Anthropic routes allow `anthropic-version` and `anthropic-beta`. 
Vertex Claude rawPredict routes strip `anthropic-beta` and do not forward `anthropic-version` as a header because the router injects `anthropic_version` into the Vertex request body. NVIDIA routes allow `x-model-id`. AWS Bedrock routes have no passthrough headers today. All other caller headers are stripped. | | Configuration | One provider and one model define sandbox inference for the active gateway. Every sandbox on that gateway sees the same `inference.local` backend. | -| Provider support | NVIDIA, Anthropic, Google Vertex AI, and any OpenAI-compatible provider all work through the same endpoint. Vertex routes Claude models through `/v1/messages` and non-Anthropic models through `/v1/chat/completions`. The gateway resolves the upstream Vertex host from the provider config, including regional, global, and supported multi-region endpoints. | +| Provider support | NVIDIA, Anthropic, Google Vertex AI, AWS Bedrock (via a translating bridge — direct AWS with SigV4 signing is a separate follow-up), and any OpenAI-compatible provider all work through the same endpoint. Vertex routes Claude models through `/v1/messages` and non-Anthropic models through `/v1/chat/completions`. The gateway resolves the upstream Vertex host from the provider config, including regional, global, and supported multi-region endpoints. | | Streaming reliability | The router tolerates idle gaps of up to 120 seconds between streamed chunks so long reasoning responses are not cut off mid-stream. | | Hot refresh | OpenShell picks up provider credential changes and inference updates without recreating sandboxes. Changes propagate within about 5 seconds by default. 
| @@ -54,6 +54,21 @@ Supported request patterns depend on the provider configured for `inference.loca |---|---|---| | Messages | `POST` | `/v1/messages` | + + + + +| Pattern | Method | Path | +|---|---|---| +| InvokeModel | `POST` | `/model/{modelId}/invoke` | +| InvokeModelWithResponseStream | `POST` | `/model/{modelId}/invoke-with-response-stream` | + +The `{modelId}` segment is constrained to a single non-empty path segment to avoid path-traversal liabilities. Requests such as `/model//invoke` (empty segment) and `/model/a/b/invoke` (multiple segments) do not match. + + +Today the `aws-bedrock` provider type is bridge-fronted only. The router does not inject any auth header on outbound requests; the configured `BEDROCK_BASE_URL` is expected to point at a translating bridge or Bedrock-compatible proxy whose own pod holds operator-side credentials. SigV4 signing for direct AWS Bedrock is deferred to a follow-up release. + + @@ -148,6 +163,35 @@ openshell provider create --name anthropic-prod --type anthropic --from-existing This reads `ANTHROPIC_API_KEY` from your environment. + + + + +```shell +openshell provider create \ + --name bedrock-bridge \ + --type aws-bedrock \ + --credential AWS_ACCESS_KEY_ID=unused-bridge-fronted-shape \ + --config BEDROCK_BASE_URL=http://your-bedrock-bridge.your-ns.svc.cluster.local:8080 +``` + +Then set the inference route, passing `--no-verify` because the validation probe does not yet support Bedrock protocols: + +```shell +openshell inference set \ + --provider bedrock-bridge \ + --model anthropic.claude-3-5-sonnet-20241022-v2:0 \ + --no-verify +``` + +**Why a placeholder credential?** `provider create` requires a non-empty `credentials` map. The bridge-fronted shape ignores AWS credentials at the router level (the bridge holds operator-side auth in its own pod), so any string value satisfies the structural requirement; `unused-bridge-fronted-shape` makes the intent obvious in `openshell provider get` output. When the SigV4 follow-up lands, this becomes a real key. 
+ +**About the bridge-fronted shape.** The router does not inject any auth header on outbound requests. Point `BEDROCK_BASE_URL` at a translating bridge or Bedrock-compatible proxy that handles authentication in its own pod. The bridge is expected to accept Bedrock InvokeModel requests on the patterns listed above and forward to the operator's real upstream. + +**About `--no-verify`.** The default validation probe does not yet recognize the `aws_bedrock_invoke` and `aws_bedrock_invoke_stream` protocols, so without `--no-verify` the `inference set` call would fail before it could mint a route. The first sandbox round-trip is the real verification today. + +**Direct AWS Bedrock is not supported yet.** It requires the router-side SigV4 signer planned for a future release. Until then, always set `BEDROCK_BASE_URL` when you create the provider — the core profile sets `default_base_url: ""`, so route resolution rejects providers without it rather than silently forwarding prompts to AWS with no usable auth. + diff --git a/docs/sandboxes/manage-providers.mdx b/docs/sandboxes/manage-providers.mdx index a6b9654d0..bc522e2ce 100644 --- a/docs/sandboxes/manage-providers.mdx +++ b/docs/sandboxes/manage-providers.mdx @@ -247,6 +247,7 @@ The following provider types are supported. | Type | Environment Variables Injected | Typical Use | |---|---|---| | `anthropic` | `ANTHROPIC_API_KEY` | Anthropic API | +| `aws-bedrock` | `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`, `AWS_REGION` | AWS Bedrock InvokeModel via a translating bridge. Today the router does not inject any auth header; the configured `BEDROCK_BASE_URL` upstream is expected to handle auth itself. Refer to [Inference Routing](/sandboxes/inference-routing). 
| | `claude` | `ANTHROPIC_API_KEY`, `CLAUDE_API_KEY` | Claude Code, Anthropic API | | `codex` | `OPENAI_API_KEY` | OpenAI Codex | | `copilot` | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN` | GitHub Copilot CLI | @@ -273,6 +274,7 @@ The following providers have been tested with `inference.local`. Any provider th | Provider | Name | Type | Base URL | API Key Variable | |---|---|---|---|---| +| AWS Bedrock (via bridge) | `bedrock-bridge` | `aws-bedrock` | Operator-supplied `BEDROCK_BASE_URL` | None at router level (bridge holds creds) | | NVIDIA API Catalog | `nvidia-prod` | `nvidia` | `https://integrate.api.nvidia.com/v1` | `NVIDIA_API_KEY` | | Anthropic | `anthropic-prod` | `anthropic` | `https://api.anthropic.com` | `ANTHROPIC_API_KEY` | | Google Vertex AI | `vertex-prod` | `google-vertex-ai` | Regional, global, or multi-region Vertex endpoint | `GOOGLE_VERTEX_AI_TOKEN` or `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN` | diff --git a/providers/aws-bedrock.yaml b/providers/aws-bedrock.yaml new file mode 100644 index 000000000..e284a3bd9 --- /dev/null +++ b/providers/aws-bedrock.yaml @@ -0,0 +1,57 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +id: aws-bedrock +display_name: AWS Bedrock +description: | + Anthropic + Mistral + Llama models served via the AWS Bedrock InvokeModel API. + + Today this profile supports a bridge-fronted shape only: operators + point `BEDROCK_BASE_URL` at a translating bridge or + Bedrock-compatible proxy that handles auth in its own pod. Direct + AWS Bedrock with router-side SigV4 signing is a separate follow-up; + until that lands the AWS_* credentials below are declarative + schema only — none are required, none are auto-discovered, and the + router does not consume them. +category: inference +inference_capable: true +credentials: + # Declarative-only until the SigV4 follow-up lands. 
None of these are + # required for the bridge-fronted shape; the router does not inject + # them on outbound requests. + - name: aws_access_key_id + description: AWS access key id (used by the SigV4 signer follow-up; unused today) + env_vars: [AWS_ACCESS_KEY_ID] + required: false + - name: aws_secret_access_key + description: AWS secret access key (used by the SigV4 signer follow-up; unused today) + env_vars: [AWS_SECRET_ACCESS_KEY] + required: false + - name: aws_session_token + description: Optional session token for temporary credentials (STS, IAM Roles for Service Accounts) + env_vars: [AWS_SESSION_TOKEN] + required: false + - name: aws_region + description: AWS region (used by the SigV4 signer follow-up; unused today) + env_vars: [AWS_REGION, AWS_DEFAULT_REGION] + required: false +discovery: + # Bridge-fronted routing intentionally does not consume AWS + # credentials, so `--from-existing` does not scan for them today. + # The SigV4 follow-up will repopulate this list. + credentials: [] +endpoints: + # Default endpoint targets us-east-1 for the SigV4-fronted shape that + # comes later. The YAML loader does not yet substitute a `{region}` + # placeholder; operators in other regions or running against a bridge + # override via the `BEDROCK_BASE_URL` config-key (mirrors how the + # `anthropic` profile accepts `ANTHROPIC_BASE_URL`). Until SigV4 signing + # lands, the core profile sets `default_base_url: ""` so route resolution + # rejects providers without `BEDROCK_BASE_URL` rather than silently + # forwarding prompts to AWS with no usable auth. + - host: bedrock-runtime.us-east-1.amazonaws.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce +binaries: [/usr/bin/claude, /usr/local/bin/claude]