diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 26b1ce2..2b2b4fa 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.50.0" + ".": "0.51.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index bed1034..d58220c 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 112 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/kernel%2Fkernel-686a9addd4f9356ca26ff3ff04e1a11466d77a412859829075566394922b715d.yml -openapi_spec_hash: 7a9e9c2023400d44bcbfb87b7ec07708 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/kernel%2Fkernel-a674e3c4c0063942621d1b4e7f67b72f7e240c12dd88564fe16627618ba33dd6.yml +openapi_spec_hash: 8b97c87f0dafe5fc5e5a7365f3687755 config_hash: 08d55086449943a8fec212b870061a3f diff --git a/CHANGELOG.md b/CHANGELOG.md index 8789b91..139da21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,54 @@ # Changelog +## 0.51.0 (2026-04-25) + +Full Changelog: [v0.50.0...v0.51.0](https://github.com/kernel/kernel-python-sdk/compare/v0.50.0...v0.51.0) + +### Features + +* add browser-scoped session client ([7ca6887](https://github.com/kernel/kernel-python-sdk/commit/7ca68877e7011bb83862b7cc810a20d8254ea7dd)) +* Expire stuck IN_PROGRESS managed auth sessions via background worker ([7781a3b](https://github.com/kernel/kernel-python-sdk/commit/7781a3b4635ded02dea60adf85878f50f7b7fb27)) +* Expose browser_session_id on managed auth connection ([0ccb507](https://github.com/kernel/kernel-python-sdk/commit/0ccb50744032b4c31e0575fa7b06fb20503c8f55)) +* generate browser-scoped resource bindings ([53b17c8](https://github.com/kernel/kernel-python-sdk/commit/53b17c8241cc71261d1e96f5929cbd4f05b2064b)) + + +### Bug Fixes + +* address python browser routing ci follow-ups ([9690923](https://github.com/kernel/kernel-python-sdk/commit/9690923666cfe07de76267eee050d7743a8bad6f)) +* evict deleted browser routes ([a873a18](https://github.com/kernel/kernel-python-sdk/commit/a873a18eba3f36937dc177ab981372e395722f8b)) +* finish python browser routing cleanup ([694907a](https://github.com/kernel/kernel-python-sdk/commit/694907ab3419477e7058b85a7365ac4cce941105)) +* normalize browser route cache session IDs ([f4c247b](https://github.com/kernel/kernel-python-sdk/commit/f4c247b425680b54d0ce3c7738fb82313bca7918)) +* normalize python browser request string bodies ([3ce80e7](https://github.com/kernel/kernel-python-sdk/commit/3ce80e767d373b638ba1c2959bf18bf999629db0)) +* quiet generator-script pyright noise ([0bdf85e](https://github.com/kernel/kernel-python-sdk/commit/0bdf85e0c38d4813056b61599273e88c7a64713a)) +* reserve internal browser request query params ([b2c7aac](https://github.com/kernel/kernel-python-sdk/commit/b2c7aacac09a1bb7680cf493e9985438b169286c)) +* satisfy browser-scoped lint checks ([8e8dde2](https://github.com/kernel/kernel-python-sdk/commit/8e8dde241c8817944baaacd155fe196f200868e8)) +* satisfy generated browser-scoped type checks ([b410245](https://github.com/kernel/kernel-python-sdk/commit/b410245e1ad4bf8e29c17c59a5931654567b141f)) +* sniff browser pool route cache updates ([5328730](https://github.com/kernel/kernel-python-sdk/commit/532873072f0400029768d1b7cf54b9fb1428ada9)) +* type-check browser-scoped helpers ([cfff5b4](https://github.com/kernel/kernel-python-sdk/commit/cfff5b4c3635d327dd1ac0779d4e17e395efbec0)) + + +### Chores + +* fix browser-scoped test import order ([fc34859](https://github.com/kernel/kernel-python-sdk/commit/fc34859c4f60f84038b425d9930c512e58134dea)) +* **internal:** more robust bootstrap script ([6c9cdf3](https://github.com/kernel/kernel-python-sdk/commit/6c9cdf3ce828fab358c7e060f4e3313408cad257)) +* keep browser-scoped generator lint clean ([a80716b](https://github.com/kernel/kernel-python-sdk/commit/a80716b791bf1f707aa7869290c47caefb0d9e27)) + + +### Documentation + +* flesh out browser-scoped example ([ca5d188](https://github.com/kernel/kernel-python-sdk/commit/ca5d1884b590634df5623945e9585e0a66228ec3)) + + +### Refactors + +* clean up python browser routing diff ([622f844](https://github.com/kernel/kernel-python-sdk/commit/622f8448a8a32f00b41d6e4890bfaf0a9374bd3e)) +* drop browser-scoped wrapper clients ([dba503e](https://github.com/kernel/kernel-python-sdk/commit/dba503e832d54aa8d462d3d74b3027f8a9e865b6)) +* inline browser resource passthrough returns ([02a2f59](https://github.com/kernel/kernel-python-sdk/commit/02a2f595c7e76ae7f0cea2ec1e88075df3a25be1)) +* move python browser routing rollout to env ([0647d5c](https://github.com/kernel/kernel-python-sdk/commit/0647d5cab166e680bcb3436d1b502c3215492400)) +* rename browser routing subresources config ([3ae9dab](https://github.com/kernel/kernel-python-sdk/commit/3ae9dab6b841e6f1191cdde073e36696f97feb39)) +* simplify browser routing cache ([de0476f](https://github.com/kernel/kernel-python-sdk/commit/de0476fc043df48a58dd4067bb4b3c0fe7a83f0e)) +* sniff browser routes in response hooks ([563de7d](https://github.com/kernel/kernel-python-sdk/commit/563de7d0ac8f141320edb060b4671935808e473a)) + ## 0.50.0 (2026-04-20) Full Changelog: [v0.49.0...v0.50.0](https://github.com/kernel/kernel-python-sdk/compare/v0.49.0...v0.50.0) diff --git a/examples/browser_routing.py b/examples/browser_routing.py new file mode 100644 index 0000000..68627ab --- /dev/null +++ b/examples/browser_routing.py @@ -0,0 +1,25 @@ +"""Example: direct-to-VM browser routing for process exec and raw HTTP.""" + +from typing import Any, cast + +import httpx + +from kernel import Kernel + + +def main() -> None: + with Kernel() as client: + browsers = cast(Any, client.browsers) + browser = browsers.create(headless=True) + try: + response = cast(httpx.Response, browsers.request(browser.session_id, "GET", "https://example.com")) + print("status", response.status_code) + + with browsers.stream(browser.session_id, "GET", "https://example.com") as streamed: + print("streamed-bytes", len(streamed.read())) + finally: + browsers.delete_by_id(browser.session_id) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 5516926..6ff6d3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "kernel" -version = "0.50.0" +version = "0.51.0" description = "The official Python library for the kernel API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/scripts/bootstrap b/scripts/bootstrap index b430fee..fe8451e 100755 --- a/scripts/bootstrap +++ b/scripts/bootstrap @@ -4,7 +4,7 @@ set -e cd "$(dirname "$0")/.." -if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ] && [ "$SKIP_BREW" != "1" ] && [ -t 0 ]; then +if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ] && [ "${SKIP_BREW:-}" != "1" ] && [ -t 0 ]; then brew bundle check >/dev/null 2>&1 || { echo -n "==> Install Homebrew dependencies? (y/N): " read -r response diff --git a/src/kernel/_client.py b/src/kernel/_client.py index 75fe4b6..9b5f867 100644 --- a/src/kernel/_client.py +++ b/src/kernel/_client.py @@ -3,7 +3,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Any, Dict, Mapping, cast +from typing import TYPE_CHECKING, Any, Dict, Type, Mapping, cast from typing_extensions import Self, Literal, override import httpx @@ -14,6 +14,7 @@ Omit, Timeout, NotGiven, + ResponseT, Transport, ProxiesTypes, RequestOptions, @@ -21,6 +22,7 @@ ) from ._utils import is_given, get_async_library from ._compat import cached_property +from ._models import FinalRequestOptions from ._version import __version__ from ._streaming import Stream as Stream, AsyncStream as AsyncStream from ._exceptions import KernelError, APIStatusError @@ -29,6 +31,15 @@ SyncAPIClient, AsyncAPIClient, ) +from .lib.browser_routing.routing import ( + BrowserRouteCache, + BrowserRoutingConfig, + strip_direct_vm_auth, + rewrite_direct_vm_options, + browser_routing_config_from_env, + maybe_evict_browser_route_from_response, + maybe_populate_browser_route_cache_from_response, +) if TYPE_CHECKING: from .resources import ( @@ -79,8 +90,10 @@ class Kernel(SyncAPIClient): # client options api_key: str + browser_route_cache: BrowserRouteCache _environment: Literal["production", "development"] | NotGiven + _browser_routing: BrowserRoutingConfig def __init__( self, @@ -105,6 +118,7 @@ def __init__( # outlining your use-case to help us decide if it should be # part of our public interface in the future. _strict_response_validation: bool = False, + _browser_route_cache: BrowserRouteCache | None = None, ) -> None: """Construct a new synchronous Kernel client instance. @@ -154,6 +168,8 @@ def __init__( custom_query=default_query, _strict_response_validation=_strict_response_validation, ) + self.browser_route_cache = _browser_route_cache or BrowserRouteCache() + self._browser_routing = browser_routing_config_from_env() @cached_property def deployments(self) -> DeploymentsResource: @@ -266,6 +282,37 @@ def default_headers(self) -> dict[str, str | Omit]: **self._custom_headers, } + @override + def _prepare_options(self, options: Any) -> Any: + options = cast(Any, super()._prepare_options(options)) + return rewrite_direct_vm_options(options, cache=self.browser_route_cache, config=self._browser_routing) + + @override + def _prepare_request(self, request: httpx.Request) -> None: + strip_direct_vm_auth(request, cache=self.browser_route_cache) + + @override + def _process_response( + self, + *, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + response: httpx.Response, + stream: bool, + stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + retries_taken: int = 0, + ) -> ResponseT: + maybe_populate_browser_route_cache_from_response(response, cache=self.browser_route_cache) + maybe_evict_browser_route_from_response(response, cache=self.browser_route_cache) + return super()._process_response( + cast_to=cast_to, + options=options, + response=response, + stream=stream, + stream_cls=stream_cls, + retries_taken=retries_taken, + ) + def copy( self, *, @@ -279,6 +326,7 @@ def copy( set_default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, set_default_query: Mapping[str, object] | None = None, + _browser_route_cache: BrowserRouteCache | None = None, _extra_kwargs: Mapping[str, Any] = {}, ) -> Self: """ @@ -312,6 +360,7 @@ def copy( max_retries=max_retries if is_given(max_retries) else self.max_retries, default_headers=headers, default_query=params, + _browser_route_cache=_browser_route_cache or self.browser_route_cache, **_extra_kwargs, ) @@ -356,8 +405,10 @@ def _make_status_error( class AsyncKernel(AsyncAPIClient): # client options api_key: str + browser_route_cache: BrowserRouteCache _environment: Literal["production", "development"] | NotGiven + _browser_routing: BrowserRoutingConfig def __init__( self, @@ -382,6 +433,7 @@ def __init__( # outlining your use-case to help us decide if it should be # part of our public interface in the future. _strict_response_validation: bool = False, + _browser_route_cache: BrowserRouteCache | None = None, ) -> None: """Construct a new async AsyncKernel client instance. @@ -431,6 +483,8 @@ def __init__( custom_query=default_query, _strict_response_validation=_strict_response_validation, ) + self.browser_route_cache = _browser_route_cache or BrowserRouteCache() + self._browser_routing = browser_routing_config_from_env() @cached_property def deployments(self) -> AsyncDeploymentsResource: @@ -543,6 +597,37 @@ def default_headers(self) -> dict[str, str | Omit]: **self._custom_headers, } + @override + async def _prepare_options(self, options: Any) -> Any: + options = cast(Any, await super()._prepare_options(options)) + return rewrite_direct_vm_options(options, cache=self.browser_route_cache, config=self._browser_routing) + + @override + async def _prepare_request(self, request: httpx.Request) -> None: + strip_direct_vm_auth(request, cache=self.browser_route_cache) + + @override + async def _process_response( + self, + *, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + response: httpx.Response, + stream: bool, + stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + retries_taken: int = 0, + ) -> ResponseT: + maybe_populate_browser_route_cache_from_response(response, cache=self.browser_route_cache) + maybe_evict_browser_route_from_response(response, cache=self.browser_route_cache) + return await super()._process_response( + cast_to=cast_to, + options=options, + response=response, + stream=stream, + stream_cls=stream_cls, + retries_taken=retries_taken, + ) + def copy( self, *, @@ -556,6 +641,7 @@ def copy( set_default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, set_default_query: Mapping[str, object] | None = None, + _browser_route_cache: BrowserRouteCache | None = None, _extra_kwargs: Mapping[str, Any] = {}, ) -> Self: """ @@ -589,6 +675,7 @@ def copy( max_retries=max_retries if is_given(max_retries) else self.max_retries, default_headers=headers, default_query=params, + _browser_route_cache=_browser_route_cache or self.browser_route_cache, **_extra_kwargs, ) diff --git a/src/kernel/_version.py b/src/kernel/_version.py index 81d0216..e0b52d1 100644 --- a/src/kernel/_version.py +++ b/src/kernel/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "kernel" -__version__ = "0.50.0" # x-release-please-version +__version__ = "0.51.0" # x-release-please-version diff --git a/src/kernel/lib/browser_routing/__init__.py b/src/kernel/lib/browser_routing/__init__.py new file mode 100644 index 0000000..bdec2fc --- /dev/null +++ b/src/kernel/lib/browser_routing/__init__.py @@ -0,0 +1,3 @@ +from __future__ import annotations + +__all__: list[str] = [] diff --git a/src/kernel/lib/browser_routing/raw_http.py b/src/kernel/lib/browser_routing/raw_http.py new file mode 100644 index 0000000..5e644e3 --- /dev/null +++ b/src/kernel/lib/browser_routing/raw_http.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +from typing import IO, Any, Union, Mapping, cast +from contextlib import contextmanager, asynccontextmanager +from collections.abc import Iterable, Iterator, AsyncIterator + +import httpx + +from .util import sanitize_curl_raw_params +from .routing import BrowserRoute +from ..._types import Body, Timeout, NotGiven, not_given +from ..._models import FinalRequestOptions + +BrowserRawContent = Union[bytes, bytearray, memoryview, str, IO[bytes], Iterable[bytes]] + + +def request_via_browser_route( + parent: Any, + route: BrowserRoute, + method: str, + url: str, + *, + content: BrowserRawContent | None = None, + json: Body | None = None, + headers: Mapping[str, str] | None = None, + params: Mapping[str, object] | None = None, + timeout: float | Timeout | None | NotGiven = not_given, +) -> httpx.Response: + if json is not None and content is not None: + raise TypeError("Passing both `json` and `content` is not supported") + query: dict[str, object] = {**sanitize_curl_raw_params(params), "url": url, "jwt": route.jwt} + options = FinalRequestOptions.construct( + method=method.upper(), + url=route.base_url.rstrip("/") + "/curl/raw", + params=query, + headers=headers or {}, + content=_normalize_binary_content(content), + json_data=json, + timeout=_normalize_timeout(timeout), + ) + return cast(httpx.Response, parent.request(httpx.Response, options)) + + +@contextmanager +def stream_via_browser_route( + parent: Any, + route: BrowserRoute, + method: str, + url: str, + *, + content: BrowserRawContent | None = None, + headers: Mapping[str, str] | None = None, + params: Mapping[str, object] | None = None, + timeout: float | Timeout | None | NotGiven = not_given, +) -> Iterator[httpx.Response]: + query: dict[str, Any] = sanitize_curl_raw_params(params) + query["jwt"] = route.jwt + query["url"] = url + request_headers = {k: v for k, v in parent.default_headers.items() if isinstance(v, str)} + if content is None: + request_headers.pop("Content-Type", None) + if headers: + request_headers.update(headers) + request_headers.pop("Authorization", None) + effective_timeout = parent.timeout if isinstance(timeout, NotGiven) else timeout + with parent._client.stream( + method.upper(), + route.base_url.rstrip("/") + "/curl/raw", + params=query, + headers=request_headers, + content=_normalize_binary_content(content), + timeout=_normalize_timeout(effective_timeout), + ) as response: + yield response + + +async def async_request_via_browser_route( + parent: Any, + route: BrowserRoute, + method: str, + url: str, + *, + content: BrowserRawContent | None = None, + json: Body | None = None, + headers: Mapping[str, str] | None = None, + params: Mapping[str, object] | None = None, + timeout: float | Timeout | None | NotGiven = not_given, +) -> httpx.Response: + if json is not None and content is not None: + raise TypeError("Passing both `json` and `content` is not supported") + query: dict[str, object] = {**sanitize_curl_raw_params(params), "url": url, "jwt": route.jwt} + options = FinalRequestOptions.construct( + method=method.upper(), + url=route.base_url.rstrip("/") + "/curl/raw", + params=query, + headers=headers or {}, + content=_normalize_binary_content(content), + json_data=json, + timeout=_normalize_timeout(timeout), + ) + return cast(httpx.Response, await parent.request(httpx.Response, options)) + + +@asynccontextmanager +async def async_stream_via_browser_route( + parent: Any, + route: BrowserRoute, + method: str, + url: str, + *, + content: BrowserRawContent | None = None, + headers: Mapping[str, str] | None = None, + params: Mapping[str, object] | None = None, + timeout: float | Timeout | None | NotGiven = not_given, +) -> AsyncIterator[httpx.Response]: + query: dict[str, Any] = sanitize_curl_raw_params(params) + query["jwt"] = route.jwt + query["url"] = url + request_headers = {k: v for k, v in parent.default_headers.items() if isinstance(v, str)} + if content is None: + request_headers.pop("Content-Type", None) + if headers: + request_headers.update(headers) + request_headers.pop("Authorization", None) + effective_timeout = parent.timeout if isinstance(timeout, NotGiven) else timeout + async with parent._client.stream( + method.upper(), + route.base_url.rstrip("/") + "/curl/raw", + params=query, + headers=request_headers, + content=_normalize_binary_content(content), + timeout=_normalize_timeout(effective_timeout), + ) as response: + yield response + + +def _normalize_timeout(timeout: float | Timeout | None | NotGiven) -> float | Timeout | None: + return None if isinstance(timeout, NotGiven) else timeout + + +def _normalize_binary_content(content: BrowserRawContent | None) -> bytes | IO[bytes] | Iterable[bytes] | None: + if content is None: + return None + if isinstance(content, str): + return content.encode() + if isinstance(content, bytearray): + return bytes(content) + if isinstance(content, memoryview): + return content.tobytes() + return content diff --git a/src/kernel/lib/browser_routing/routing.py b/src/kernel/lib/browser_routing/routing.py new file mode 100644 index 0000000..aa84cc1 --- /dev/null +++ b/src/kernel/lib/browser_routing/routing.py @@ -0,0 +1,243 @@ +from __future__ import annotations + +import os +import re +import json +from typing import Any, Mapping, cast +from dataclasses import field, dataclass +from urllib.parse import unquote + +import httpx + +from .util import ( + jwt_from_cdp_ws_url, + base_url_from_browser_like, + cdp_ws_url_from_browser_like, + session_id_from_browser_like, +) +from ..._compat import model_copy +from ..._models import FinalRequestOptions +from ..._constants import RAW_RESPONSE_HEADER + + +@dataclass +class BrowserRoute: + session_id: str + base_url: str + jwt: str + + +@dataclass +class BrowserRoutingConfig: + subresources: tuple[str, ...] = field(default_factory=tuple) + + +_BROWSER_ROUTE_CACHEABLE_PATH = re.compile(r"^/(?:v\d+/)?browsers(?:/[^/]+)?/?$") +_BROWSER_DELETE_BY_ID_PATH = re.compile(r"^/(?:v\d+/)?browsers/([^/]+)/?$") +_BROWSER_POOL_ACQUIRE_PATH = re.compile(r"^/(?:v\d+/)?browser_pools/[^/]+/acquire/?$") +_BROWSER_POOL_RELEASE_PATH = re.compile(r"^/(?:v\d+/)?browser_pools/[^/]+/release/?$") + + +def browser_routing_config_from_env() -> BrowserRoutingConfig: + raw = os.environ.get("KERNEL_BROWSER_ROUTING_SUBRESOURCES") + if raw is None: + return BrowserRoutingConfig(subresources=("curl",)) + if raw.strip() == "": + return BrowserRoutingConfig() + + return BrowserRoutingConfig(subresources=tuple(part.strip() for part in raw.split(",") if part.strip())) + + +class BrowserRouteCache: + def __init__(self) -> None: + self._routes: dict[str, BrowserRoute] = {} + + def get(self, session_id: str) -> BrowserRoute | None: + return self._routes.get(_normalize_session_id(session_id)) + + def set(self, route: BrowserRoute) -> None: + normalized_session_id = _normalize_session_id(route.session_id) + self._routes[normalized_session_id] = BrowserRoute( + session_id=normalized_session_id, + base_url=route.base_url.strip().rstrip("/") + "/", + jwt=route.jwt.strip(), + ) + + def delete(self, session_id: str) -> None: + self._routes.pop(_normalize_session_id(session_id), None) + + def values(self) -> list[BrowserRoute]: + return list(self._routes.values()) + + +def browser_route_from_browser(browser: Any) -> BrowserRoute | None: + try: + session_id = session_id_from_browser_like(browser) + except TypeError: + return None + + base_url = base_url_from_browser_like(browser) + if not base_url: + return None + + jwt = None + try: + jwt = jwt_from_cdp_ws_url(cdp_ws_url_from_browser_like(browser)) + except Exception: + jwt = None + if not jwt: + return None + + return BrowserRoute(session_id=session_id, base_url=base_url, jwt=jwt) + + +def _normalize_session_id(session_id: str) -> str: + return session_id.strip() + + +def maybe_populate_browser_route_cache_from_response(response: httpx.Response, *, cache: BrowserRouteCache) -> None: + if not _should_populate_browser_route_cache(response): + return + + try: + populate_browser_route_cache_from_value(response.json(), cache=cache) + except Exception: + # Ignore malformed JSON in routing cache population. + return + + +def maybe_evict_browser_route_from_response(response: httpx.Response, *, cache: BrowserRouteCache) -> None: + if not response.is_success: + return + + session_id = _session_id_to_evict_from_response(response) + if not session_id: + return + + cache.delete(session_id) + + +def populate_browser_route_cache_from_value(value: object, *, cache: BrowserRouteCache) -> None: + if isinstance(value, Mapping): + mapping = cast(Mapping[object, object], value) + route = browser_route_from_browser(mapping) + if route is not None: + cache.set(route) + + for child in mapping.values(): + populate_browser_route_cache_from_value(child, cache=cache) + return + + if isinstance(value, list): + for item in cast(list[object], value): + populate_browser_route_cache_from_value(item, cache=cache) + + +def _should_populate_browser_route_cache(response: httpx.Response) -> bool: + if response.request.headers.get(RAW_RESPONSE_HEADER) == "stream": + return False + + content_type = response.headers.get("content-type", "").lower() + if "application/json" not in content_type: + return False + + path = response.request.url.path + return bool(_BROWSER_ROUTE_CACHEABLE_PATH.match(path) or _BROWSER_POOL_ACQUIRE_PATH.match(path)) + + +def _session_id_to_evict_from_response(response: httpx.Response) -> str | None: + method = response.request.method.upper() + path = response.request.url.path + + if method == "DELETE": + return _session_id_from_browser_delete_path(path) + + if method == "POST": + return _session_id_from_browser_pool_release_request(response.request, path) + + return None + + +def _session_id_from_browser_delete_path(path: str) -> str | None: + match = _BROWSER_DELETE_BY_ID_PATH.match(path) + if match is None: + return None + + session_id = unquote(match.group(1)).strip() + return session_id or None + + +def _session_id_from_browser_pool_release_request(request: httpx.Request, path: str) -> str | None: + if _BROWSER_POOL_RELEASE_PATH.match(path) is None: + return None + + content_type = request.headers.get("content-type", "").lower() + if "application/json" not in content_type: + return None + + try: + body = json.loads(request.content.decode("utf-8")) + except Exception: + return None + + session_id = body.get("session_id") + if not isinstance(session_id, str): + return None + + normalized = session_id.strip() + return normalized or None + + +def rewrite_direct_vm_options( + options: FinalRequestOptions, + *, + cache: BrowserRouteCache, + config: BrowserRoutingConfig, +) -> FinalRequestOptions: + match = match_direct_vm_path(options.url) + if match is None: + return options + + session_id, subresource, suffix = match + if subresource not in set(config.subresources): + return options + + route = cache.get(session_id) + if route is None: + return options + + rewritten = model_copy(options) + rewritten.url = f"{route.base_url.rstrip('/')}/{subresource}{suffix}" + + params: dict[str, object] = {} + params.update(options.params) + params["jwt"] = route.jwt + rewritten.params = params or options.params + return rewritten + + +def strip_direct_vm_auth(request: httpx.Request, *, cache: BrowserRouteCache) -> None: + raw = str(request.url) + for route in cache.values(): + if raw.startswith(route.base_url.rstrip("/") + "/"): + request.headers.pop("Authorization", None) + return + + +def match_direct_vm_path(path: str) -> tuple[str, str, str] | None: + if "://" in path: + return None + + parts = [part for part in path.strip("/").split("/") if part] + for index in range(len(parts) - 2): + if parts[index] != "browsers": + continue + session_id = parts[index + 1] + subresource = parts[index + 2] + if not session_id or not subresource: + return None + suffix = "" + if index + 3 < len(parts): + suffix = "/" + "/".join(parts[index + 3 :]) + return session_id, subresource, suffix + return None diff --git a/src/kernel/lib/browser_routing/util.py b/src/kernel/lib/browser_routing/util.py new file mode 100644 index 0000000..ecfb733 --- /dev/null +++ b/src/kernel/lib/browser_routing/util.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from typing import Any, Mapping, cast +from urllib.parse import parse_qs, urlparse + +# Query keys reserved for /curl/raw; user-supplied `params` must not override these. +CURL_RAW_RESERVED_QUERY_KEYS: frozenset[str] = frozenset({"url", "jwt"}) + + +def sanitize_curl_raw_params(params: Mapping[str, object] | None) -> dict[str, object]: + """Drop reserved keys from user params so they cannot override the target URL or auth.""" + if not params: + return {} + return {k: v for k, v in dict(params).items() if k not in CURL_RAW_RESERVED_QUERY_KEYS} + + +def jwt_from_cdp_ws_url(cdp_ws_url: str) -> str | None: + parsed = urlparse(cdp_ws_url) + values = parse_qs(parsed.query).get("jwt") + if not values: + return None + return values[0] + + +def session_id_from_browser_like(browser: Any) -> str: + sid = getattr(browser, "session_id", None) + if isinstance(sid, str) and sid: + return sid + if isinstance(browser, Mapping): + mapping = cast(Mapping[str, object], browser) + value = mapping.get("session_id") + if isinstance(value, str) and value: + return value + raise TypeError("browser object must have a non-empty session_id") + + +def base_url_from_browser_like(browser: Any) -> str | None: + base_url = getattr(browser, "base_url", None) + if isinstance(base_url, str) and base_url.strip(): + return base_url.strip().rstrip("/") + "/" + if isinstance(browser, Mapping): + mapping = cast(Mapping[str, object], browser) + value = mapping.get("base_url") + if isinstance(value, str) and value.strip(): + return value.strip().rstrip("/") + "/" + return None + + +def cdp_ws_url_from_browser_like(browser: Any) -> str: + cdp_ws_url = getattr(browser, "cdp_ws_url", None) + if isinstance(cdp_ws_url, str) and cdp_ws_url: + return cdp_ws_url + if isinstance(browser, Mapping): + mapping = cast(Mapping[str, object], browser) + value = mapping.get("cdp_ws_url") + if isinstance(value, str) and value: + return value + raise TypeError("browser object must have a non-empty cdp_ws_url") diff --git a/src/kernel/resources/browsers/browsers.py b/src/kernel/resources/browsers/browsers.py index 228e653..f6a2372 100644 --- a/src/kernel/resources/browsers/browsers.py +++ b/src/kernel/resources/browsers/browsers.py @@ -3,7 +3,8 @@ from __future__ import annotations import typing_extensions -from typing import Dict, Mapping, Iterable, Optional, cast +from typing import Dict, Mapping, Iterable, Iterator, Optional, AsyncIterator, cast +from contextlib import contextmanager, asynccontextmanager from typing_extensions import Literal import httpx @@ -80,6 +81,12 @@ from ..._base_client import AsyncPaginator, make_request_options from ...types.browser_curl_response import BrowserCurlResponse from ...types.browser_list_response import BrowserListResponse +from ...lib.browser_routing.raw_http import ( + stream_via_browser_route, + request_via_browser_route, + async_stream_via_browser_route, + async_request_via_browser_route, +) from ...types.browser_create_response import BrowserCreateResponse from ...types.browser_update_response import BrowserUpdateResponse from ...types.browser_persistence_param import BrowserPersistenceParam @@ -510,6 +517,64 @@ def curl( cast_to=BrowserCurlResponse, ) + def request( + self, + id: str, + method: str, + url: str, + *, + content: bytes | bytearray | memoryview | str | Iterable[bytes] | None = None, + json: Body | None = None, + headers: Mapping[str, str] | None = None, + params: Mapping[str, object] | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> httpx.Response: + route = self._client.browser_route_cache.get(id) + if route is None: + raise ValueError( + f"browser route cache does not contain session {id}; create, retrieve, or list the browser before calling browsers.request" + ) + return request_via_browser_route( + self._client, + route, + method, + url, + content=content, + json=json, + headers=headers, + params=params, + timeout=timeout, + ) + + @contextmanager + def stream( + self, + id: str, + method: str, + url: str, + *, + content: bytes | bytearray | memoryview | str | Iterable[bytes] | None = None, + headers: Mapping[str, str] | None = None, + params: Mapping[str, object] | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Iterator[httpx.Response]: + route = self._client.browser_route_cache.get(id) + if route is None: + raise ValueError( + f"browser route cache does not contain session {id}; create, retrieve, or list the browser before calling browsers.stream" + ) + with stream_via_browser_route( + self._client, + route, + method, + url, + content=content, + headers=headers, + params=params, + timeout=timeout, + ) as resp: + yield resp + def delete_by_id( self, id: str, @@ -1012,6 +1077,64 @@ async def curl( cast_to=BrowserCurlResponse, ) + async def request( + self, + id: str, + method: str, + url: str, + *, + content: bytes | bytearray | memoryview | str | Iterable[bytes] | None = None, + json: Body | None = None, + headers: Mapping[str, str] | None = None, + params: Mapping[str, object] | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> httpx.Response: + route = self._client.browser_route_cache.get(id) + if route is None: + raise ValueError( + f"browser route cache does not contain session {id}; create, retrieve, or list the browser before calling browsers.request" + ) + return await async_request_via_browser_route( + self._client, + route, + method, + url, + content=content, + json=json, + headers=headers, + params=params, + timeout=timeout, + ) + + @asynccontextmanager + async def stream( + self, + id: str, + method: str, + url: str, + *, + content: bytes | bytearray | memoryview | str | Iterable[bytes] | None = None, + headers: Mapping[str, str] | None = None, + params: Mapping[str, object] | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncIterator[httpx.Response]: + route = self._client.browser_route_cache.get(id) + if route is None: + raise ValueError( + f"browser route cache does not contain session {id}; create, retrieve, or list the browser before calling browsers.stream" + ) + async with async_stream_via_browser_route( + self._client, + route, + method, + url, + content=content, + headers=headers, + params=params, + timeout=timeout, + ) as resp: + yield resp + async def delete_by_id( self, id: str, diff --git a/src/kernel/types/auth/managed_auth.py b/src/kernel/types/auth/managed_auth.py index d76a02f..59b2d57 100644 --- a/src/kernel/types/auth/managed_auth.py +++ b/src/kernel/types/auth/managed_auth.py @@ -157,6 +157,13 @@ class ManagedAuth(BaseModel): - Ping Identity: _.pingone.com, _.pingidentity.com """ + browser_session_id: Optional[str] = None + """ + ID of the underlying browser session driving the current flow (present when flow + in progress). Use this to inspect or terminate the browser session via the + `/browsers` API. + """ + can_reauth: Optional[bool] = None """ Whether automatic re-authentication is possible (has credential, selectors, and @@ -190,7 +197,12 @@ class ManagedAuth(BaseModel): """ flow_expires_at: Optional[datetime] = None - """When the current flow expires (null when no flow in progress)""" + """When the current flow expires (null when no flow in progress). + + A flow past this timestamp is no longer valid and its `flow_status` will be + `EXPIRED`. Clients may start a new login to supersede a stale `IN_PROGRESS` flow + past this timestamp. + """ flow_status: Optional[Literal["IN_PROGRESS", "SUCCESS", "FAILED", "EXPIRED", "CANCELED"]] = None """Current flow status (null when no flow in progress)""" @@ -216,7 +228,20 @@ class ManagedAuth(BaseModel): """URL to redirect user to for hosted login (present when flow in progress)""" last_auth_at: Optional[datetime] = None - """When the profile was last successfully authenticated""" + """Deprecated alias for `last_auth_check_at`. + + Despite the name, this is the last health-check timestamp, not the last + successful authentication. Use `last_auth_check_at` instead. + """ + + last_auth_check_at: Optional[datetime] = None + """ + When the most recent auth health check ran for this connection, regardless of + outcome. Updated on every health check and does not by itself indicate that the + profile is currently authenticated - use `status` for that. May be newer than + `flow_expires_at` when a flow is still in progress because health checks + continue to run in parallel. + """ live_view_url: Optional[str] = None """Browser live view URL for debugging (present when flow in progress)""" diff --git a/tests/test_browser_routing.py b/tests/test_browser_routing.py new file mode 100644 index 0000000..810de80 --- /dev/null +++ b/tests/test_browser_routing.py @@ -0,0 +1,323 @@ +from __future__ import annotations + +import os +from typing import Any, cast + +import httpx +import respx +import pytest + +from kernel import Kernel, AsyncKernel, InternalServerError +from kernel.lib.browser_routing.util import jwt_from_cdp_ws_url +from kernel.lib.browser_routing.routing import ( + BrowserRoute, + BrowserRouteCache, + browser_route_from_browser, + browser_routing_config_from_env, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") +api_key = "sk-123" + + +def _fake_browser() -> dict[str, object]: + return { + "session_id": "sess-1", + "base_url": "http://browser-session.test/browser/kernel", + "cdp_ws_url": "wss://browser-session.test/browser/cdp?jwt=token-abc", + "webdriver_ws_url": "wss://x", + "created_at": "2020-01-01T00:00:00Z", + "headless": True, + "stealth": False, + "timeout_seconds": 60, + } + + +def _cache_browser(client: Kernel) -> None: + route = browser_route_from_browser(_fake_browser()) + assert route is not None + client.browser_route_cache.set(route) + + +def test_jwt_from_cdp_ws_url() -> None: + assert jwt_from_cdp_ws_url("wss://h/browser/cdp?jwt=abc%2Fdef&x=1") == "abc/def" + + +@respx.mock +def test_routes_allowlisted_browser_subresources_directly_to_vm(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("KERNEL_BROWSER_ROUTING_SUBRESOURCES", "process") + route = respx.post("http://browser-session.test/browser/kernel/process/exec").mock( + return_value=httpx.Response(200, json={"exit_code": 0, "stdout_b64": "", "stderr_b64": ""}) + ) + with Kernel( + base_url=base_url, + api_key=api_key, + _strict_response_validation=True, + ) as client: + _cache_browser(client) + out = client.browsers.process.exec("sess-1", command="echo", args=["hi"]) + + assert route.called + request = cast(httpx.Request, cast(Any, route.calls[0]).request) + assert request.url.params.get("jwt") == "token-abc" + assert request.headers.get("Authorization") is None + assert out.exit_code == 0 + + +@respx.mock +def test_skips_direct_vm_routing_outside_allowlist(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("KERNEL_BROWSER_ROUTING_SUBRESOURCES", "computer") + route = respx.post(f"{base_url}/browsers/sess-1/process/exec").mock( + return_value=httpx.Response(200, json={"exit_code": 0, "stdout_b64": "", "stderr_b64": ""}) + ) + with Kernel( + base_url=base_url, + api_key=api_key, + _strict_response_validation=True, + ) as client: + _cache_browser(client) + client.browsers.process.exec("sess-1", command="echo", args=["hi"]) + + assert route.called + + +@respx.mock +def test_browser_request_uses_curl_raw() -> None: + route = respx.get("http://browser-session.test/browser/kernel/curl/raw").mock( + return_value=httpx.Response(200, content=b"ok") + ) + with Kernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + _cache_browser(client) + response = client.browsers.request("sess-1", "GET", "https://example.com", params={"timeout_ms": 5000}) + + assert response.status_code == 200 + assert response.content == b"ok" + request = cast(httpx.Request, cast(Any, route.calls[0]).request) + assert "curl/raw" in str(request.url) + assert request.url.params.get("jwt") == "token-abc" + + +@respx.mock +def test_browser_request_params_cannot_override_target_url_or_jwt() -> None: + route = respx.get("http://browser-session.test/browser/kernel/curl/raw").mock( + return_value=httpx.Response(200, content=b"ok") + ) + with Kernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + _cache_browser(client) + client.browsers.request( + "sess-1", + "GET", + "https://example.com", + params={"url": "https://evil.example", "jwt": "other", "timeout_ms": 1}, + ) + + request = cast(httpx.Request, cast(Any, route.calls[0]).request) + assert str(request.url.params.get("url")) == "https://example.com" + assert str(request.url.params.get("jwt")) == "token-abc" + assert str(request.url.params.get("timeout_ms")) == "1" + + +def test_browser_request_requires_cached_route() -> None: + with Kernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + _cache_browser(client) + client.browser_route_cache.delete("sess-1") + with pytest.raises(ValueError, match="route cache"): + client.browsers.request("sess-1", "GET", "https://example.com") + + +@respx.mock +def test_browser_create_warms_route_cache() -> None: + create_route = respx.post(f"{base_url}/browsers").mock(return_value=httpx.Response(200, json=_fake_browser())) + routed_request = respx.get("http://browser-session.test/browser/kernel/curl/raw").mock( + return_value=httpx.Response(200, content=b"ok") + ) + with Kernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + browser = client.browsers.create() + routed = client.browsers.request(browser.session_id, "GET", "https://example.com") + + assert create_route.called + assert browser.session_id == "sess-1" + assert routed.status_code == 200 + assert routed_request.called + + +@respx.mock +def test_raw_browser_create_warms_route_cache() -> None: + create_route = respx.post(f"{base_url}/browsers").mock(return_value=httpx.Response(200, json=_fake_browser())) + routed_request = respx.get("http://browser-session.test/browser/kernel/curl/raw").mock( + return_value=httpx.Response(200, content=b"ok") + ) + with Kernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + response = client.browsers.with_raw_response.create() + routed = client.browsers.request("sess-1", "GET", "https://example.com") + + assert create_route.called + assert response.is_closed is True + assert routed.status_code == 200 + assert routed.content == b"ok" + request = cast(httpx.Request, cast(Any, routed_request.calls[0]).request) + assert request.url.params.get("jwt") == "token-abc" + + +@pytest.mark.asyncio +@respx.mock +async def test_async_raw_browser_create_warms_route_cache() -> None: + create_route = respx.post(f"{base_url}/browsers").mock(return_value=httpx.Response(200, json=_fake_browser())) + routed_request = respx.get("http://browser-session.test/browser/kernel/curl/raw").mock( + return_value=httpx.Response(200, content=b"ok") + ) + async with AsyncKernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + response = await client.browsers.with_raw_response.create() + routed = await client.browsers.request("sess-1", "GET", "https://example.com") + + assert create_route.called + assert response.is_closed is True + assert routed.status_code == 200 + assert routed.content == b"ok" + request = cast(httpx.Request, cast(Any, routed_request.calls[0]).request) + assert request.url.params.get("jwt") == "token-abc" + + +@respx.mock +def test_only_browser_metadata_endpoints_warm_route_cache() -> None: + projects_route = respx.get(f"{base_url}/projects").mock(return_value=httpx.Response(200, json=_fake_browser())) + with Kernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + response = client.projects.with_raw_response.list() + with pytest.raises(ValueError, match="route cache"): + client.browsers.request("sess-1", "GET", "https://example.com") + + assert projects_route.called + assert response.is_closed is True + + +@respx.mock +def test_browser_pool_acquire_warms_route_cache() -> None: + acquire_route = respx.post(f"{base_url}/browser_pools/pool-1/acquire").mock( + return_value=httpx.Response(200, json=_fake_browser()) + ) + routed_request = respx.get("http://browser-session.test/browser/kernel/curl/raw").mock( + return_value=httpx.Response(200, content=b"ok") + ) + with Kernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + response = client.browser_pools.with_raw_response.acquire("pool-1") + routed = client.browsers.request("sess-1", "GET", "https://example.com") + + assert acquire_route.called + assert response.is_closed is True + assert routed.status_code == 200 + assert routed_request.called + + +@respx.mock +def test_browser_delete_by_id_evicts_route_cache() -> None: + delete_route = respx.delete(f"{base_url}/browsers/sess-1").mock(return_value=httpx.Response(204)) + with Kernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + _cache_browser(client) + response = client.browsers.with_raw_response.delete_by_id("sess-1") + with pytest.raises(ValueError, match="route cache"): + client.browsers.request("sess-1", "GET", "https://example.com") + + assert delete_route.called + assert response.is_closed is True + + +@respx.mock +def test_browser_pool_release_evicts_route_cache() -> None: + release_route = respx.post(f"{base_url}/browser_pools/pool-1/release").mock(return_value=httpx.Response(204)) + with Kernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + _cache_browser(client) + response = client.browser_pools.with_raw_response.release("pool-1", session_id="sess-1") + with pytest.raises(ValueError, match="route cache"): + client.browsers.request("sess-1", "GET", "https://example.com") + + assert release_route.called + assert response.is_closed is True + + +@respx.mock +def test_failed_browser_delete_by_id_keeps_route_cache() -> None: + delete_route = respx.delete(f"{base_url}/browsers/sess-1").mock( + return_value=httpx.Response(500, json={"error": "boom"}) + ) + routed_request = respx.get("http://browser-session.test/browser/kernel/curl/raw").mock( + return_value=httpx.Response(200, content=b"ok") + ) + with Kernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + _cache_browser(client) + with pytest.raises(InternalServerError): + client.browsers.delete_by_id("sess-1") + routed = client.browsers.request("sess-1", "GET", "https://example.com") + + assert delete_route.called + assert routed.status_code == 200 + assert routed_request.called + + +@respx.mock +def test_failed_browser_pool_release_keeps_route_cache() -> None: + release_route = respx.post(f"{base_url}/browser_pools/pool-1/release").mock( + return_value=httpx.Response(500, json={"error": "boom"}) + ) + routed_request = respx.get("http://browser-session.test/browser/kernel/curl/raw").mock( + return_value=httpx.Response(200, content=b"ok") + ) + with Kernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + _cache_browser(client) + with pytest.raises(InternalServerError): + client.browser_pools.release("pool-1", session_id="sess-1") + routed = client.browsers.request("sess-1", "GET", "https://example.com") + + assert release_route.called + assert routed.status_code == 200 + assert routed_request.called + + +@pytest.mark.asyncio +@respx.mock +async def test_async_browser_pool_release_evicts_route_cache() -> None: + release_route = respx.post(f"{base_url}/browser_pools/pool-1/release").mock(return_value=httpx.Response(204)) + async with AsyncKernel(base_url=base_url, api_key=api_key, _strict_response_validation=True) as client: + route = browser_route_from_browser(_fake_browser()) + assert route is not None + client.browser_route_cache.set(route) + response = await client.browser_pools.with_raw_response.release("pool-1", session_id="sess-1") + with pytest.raises(ValueError, match="route cache"): + await client.browsers.request("sess-1", "GET", "https://example.com") + + assert release_route.called + assert response.is_closed is True + + +def test_browser_route_cache_normalizes_session_id_keys() -> None: + cache = BrowserRouteCache() + cache.set( + BrowserRoute( + session_id=" sess-1 ", + base_url=" http://browser-session.test/browser/kernel/ ", + jwt=" token-abc ", + ) + ) + + route = cache.get("sess-1") + assert route is not None + assert route.session_id == "sess-1" + assert route.base_url == "http://browser-session.test/browser/kernel/" + assert route.jwt == "token-abc" + + cache.delete("sess-1") + assert cache.get("sess-1") is None + + +def test_browser_route_from_browser_requires_base_url_and_jwt() -> None: + assert browser_route_from_browser({**_fake_browser(), "base_url": None}) is None + assert browser_route_from_browser({**_fake_browser(), "cdp_ws_url": None}) is None + + +def test_browser_routing_config_from_env_defaults_to_curl(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("KERNEL_BROWSER_ROUTING_SUBRESOURCES", raising=False) + assert browser_routing_config_from_env().subresources == ("curl",) + + +def test_browser_routing_config_from_env_empty_string_disables_routing(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("KERNEL_BROWSER_ROUTING_SUBRESOURCES", "") + assert browser_routing_config_from_env().subresources == ()