From fae5b248ad1457e7b75208642a81a87b0efd92a5 Mon Sep 17 00:00:00 2001 From: Sam Machin Date: Mon, 11 May 2026 14:15:40 +0100 Subject: [PATCH] add typed inputs --- .gitignore | 1 + CLAUDE.md | 42 +- README.md | 64 +- pyproject.toml | 2 + scripts/generate_stubs.py | 14 +- scripts/regen_models.py | 572 ++++++++++++++++++ src/jambonz_sdk/_models/__init__.py | 24 + .../_models/_generated/__init__.py | 4 + .../_models/_generated/callbacks/__init__.py | 3 + .../_generated/callbacks/agent_turn.py | 168 +++++ .../_models/_generated/callbacks/amd.py | 60 ++ .../_models/_generated/callbacks/base.py | 105 ++++ .../_generated/callbacks/call_status.py | 38 ++ .../_generated/callbacks/conference.py | 19 + .../_generated/callbacks/conference_status.py | 58 ++ .../_generated/callbacks/conference_wait.py | 19 + .../_models/_generated/callbacks/dequeue.py | 35 ++ .../_models/_generated/callbacks/dial.py | 52 ++ .../_models/_generated/callbacks/dial_dtmf.py | 23 + .../_models/_generated/callbacks/dial_hold.py | 39 ++ .../_generated/callbacks/dial_refer.py | 62 ++ .../_models/_generated/callbacks/enqueue.py | 44 ++ .../_generated/callbacks/enqueue_wait.py | 42 ++ .../_models/_generated/callbacks/gather.py | 104 ++++ .../_generated/callbacks/gather_partial.py | 151 +++++ .../_models/_generated/callbacks/listen.py | 27 + .../_models/_generated/callbacks/llm.py | 44 ++ .../_models/_generated/callbacks/message.py | 53 ++ .../_models/_generated/callbacks/play.py | 58 ++ .../_generated/callbacks/session_new.py | 189 ++++++ .../_generated/callbacks/session_reconnect.py | 13 + .../_generated/callbacks/session_redirect.py | 45 ++ .../_models/_generated/callbacks/sip_refer.py | 27 + .../_generated/callbacks/sip_refer_event.py | 28 + .../_generated/callbacks/sip_request.py | 42 ++ .../_generated/callbacks/transcribe.py | 98 +++ .../callbacks/transcribe_translation.py | 43 ++ .../callbacks/tts_streaming_event.py | 125 ++++ .../_generated/callbacks/verb_status.py | 71 +++ 
.../_models/_generated/components/__init__.py | 3 + .../_generated/components/actionHook.py | 47 ++ .../components/actionHookDelayAction.py | 45 ++ .../_models/_generated/components/amd.py | 66 ++ .../_models/_generated/components/auth.py | 21 + .../components/bidirectionalAudio.py | 29 + .../_generated/components/fillerNoise.py | 27 + .../_models/_generated/components/llm_base.py | 120 ++++ .../_generated/components/recognizer.py | 331 ++++++++++ .../recognizer_assemblyAiOptions.py | 85 +++ .../components/recognizer_awsOptions.py | 72 +++ .../components/recognizer_azureOptions.py | 63 ++ .../components/recognizer_cobaltOptions.py | 42 ++ .../components/recognizer_customOptions.py | 36 ++ .../components/recognizer_deepgramOptions.py | 163 +++++ .../recognizer_elevenlabsOptions.py | 57 ++ .../components/recognizer_gladiaOptions.py | 18 + .../components/recognizer_googleOptions.py | 58 ++ .../components/recognizer_houndifyOptions.py | 168 +++++ .../components/recognizer_ibmOptions.py | 62 ++ .../components/recognizer_nuanceOptions.py | 225 +++++++ .../components/recognizer_nvidiaOptions.py | 48 ++ .../components/recognizer_openaiOptions.py | 122 ++++ .../components/recognizer_sonioxOptions.py | 79 +++ .../recognizer_speechmaticsOptions.py | 167 +++++ .../components/recognizer_verbioOptions.py | 54 ++ .../_generated/components/synthesizer.py | 104 ++++ .../_models/_generated/components/target.py | 111 ++++ .../_models/_generated/components/vad.py | 58 ++ .../_models/_generated/jambonz_app.py | 212 +++++++ .../_models/_generated/verbs/__init__.py | 3 + .../_models/_generated/verbs/agent.py | 202 +++++++ .../_models/_generated/verbs/alert.py | 26 + .../_models/_generated/verbs/answer.py | 20 + .../_models/_generated/verbs/conference.py | 92 +++ .../_models/_generated/verbs/config.py | 188 ++++++ .../_models/_generated/verbs/deepgram_s2s.py | 51 ++ .../_models/_generated/verbs/dequeue.py | 44 ++ .../_models/_generated/verbs/dial.py | 132 ++++ 
.../_models/_generated/verbs/dialogflow.py | 140 +++++ .../_models/_generated/verbs/dtmf.py | 30 + .../_models/_generated/verbs/dub.py | 59 ++ .../_generated/verbs/elevenlabs_s2s.py | 72 +++ .../_models/_generated/verbs/enqueue.py | 40 ++ .../_models/_generated/verbs/gather.py | 136 +++++ .../_models/_generated/verbs/google_s2s.py | 44 ++ .../_models/_generated/verbs/hangup.py | 27 + .../_models/_generated/verbs/leave.py | 20 + .../_models/_generated/verbs/listen.py | 109 ++++ .../_models/_generated/verbs/llm.py | 56 ++ .../_models/_generated/verbs/message.py | 58 ++ .../_models/_generated/verbs/openai_s2s.py | 44 ++ .../_models/_generated/verbs/pause.py | 29 + .../_models/_generated/verbs/play.py | 60 ++ .../_models/_generated/verbs/redirect.py | 32 + .../_models/_generated/verbs/rest_dial.py | 87 +++ .../_models/_generated/verbs/s2s.py | 56 ++ .../_models/_generated/verbs/say.py | 68 +++ .../_models/_generated/verbs/sip_decline.py | 34 ++ .../_models/_generated/verbs/sip_refer.py | 48 ++ .../_models/_generated/verbs/sip_request.py | 40 ++ .../_models/_generated/verbs/stream.py | 104 ++++ .../_models/_generated/verbs/tag.py | 29 + .../_models/_generated/verbs/transcribe.py | 48 ++ .../_models/_generated/verbs/ultravox_s2s.py | 44 ++ src/jambonz_sdk/_models/_patches/__init__.py | 10 + src/jambonz_sdk/_models/_registry.py | 66 ++ src/jambonz_sdk/_models/base.py | 43 ++ src/jambonz_sdk/components/__init__.py | 102 ++++ src/jambonz_sdk/verb_builder.py | 122 +++- src/jambonz_sdk/verb_builder.pyi | 104 +++- src/jambonz_sdk/verbs/__init__.py | 82 +++ tests/unit/test_verb_builder.py | 316 +++++++--- 112 files changed, 8263 insertions(+), 155 deletions(-) create mode 100644 scripts/regen_models.py create mode 100644 src/jambonz_sdk/_models/__init__.py create mode 100644 src/jambonz_sdk/_models/_generated/__init__.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/__init__.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/agent_turn.py 
create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/amd.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/base.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/call_status.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/conference.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/conference_status.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/conference_wait.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/dequeue.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/dial.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/dial_dtmf.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/dial_hold.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/dial_refer.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/enqueue.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/enqueue_wait.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/gather.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/gather_partial.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/listen.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/llm.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/message.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/play.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/session_new.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/session_reconnect.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/session_redirect.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/sip_refer.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/sip_refer_event.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/sip_request.py create mode 100644 
src/jambonz_sdk/_models/_generated/callbacks/transcribe.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/transcribe_translation.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/tts_streaming_event.py create mode 100644 src/jambonz_sdk/_models/_generated/callbacks/verb_status.py create mode 100644 src/jambonz_sdk/_models/_generated/components/__init__.py create mode 100644 src/jambonz_sdk/_models/_generated/components/actionHook.py create mode 100644 src/jambonz_sdk/_models/_generated/components/actionHookDelayAction.py create mode 100644 src/jambonz_sdk/_models/_generated/components/amd.py create mode 100644 src/jambonz_sdk/_models/_generated/components/auth.py create mode 100644 src/jambonz_sdk/_models/_generated/components/bidirectionalAudio.py create mode 100644 src/jambonz_sdk/_models/_generated/components/fillerNoise.py create mode 100644 src/jambonz_sdk/_models/_generated/components/llm_base.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_assemblyAiOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_awsOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_azureOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_cobaltOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_customOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_deepgramOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_elevenlabsOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_gladiaOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_googleOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_houndifyOptions.py create mode 100644 
src/jambonz_sdk/_models/_generated/components/recognizer_ibmOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_nuanceOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_nvidiaOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_openaiOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_sonioxOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_speechmaticsOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/recognizer_verbioOptions.py create mode 100644 src/jambonz_sdk/_models/_generated/components/synthesizer.py create mode 100644 src/jambonz_sdk/_models/_generated/components/target.py create mode 100644 src/jambonz_sdk/_models/_generated/components/vad.py create mode 100644 src/jambonz_sdk/_models/_generated/jambonz_app.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/__init__.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/agent.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/alert.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/answer.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/conference.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/config.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/deepgram_s2s.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/dequeue.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/dial.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/dialogflow.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/dtmf.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/dub.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/elevenlabs_s2s.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/enqueue.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/gather.py 
create mode 100644 src/jambonz_sdk/_models/_generated/verbs/google_s2s.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/hangup.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/leave.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/listen.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/llm.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/message.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/openai_s2s.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/pause.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/play.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/redirect.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/rest_dial.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/s2s.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/say.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/sip_decline.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/sip_refer.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/sip_request.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/stream.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/tag.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/transcribe.py create mode 100644 src/jambonz_sdk/_models/_generated/verbs/ultravox_s2s.py create mode 100644 src/jambonz_sdk/_models/_patches/__init__.py create mode 100644 src/jambonz_sdk/_models/_registry.py create mode 100644 src/jambonz_sdk/_models/base.py create mode 100644 src/jambonz_sdk/components/__init__.py create mode 100644 src/jambonz_sdk/verbs/__init__.py diff --git a/.gitignore b/.gitignore index 6911c2a..3672b55 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ htmlcov/ .coverage *.egg .vscode/ +spike/ diff --git a/CLAUDE.md b/CLAUDE.md index 16f9737..ec8f95b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -17,13 +17,23 @@ jambonz is an 
open-source Communications Platform as a Service for building voic ``` src/jambonz_sdk/ ├── __init__.py # Public API re-exports +├── verbs/ # Public re-exports of generated verb models (Gather, Say, …) +├── components/ # Public re-exports of generated component models (Recognizer, …) +├── _models/ # Pydantic v2 models +│ ├── base.py # JambonzModel base class (alias/serialization config) +│ ├── _registry.py # JSON verb name → generated model class lookup +│ ├── _generated/ # Auto-generated from JSON Schemas — do not edit +│ │ ├── verbs/ +│ │ ├── components/ +│ │ └── callbacks/ +│ └── _patches/ # Hand-written supplements (currently empty stub) ├── types/ │ ├── __init__.py -│ ├── components.py # Shared types: Synthesizer, Recognizer, Target, ActionHook, etc. -│ ├── verbs.py # All 26+ verb TypedDicts +│ ├── components.py # Legacy TypedDicts (still used internally) +│ ├── verbs.py # Legacy verb TypedDicts │ ├── rest.py # REST API request/response types │ └── session.py # Call session & WebSocket message types -├── verb_builder.py # VerbBuilder — methods auto-generated from JSON Schema +├── verb_builder.py # VerbBuilder — verb methods route through the generated models ├── verb_registry.py # Verb definitions: maps spec entries → Python methods ├── webhook/ │ ├── __init__.py @@ -47,8 +57,8 @@ src/jambonz_sdk/ - **Transport-agnostic verb building**: Same verb methods on both `WebhookResponse` and `Session` - **Fluent/chainable API**: All verb methods return `self` for method chaining -- **TypedDict for verb schemas**: Type-safe verb construction matching JSON schemas exactly -- **Auto-generated verb methods**: VerbBuilder methods are generated at import time from JSON Schema files (`@jambonz/schema`) + `verb_registry.py` — when the schema changes, the SDK automatically picks up new parameters +- **Typed pydantic models**: Every verb and component has a generated pydantic v2 model. 
Verb methods accept a typed model, a raw dict, or kwargs interchangeably — all three are validated through the model and serialized via `model_dump(mode="json", by_alias=True, exclude_none=True)`. Typos and missing required fields fail at construction time rather than at the jambonz server. Dict and kwargs styles remain for backwards compatibility. +- **Auto-generated verb methods**: VerbBuilder methods are generated at import time from JSON Schema files (`@jambonz/schema`) + `verb_registry.py`. When the schema changes, the SDK picks up new parameters automatically. - **aiohttp for both HTTP and WebSocket**: Single dependency for REST client and WS transport ## Verb System @@ -95,10 +105,32 @@ python scripts/sync_schema.py v0.1.1 # Copy from a local directory python scripts/sync_schema.py --local /path/to/schema + +# After any schema sync, regenerate the pydantic models and the .pyi stubs: +python scripts/regen_models.py +python scripts/generate_stubs.py ``` Source: https://github.com/jambonz/schema +### Pydantic model regeneration + +`scripts/regen_models.py` mirrors the bundled schemas, runs +`datamodel-code-generator`, applies post-gen patches (declared as tables +at the top of the script), then writes: + +- `src/jambonz_sdk/_models/_generated/` — generated classes, committed + as a build artifact so end users don't need codegen tools installed +- `src/jambonz_sdk/verbs/__init__.py` and + `src/jambonz_sdk/components/__init__.py` — user-facing re-exports + +Post-processing performs: (1) `BaseModel` → `JambonzModel`, (2) `AnyUrl` → `str` +(the schemas use `format: uri` even for relative paths like `/menu`), +(3) nested `dict[str, Any]` → real model classes on fields like `Gather.say`, +`Gather.play`, and `Agent.llm`, (4) appending cross-field validators to +specific classes (`Gather` digit-bounds rules). All these live as tables +in the script — extend them there, never edit generated files directly. 
+ ## AI Agent Support ### AGENTS.md diff --git a/README.md b/README.md index 5752142..d48bd82 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,43 @@ app.router.add_post("/incoming", handle_incoming) web.run_app(app, port=3000) ``` +### Typed input (recommended) + +Every verb method also accepts a pydantic model for full IDE autocomplete and +typo-proof nested fields: + +```python +from jambonz_sdk.webhook import WebhookResponse +from jambonz_sdk.verbs import Gather, Say +from jambonz_sdk.components import Recognizer + +jambonz = WebhookResponse() +jambonz.gather(Gather( + input=["speech", "digits"], + action_hook="/menu", + timeout=15, + num_digits=1, + say=Say(text="Press 1 for sales, 2 for support"), + recognizer=Recognizer(vendor="deepgram", language="en-US"), +)).hangup() +``` + +Dict and kwargs styles both still work and coerce automatically, so existing +apps keep running unchanged. Mix and match as you like: + +```python +jambonz.gather( + input=["speech"], + say=Say(text="Hello"), # model + recognizer={"vendor": "google"}, # dict +) +``` + +Unknown fields, missing required fields, wrong types, and violated cross-field +rules (e.g. `numDigits` combined with `minDigits`/`maxDigits`) raise +`pydantic.ValidationError` at construction time — no more hunting silent +failures after a round-trip to the jambonz server. + ### WebSocket ```python @@ -80,14 +117,27 @@ The SDK does **not** hardcode verb method signatures. Instead, verb methods (`.s - Every method has **real typed parameters** (not `**kwargs: Any`) so IDEs show autocomplete and type hints - Verb synonyms (`stream` ↔ `listen`, `openai_s2s` → `llm` with `vendor: "openai"`) are handled by the registry +### Typed pydantic models + +Alongside the schema-driven method signatures, the SDK ships pydantic v2 models +generated from the same JSON Schemas. They live under `jambonz_sdk.verbs` and +`jambonz_sdk.components` and can be passed directly to any verb method. 
+ +These models are a build artifact — produced by `scripts/regen_models.py` +from the bundled schemas — and get checked into the repo so end users don't +need codegen tools to install the SDK. + ### Updating the schema ```bash # Download the pinned version from @jambonz/schema: python scripts/sync_schema.py -# Or copy from a local clone: -python scripts/sync_schema.py --local /path/to/schema +# Regenerate the typed pydantic models so they match: +python scripts/regen_models.py + +# Regenerate the .pyi stubs so IDE autocomplete stays in sync: +python scripts/generate_stubs.py ``` If a **new verb** was added (not just new properties), add one line to `verb_registry.py`: @@ -132,12 +182,14 @@ source .venv/bin/activate pip install -e ".[dev]" # Run tests -pytest tests/unit/ # Fast unit tests (253) -pytest tests/integration/ # Real server tests (26) -pytest # All 279 tests +pytest tests/unit/ # Fast unit tests +pytest tests/integration/ # Real server tests +pytest # All tests -# Sync schema from upstream +# Sync schema from upstream and regenerate pydantic models + stubs python scripts/sync_schema.py +python scripts/regen_models.py +python scripts/generate_stubs.py ``` ## License diff --git a/pyproject.toml b/pyproject.toml index 28c9ea7..00366b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ classifiers = [ dependencies = [ "aiohttp>=3.9", "jsonschema>=4.20", + "pydantic>=2.10", "referencing>=0.31", "typing_extensions>=4.0; python_version < '3.11'", ] @@ -43,6 +44,7 @@ dev = [ "aioresponses>=0.7", "ruff>=0.4", "mypy>=1.10", + "datamodel-code-generator[http]>=0.25", ] [tool.hatch.build.targets.wheel] diff --git a/scripts/generate_stubs.py b/scripts/generate_stubs.py index 32cb859..c98cf55 100644 --- a/scripts/generate_stubs.py +++ b/scripts/generate_stubs.py @@ -97,6 +97,11 @@ def generate() -> str: '"""Auto-generated type stubs for VerbBuilder.', "", "DO NOT EDIT — regenerate with: python scripts/generate_stubs.py", + "", + "Each verb method 
accepts three interchangeable input forms:", + " 1. a positional generated model instance", + " 2. a positional dict payload", + " 3. keyword arguments matching the verb's JSON Schema", '"""', "", "from typing import Any, Self", @@ -118,25 +123,24 @@ def generate() -> str: properties = spec.get("properties", {}) required = set(spec.get("required", [])) - # Build parameter list - params = ["self"] + # First positional-only arg: model or dict. Then kwargs mirroring + # the schema properties for kwargs-style autocomplete. + params = ["self, arg: Any = ..., /"] for prop_name, prop_spec in properties.items(): py_name = "from_" if prop_name == "from" else prop_name py_type = resolve_type(prop_spec) params.append(f"{py_name}: {py_type} = ...") - - # Add **kwargs for forward compatibility params.append("**kwargs: Any") param_str = ",\n ".join(params) - # Build docstring doc_lines = [f' """{verb_def.doc}'] if required: doc_lines.append("") doc_lines.append(f" Required: {', '.join(sorted(required))}") doc_lines.append("") doc_lines.append(" Args:") + doc_lines.append(" arg: a typed model instance or a dict payload (alternative to kwargs).") for prop_name, prop_spec in properties.items(): py_name = "from_" if prop_name == "from" else prop_name py_type = resolve_type(prop_spec) diff --git a/scripts/regen_models.py b/scripts/regen_models.py new file mode 100644 index 0000000..a9dcd04 --- /dev/null +++ b/scripts/regen_models.py @@ -0,0 +1,572 @@ +#!/usr/bin/env python3 +"""Regenerate pydantic v2 models from bundled jambonz JSON Schemas. + +Produces ``src/jambonz_sdk/_models/_generated/`` as a committed build +artifact. Re-run whenever ``src/jambonz_sdk/schema/`` changes (e.g. after +``scripts/sync_schema.py`` pulls a new upstream version). + +Pipeline: + +1. Mirror bundled schemas to a temp dir with bare filenames (no + ``.schema.json`` suffix) so that the generator's ``$ref`` resolver — + which follows the ``$id`` URL relative to the file — finds the target. 
Also create aliases for schemas whose ``$id`` uses a colon but whose filename uses a hyphen (``sip-decline`` → ``sip:decline``, etc). + +2. Run ``datamodel-code-generator`` across the whole mirrored tree. This + produces one Python module per schema with shared types emitted once + and imported where needed. + +3. Apply post-generation patches: + - rewrite ``class Foo(BaseModel):`` → ``class Foo(JambonzModel):`` + so every model inherits the populate-by-name / serialize-by-alias / + extra="forbid" defaults. Generated ``model_config = ConfigDict( + extra="allow")`` overrides on individual classes are preserved + (pydantic v2 merges model_config with the parent). + - rewrite ``AnyUrl`` to ``str``. The schemas use ``format: uri`` on + webhook fields but accept relative paths like ``/menu``, which + ``pydantic.AnyUrl`` rejects. Upstream schemas should switch to + ``format: uri-reference``; until then, relax the type. + - wire nested ``dict[str, Any]`` fields to their proper models + (``Gather.say`` → ``Say``, ``Gather.play`` → ``Play``, + ``Agent.llm`` → ``LLM``, matching the ``NESTED_OVERRIDES`` table). The schemas declare these as + ``additionalProperties: true`` untyped objects but in practice + they follow the corresponding verb's shape. + - record the upstream schema version in ``__jambonz_schema_version__``. + +4. Format the output with ``ruff format``. 
+ +Usage:: + + # Install the codegen toolchain into your dev venv first: + pip install 'datamodel-code-generator[http]' + + # Then regenerate: + python scripts/regen_models.py +""" + +from __future__ import annotations + +import argparse +import re +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +SCHEMA_DIR = ROOT / "src" / "jambonz_sdk" / "schema" +OUTPUT_DIR = ROOT / "src" / "jambonz_sdk" / "_models" / "_generated" +PUBLIC_VERBS_INIT = ROOT / "src" / "jambonz_sdk" / "verbs" / "__init__.py" +PUBLIC_COMPONENTS_INIT = ROOT / "src" / "jambonz_sdk" / "components" / "__init__.py" + +# Schemas whose $id uses a colon (sip:decline) but whose filename on disk +# uses a hyphen (sip-decline). The generator's ref resolver needs the +# colon form to resolve refs like "verbs/sip:decline". +COLON_ID_ALIASES = { + "verbs/sip-decline": "verbs/sip:decline", + "verbs/sip-refer": "verbs/sip:refer", + "verbs/sip-request": "verbs/sip:request", + "verbs/rest_dial": "verbs/rest:dial", +} + +# Nested untyped fields in specific verbs that should reference real +# models. Schema declares these as ``additionalProperties: true`` objects +# but in practice they follow the corresponding verb's shape. +# +# Format: (verb_module, class_name, field_name, target_module, target_class) +NESTED_OVERRIDES: list[tuple[str, str, str, str, str]] = [ + ("verbs/gather.py", "Gather", "say", ".say", "Say"), + ("verbs/gather.py", "Gather", "play", ".play", "Play"), + ("verbs/agent.py", "Agent", "llm", ".llm", "LLM"), +] + +# Nested classes the re-export heuristic wouldn't pick up (because they +# live alongside a primary class in the same file) but that users need +# typed access to. Each entry is promoted into the public +# ``jambonz_sdk.components`` module so it imports alongside the other +# component types. 
+# +# Format: (module_path_relative_to_generated, class_name) +PROMOTE_TO_COMPONENTS: list[tuple[str, str]] = [ + ("_generated.verbs.agent", "McpServer"), + ("_generated.verbs.agent", "BargeIn"), +] + + +# Cross-field validators appended to specific generated classes after +# codegen. These express rules that don't fit cleanly into JSON Schema. +# Each entry: (module_path, class_name, imports, validator_body). +# The validator body is appended verbatim into the class body; it must +# use ``self`` and return ``self``. +CLASS_VALIDATORS: list[tuple[str, str, str, str]] = [ + ( + "verbs/gather.py", + "Gather", + "from pydantic import model_validator", + ''' + @model_validator(mode="after") + def _check_digit_bounds(self) -> "Gather": + """``numDigits`` is mutually exclusive with ``min/maxDigits``.""" + if self.num_digits is not None and ( + self.min_digits is not None or self.max_digits is not None + ): + raise ValueError( + "numDigits cannot be combined with minDigits or maxDigits" + ) + if ( + self.min_digits is not None + and self.max_digits is not None + and self.min_digits > self.max_digits + ): + raise ValueError("minDigits cannot exceed maxDigits") + return self +''', + ), +] + + +def mirror_schemas(mirror_dir: Path) -> None: + """Copy ``*.schema.json`` files into ``mirror_dir`` with bare names.""" + schema_dir = mirror_dir / "schema" + for subdir in ("verbs", "components", "callbacks"): + src_sub = SCHEMA_DIR / subdir + if not src_sub.is_dir(): + continue + dst_sub = schema_dir / subdir + dst_sub.mkdir(parents=True, exist_ok=True) + for src in src_sub.glob("*.schema.json"): + bare_name = src.name[: -len(".schema.json")] + shutil.copy2(src, dst_sub / bare_name) + + # Root app schema + root = SCHEMA_DIR / "jambonz-app.schema.json" + if root.is_file(): + shutil.copy2(root, schema_dir / "jambonz-app") + + # Colon-named aliases for sip:* and rest:dial + for src_rel, dst_rel in COLON_ID_ALIASES.items(): + src = schema_dir / src_rel + if src.is_file(): + 
shutil.copy2(src, schema_dir / dst_rel) + + +def read_schema_version() -> str: + """Read the pinned schema version from ``scripts/sync_schema.py``.""" + sync_script = ROOT / "scripts" / "sync_schema.py" + for line in sync_script.read_text().splitlines(): + m = re.match(r'SCHEMA_VERSION\s*=\s*"([^"]+)"', line) + if m: + return m.group(1) + return "unknown" + + +def run_codegen(input_dir: Path, output_dir: Path) -> None: + """Invoke ``datamodel-codegen`` across the mirrored schema tree.""" + if output_dir.exists(): + shutil.rmtree(output_dir) + cmd = [ + "datamodel-codegen", + "--input", str(input_dir), + "--input-file-type", "jsonschema", + "--output", str(output_dir), + "--output-model-type", "pydantic_v2.BaseModel", + "--use-standard-collections", + "--use-union-operator", + "--target-python-version", "3.10", + "--snake-case-field", + "--use-schema-description", + "--use-field-description", + "--use-double-quotes", + "--reuse-model", + ] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + print(result.stdout, file=sys.stderr) + print(result.stderr, file=sys.stderr) + raise SystemExit(f"datamodel-codegen failed with exit code {result.returncode}") + + +# Match lines like ``class Foo(BaseModel):`` or ``class Foo(BaseModel, ...):``. +_BASEMODEL_CLASS_RE = re.compile(r"^(class\s+\w+\()BaseModel(\s*[),])", re.MULTILINE) + + +def rewrite_basemodel_to_jambonz(source: str) -> str: + """Replace ``BaseModel`` base class with ``JambonzModel`` and add import.""" + new_source, n = _BASEMODEL_CLASS_RE.subn(r"\1JambonzModel\2", source) + if n == 0: + return source + + # Drop ``BaseModel`` from the pydantic import line (keep other names). + new_source = re.sub( + r"^from pydantic import (.+)$", + lambda m: _rewrite_pydantic_import(m.group(1), drop={"BaseModel"}), + new_source, + count=1, + flags=re.MULTILINE, + ) + + # Work out the relative import path to jambonz_sdk._models.base. 
+ # Every generated file lives under _generated/ (optionally inside a + # subdirectory like verbs/ or components/). From _generated/* we go + # up one level to _models, then into base. + # Insert the import just after the last existing ``from`` line so + # ruff-format leaves it alone. + lines = new_source.splitlines(keepends=True) + import_line = "from jambonz_sdk._models.base import JambonzModel\n" + insert_at = 0 + for i, line in enumerate(lines): + if line.startswith(("from ", "import ")): + insert_at = i + 1 + # Preserve a blank line between imports and code if present. + lines.insert(insert_at, import_line) + return "".join(lines) + + +def _rewrite_pydantic_import(import_list: str, drop: set[str]) -> str: + names = [n.strip() for n in import_list.split(",") if n.strip()] + names = [n for n in names if n not in drop] + if not names: + return "" # caller's regex replaces the whole line; we emit empty + return "from pydantic import " + ", ".join(names) + + +def rewrite_field_default_kwarg(source: str) -> str: + """Rewrite ``Field(None, ...)`` → ``Field(default=None, ...)``. + + Pylance's dataclass_transform pass treats the *keyword* ``default=`` as + the authoritative default for optional-ness. When the generator emits + ``Field(None, alias="x")`` Pylance can't always tell the field has a + default and flags it as missing at call sites like + ``Synthesizer(vendor="aws")``. Naming the kwarg fixes this without + changing runtime behavior (pydantic accepts either form). + + Rewrites literal defaults (``None``, ``True``, ``False``, numbers, + quoted strings) and the required-sentinel ``...``. Leaves complex + expressions alone. + """ + # Capture ``Field(`` followed by a literal default, followed by a + # comma or closing paren. Do NOT rewrite the required sentinel + # ``Field(...)`` — Pylance recognizes that as a no-default required + # field, but treats ``Field(default=...)`` as optional. 
Optional + # defaults (None, True, False, numbers, strings) are the ones that + # need the explicit kwarg to be seen as having a default. + default_literal = r"(?:None|True|False|-?\d+(?:\.\d+)?|\"[^\"]*\"|'[^']*')" + pattern = re.compile(rf"\bField\(\s*({default_literal})(\s*[,)])") + return pattern.sub(r"Field(default=\1\2", source) + + +def rewrite_anyurl_to_str(source: str) -> str: + """Replace ``AnyUrl`` uses with ``str`` and drop the import.""" + if "AnyUrl" not in source: + return source + source = re.sub(r"\bAnyUrl\b", "str", source) + # Clean up the import — AnyUrl became "str" literally in the import + # list, which isn't valid; drop it. + source = re.sub( + r"^from pydantic import (.+)$", + lambda m: _rewrite_pydantic_import(m.group(1), drop={"str"}), + source, + count=1, + flags=re.MULTILINE, + ) + # Collapse any blank "from pydantic import" lines produced above. + source = re.sub(r"^from pydantic import\s*\n", "", source, flags=re.MULTILINE) + return source + + +def apply_class_validators(output_dir: Path) -> None: + """Append cross-field validator bodies to specific generated classes.""" + for rel_path, class_name, import_line, body in CLASS_VALIDATORS: + path = output_dir / rel_path + if not path.is_file(): + print(f" warning: validator target {rel_path} missing — skipping") + continue + source = path.read_text() + + if import_line and import_line not in source: + lines = source.splitlines(keepends=True) + insert_at = 0 + for i, line in enumerate(lines): + if line.startswith(("from ", "import ")): + insert_at = i + 1 + lines.insert(insert_at, import_line + "\n") + source = "".join(lines) + + # Append the validator body at the end of the target class. The + # class body runs to the end of file or the next top-level class. 
+ class_start = source.find(f"class {class_name}(") + if class_start < 0: + print(f" warning: class {class_name} not found in {rel_path}") + continue + next_class = re.search( + r"^class\s+\w+\(", source[class_start + 1:], re.MULTILINE + ) + class_end = ( + class_start + 1 + next_class.start() if next_class else len(source) + ) + # Strip trailing whitespace/newlines off the class body before + # appending so ruff-format produces clean output. + class_body = source[class_start:class_end].rstrip() + source = source[:class_start] + class_body + body + source[class_end:] + path.write_text(source) + + +def apply_nested_overrides(output_dir: Path) -> None: + """Rewrite specific ``dict[str, Any]`` fields to reference real models.""" + for rel_path, class_name, field_name, target_module, target_class in NESTED_OVERRIDES: + path = output_dir / rel_path + if not path.is_file(): + print(f" warning: nested-override target {rel_path} missing — skipping") + continue + source = path.read_text() + + # Find the target class body and rewrite the field annotation in place. + # Pattern matches either "field: dict[str, Any]" or + # "field: dict[str, Any] | None = ...". + field_re = re.compile( + rf"(^\s*{field_name}:\s*)dict\[str,\s*Any\](.*)$", + re.MULTILINE, + ) + class_start = source.find(f"class {class_name}(") + if class_start < 0: + print(f" warning: class {class_name} not found in {rel_path}") + continue + # Apply the regex only to the class body (from class_start to end of file + # or next top-level class). 
+ next_class = re.search(r"^class\s+\w+\(", source[class_start + 1:], re.MULTILINE) + class_end = class_start + 1 + (next_class.start() if next_class else len(source)) + + head, body, tail = source[:class_start], source[class_start:class_end], source[class_end:] + new_body, n = field_re.subn(rf"\g<1>{target_class}\g<2>", body) + if n == 0: + print(f" warning: field {class_name}.{field_name} not found to override") + continue + + # Add the import at the module top (before the first class definition). + import_line = f"from {target_module} import {target_class}\n" + if import_line not in head: + # Insert after the last existing import line in head. + head_lines = head.splitlines(keepends=True) + insert_at = 0 + for i, line in enumerate(head_lines): + if line.startswith(("from ", "import ")): + insert_at = i + 1 + head_lines.insert(insert_at, import_line) + head = "".join(head_lines) + + path.write_text(head + new_body + tail) + + +def postprocess_file(path: Path) -> None: + source = path.read_text() + source = rewrite_anyurl_to_str(source) + source = rewrite_basemodel_to_jambonz(source) + source = rewrite_field_default_kwarg(source) + # Drop any now-empty ``from pydantic import`` lines produced by the + # rewrites above (happens when BaseModel or AnyUrl was the sole import). + source = re.sub(r"^from pydantic import\s*\n", "", source, flags=re.MULTILINE) + path.write_text(source) + + +def write_public_reexports(output_dir: Path) -> None: + """Emit user-facing ``jambonz_sdk.verbs`` / ``.components`` packages. + + - ``verbs/`` re-exports the one top-level class per file in + ``_generated/verbs/`` that is named after the verb (e.g. ``Gather``, + ``OpenaiS2S``). Files with numeric-suffixed internal class names + (``TurnDetection1``) skip those since they're not meant to be a + user-facing API. + - ``components/`` re-exports the one top-level class per file in + ``_generated/components/`` named after the schema (e.g. + ``Recognizer``). 
Per-verb helper types (``Auth``, ``Method``) do + not make it into the public surface because their names aren't + globally unique. + """ + # Regex finds top-level classes with their first base class name. + class_re = re.compile(r"^class\s+(\w+)\(([\w\.]+)", re.MULTILINE) + + def _primary_class_of(path: Path, source: str) -> str | None: + """Pick the one class in a module intended as the public surface. + + Prefers a class whose name matches the file's stem in CapWords + (``actionHook`` → ``ActionHook``, ``sip_decline`` → ``SipDecline``). + Falls back to the last model-class declared in the file. + """ + stem = path.stem + # Collect every model/root class declared in the module. + keep_bases = ("JambonzModel", "LlmBaseProperties") + classes: list[str] = [] + for name, base in class_re.findall(source): + if base in keep_bases or base.startswith("RootModel"): + classes.append(name) + if not classes: + return None + + cap_variants = { + _to_cap(stem), + _to_cap(stem).upper(), + stem.upper(), + stem.replace("_", "").upper(), + } + for name in classes: + if name in cap_variants: + return name + # Fall back to the last class; the generator puts the outer + # public class after its inner helpers. 
+ return classes[-1] + + verb_lines: list[tuple[str, str]] = [] + component_lines: list[tuple[str, str]] = [] + + verbs_dir = output_dir / "verbs" + for path in sorted(verbs_dir.glob("*.py")): + if path.name == "__init__.py": + continue + source = path.read_text() + primary = _primary_class_of(path, source) + if not primary: + continue + module_dotted = f"jambonz_sdk._models._generated.verbs.{path.stem}" + verb_lines.append((primary, module_dotted)) + + components_dir = output_dir / "components" + for path in sorted(components_dir.glob("*.py")): + if path.name == "__init__.py": + continue + source = path.read_text() + primary = _primary_class_of(path, source) + if not primary: + continue + module_dotted = f"jambonz_sdk._models._generated.components.{path.stem}" + component_lines.append((primary, module_dotted)) + + # Explicit promotions for nested helper classes users need typed access + # to (e.g. ``McpServer`` inside ``verbs/agent.py``). + for rel_module, class_name in PROMOTE_TO_COMPONENTS: + module_dotted = f"jambonz_sdk._models.{rel_module}" + component_lines.append((class_name, module_dotted)) + + PUBLIC_VERBS_INIT.parent.mkdir(parents=True, exist_ok=True) + PUBLIC_VERBS_INIT.write_text(_render_reexport_module("verbs", verb_lines)) + + PUBLIC_COMPONENTS_INIT.parent.mkdir(parents=True, exist_ok=True) + PUBLIC_COMPONENTS_INIT.write_text(_render_reexport_module("components", component_lines)) + + +def _to_cap(stem: str) -> str: + """Convert a snake/kebab module stem into CapWords form.""" + parts = re.split(r"[_\-]+", stem) + return "".join(p[:1].upper() + p[1:] for p in parts if p) + + +def _render_reexport_module(kind_label: str, exports: list[tuple[str, str]]) -> str: + seen: dict[str, str] = {} + for cls, mod in exports: + seen.setdefault(cls, mod) + lines = [ + f'"""Public re-exports of generated jambonz {kind_label} models.', + "", + "Auto-generated by ``scripts/regen_models.py`` — do not edit by hand.", + "", + "Typical usage::", + "", + f" from 
jambonz_sdk.{kind_label} import Gather, Say", + '"""', + "", + "from __future__ import annotations", + "", + ] + for cls in sorted(seen): + lines.append(f"from {seen[cls]} import {cls}") + lines.append("") + lines.append("__all__ = [") + for cls in sorted(seen): + lines.append(f' "{cls}",') + lines.append("]") + lines.append("") + return "\n".join(lines) + + +def write_schema_version(output_dir: Path, version: str) -> None: + init = output_dir / "__init__.py" + if not init.is_file(): + return + text = init.read_text() + marker = f'__jambonz_schema_version__ = "{version}"\n' + if "__jambonz_schema_version__" in text: + text = re.sub( + r'__jambonz_schema_version__\s*=\s*".*?"\n', + marker, + text, + ) + else: + text = marker + text + init.write_text(text) + + +def ruff_format(output_dir: Path) -> None: + subprocess.run( + ["ruff", "format", str(output_dir)], + check=False, + capture_output=True, + ) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Regenerate pydantic models from JSON Schemas.") + parser.add_argument( + "--keep-mirror", + action="store_true", + help="Keep the temp mirror dir for debugging (normally cleaned up).", + ) + args = parser.parse_args() + + version = read_schema_version() + print(f"Regenerating models for schema version {version}") + + tmp = Path(tempfile.mkdtemp(prefix="jambonz-schema-mirror-")) + try: + mirror_schemas(tmp) + print(f" mirrored schemas → {tmp / 'schema'}") + + run_codegen(tmp / "schema", OUTPUT_DIR) + print(f" generated → {OUTPUT_DIR.relative_to(ROOT)}") + + # Every .py file gets BaseModel → JambonzModel and AnyUrl → str. 
+ py_files = list(OUTPUT_DIR.rglob("*.py")) + for path in py_files: + postprocess_file(path) + print(f" post-processed {len(py_files)} files") + + apply_nested_overrides(OUTPUT_DIR) + print(" applied nested-object overrides") + + apply_class_validators(OUTPUT_DIR) + print(" applied cross-field validators") + + write_schema_version(OUTPUT_DIR, version) + + write_public_reexports(OUTPUT_DIR) + print( + f" wrote public re-exports → " + f"{PUBLIC_VERBS_INIT.relative_to(ROOT)}, " + f"{PUBLIC_COMPONENTS_INIT.relative_to(ROOT)}" + ) + + ruff_format(OUTPUT_DIR) + ruff_format(PUBLIC_VERBS_INIT.parent) + ruff_format(PUBLIC_COMPONENTS_INIT.parent) + print(" formatted with ruff") + finally: + if args.keep_mirror: + print(f" (kept mirror at {tmp})") + else: + shutil.rmtree(tmp, ignore_errors=True) + + print("Done.") + + +if __name__ == "__main__": + main() diff --git a/src/jambonz_sdk/_models/__init__.py b/src/jambonz_sdk/_models/__init__.py new file mode 100644 index 0000000..2662c91 --- /dev/null +++ b/src/jambonz_sdk/_models/__init__.py @@ -0,0 +1,24 @@ +"""Pydantic v2 models for jambonz verbs and components. + +This package contains: + +- ``base.JambonzModel`` — hand-written base class with alias/serialization + config inherited by every generated model. +- ``_generated/`` — models generated from @jambonz/schema JSON Schema files + by ``scripts/regen_models.py``. Committed to the repo but never edited by + hand. +- ``_patches/`` — small hand-written supplements (cross-field validators, + nested-override tables, etc.) applied either by the regen script or + imported directly. 
+ +Users typically import the public names re-exported at the top level:: + + from jambonz_sdk.verbs import Gather, Say + from jambonz_sdk.components import Recognizer, Synthesizer +""" + +from __future__ import annotations + +from jambonz_sdk._models.base import JambonzModel + +__all__ = ["JambonzModel"] diff --git a/src/jambonz_sdk/_models/_generated/__init__.py b/src/jambonz_sdk/_models/_generated/__init__.py new file mode 100644 index 0000000..7b2868e --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/__init__.py @@ -0,0 +1,4 @@ +__jambonz_schema_version__ = "v0.2.1" +# generated by datamodel-codegen: +# filename: schema +# timestamp: 2026-04-22T10:54:53+00:00 diff --git a/src/jambonz_sdk/_models/_generated/callbacks/__init__.py b/src/jambonz_sdk/_models/_generated/callbacks/__init__.py new file mode 100644 index 0000000..1a9201e --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/__init__.py @@ -0,0 +1,3 @@ +# generated by datamodel-codegen: +# filename: schema +# timestamp: 2026-04-22T10:54:53+00:00 diff --git a/src/jambonz_sdk/_models/_generated/callbacks/agent_turn.py b/src/jambonz_sdk/_models/_generated/callbacks/agent_turn.py new file mode 100644 index 0000000..f41b935 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/agent_turn.py @@ -0,0 +1,168 @@ +# generated by datamodel-codegen: +# filename: callbacks/agent-turn +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Literal + +from pydantic import ConfigDict, Field, RootModel +from jambonz_sdk._models.base import JambonzModel + + +class Result(Enum): + """ + Whether the speculative preflight transcript matched the final transcript ('hit'), did not match ('miss'), or was still in progress ('pending'). + """ + + hit = "hit" + miss = "miss" + pending = "pending" + + +class Preflight(JambonzModel): + """ + Early generation (preflight) metrics. Only present when earlyGeneration is enabled. 
+ """ + + model_config = ConfigDict( + extra="forbid", + ) + result: Result | None = None + """ + Whether the speculative preflight transcript matched the final transcript ('hit'), did not match ('miss'), or was still in progress ('pending'). + """ + tokens: int | None = None + """ + Number of preflight tokens that were buffered. Only present on a 'hit'. + """ + + +class Latency(JambonzModel): + """ + Latency metrics for this turn, all in milliseconds. Fields are absent when not applicable. + """ + + model_config = ConfigDict( + extra="forbid", + ) + transcriber_latency: int | None = None + """ + STT processing latency: time from user stops talking until final transcript received, in milliseconds. + """ + turn_detection_latency: int | None = None + """ + Additional wait after final transcript for end-of-turn detection, in milliseconds. Absent when EOT fires before or with the transcript. + """ + model_latency: int | None = None + """ + LLM latency: time spent waiting for the first LLM token after the system is ready to prompt, in milliseconds. Absent on a preflight hit. + """ + voice_latency: int | None = None + """ + TTS engine latency: time from first text sent to the TTS engine until first audio received back, in milliseconds. + """ + preflight: Preflight | None = None + """ + Early generation (preflight) metrics. Only present when earlyGeneration is enabled. + """ + + +class AgentEventhookEvents1(JambonzModel): + """ + Events sent to the agent verb's eventHook during a conversation. These are sent as 'agent:event' messages over the WebSocket connection. + """ + + model_config = ConfigDict( + extra="forbid", + ) + type: Literal["turn_end"] + """ + Sent at the end of each conversational turn. + """ + transcript: str + """ + The user's final speech-to-text transcript for this turn. + """ + response: str + """ + The assistant's text response for this turn. May be trimmed to what was actually spoken if the turn was interrupted and alignment tracking is enabled. 
+ """ + interrupted: bool + """ + True if the user interrupted the assistant before it finished speaking. + """ + latency: Latency + """ + Latency metrics for this turn, all in milliseconds. Fields are absent when not applicable. + """ + + +class AgentEventhookEvents2(JambonzModel): + """ + Events sent to the agent verb's eventHook during a conversation. These are sent as 'agent:event' messages over the WebSocket connection. + """ + + model_config = ConfigDict( + extra="forbid", + ) + type: Literal["user_transcript"] + """ + Sent when the user's final transcript is available and the system is proceeding to prompt the LLM. This indicates the end of the user's speech input for the current turn. + """ + transcript: str + """ + The user's final speech-to-text transcript. + """ + + +class AgentEventhookEvents3(JambonzModel): + """ + Events sent to the agent verb's eventHook during a conversation. These are sent as 'agent:event' messages over the WebSocket connection. + """ + + model_config = ConfigDict( + extra="forbid", + ) + type: Literal["llm_response"] + """ + Sent when the LLM has finished generating its response for the current turn. Contains the complete response text. + """ + response: str + """ + The assistant's complete text response. + """ + + +class AgentEventhookEvents4(JambonzModel): + """ + Events sent to the agent verb's eventHook during a conversation. These are sent as 'agent:event' messages over the WebSocket connection. + """ + + model_config = ConfigDict( + extra="forbid", + ) + type: Literal["user_interruption"] + """ + Sent when the user barges in (interrupts) while the assistant is speaking. This event has no additional data. 
+ """ + + +class AgentEventhookEvents( + RootModel[ + AgentEventhookEvents1 + | AgentEventhookEvents2 + | AgentEventhookEvents3 + | AgentEventhookEvents4 + ] +): + root: ( + AgentEventhookEvents1 + | AgentEventhookEvents2 + | AgentEventhookEvents3 + | AgentEventhookEvents4 + ) = Field(..., title="Agent EventHook Events") + """ + Events sent to the agent verb's eventHook during a conversation. These are sent as 'agent:event' messages over the WebSocket connection. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/amd.py b/src/jambonz_sdk/_models/_generated/callbacks/amd.py new file mode 100644 index 0000000..325ff25 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/amd.py @@ -0,0 +1,60 @@ +# generated by datamodel-codegen: +# filename: callbacks/amd +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from .base import ActionhookBasePayload + + +class Type(Enum): + """ + The AMD event type. IMPORTANT: This field is 'type', NOT 'amd_type'. + """ + + amd_human_detected = "amd_human_detected" + amd_machine_detected = "amd_machine_detected" + amd_no_speech_detected = "amd_no_speech_detected" + amd_decision_timeout = "amd_decision_timeout" + amd_machine_stopped_speaking = "amd_machine_stopped_speaking" + amd_tone_detected = "amd_tone_detected" + amd_tone_timeout = "amd_tone_timeout" + amd_error = "amd_error" + amd_stopped = "amd_stopped" + + +class AmdActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the AMD actionHook when an answering machine detection event occurs. Multiple events may fire during a single call (e.g. amd_machine_detected followed by amd_machine_stopped_speaking or amd_tone_detected). + """ + + type: Type + """ + The AMD event type. IMPORTANT: This field is 'type', NOT 'amd_type'. + """ + reason: str | None = None + """ + Reason for the detection result (e.g. 'short greeting', 'long greeting', 'hint', 'digit count'). 
Present on amd_human_detected and amd_machine_detected events. + """ + greeting: str | None = None + """ + The transcribed greeting text. Present on amd_human_detected and amd_machine_detected events. + """ + hint: str | None = None + """ + The voicemail hint that matched, if detection was triggered by hint matching. + """ + language: str | None = None + """ + Language code from the transcription (e.g. 'en-US'). Present on amd_human_detected and amd_machine_detected events. + """ + frequency: float | None = None + """ + Frequency of the detected beep in Hz. Present on amd_tone_detected events. + """ + variance: float | None = None + """ + Frequency variance of the detected beep. Present on amd_tone_detected events. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/base.py b/src/jambonz_sdk/_models/_generated/callbacks/base.py new file mode 100644 index 0000000..ab27a42 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/base.py @@ -0,0 +1,105 @@ +# generated by datamodel-codegen: +# filename: callbacks/base +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class Direction(Enum): + """ + Call direction. + """ + + inbound = "inbound" + outbound = "outbound" + + +class CallStatus(Enum): + """ + Current call state. + """ + + trying = "trying" + ringing = "ringing" + early_media = "early-media" + in_progress = "in-progress" + completed = "completed" + failed = "failed" + busy = "busy" + no_answer = "no-answer" + queued = "queued" + + +class ActionhookBasePayload(JambonzModel): + """ + Common fields present in every actionHook callback payload. All verb-specific callback schemas extend this base. + """ + + model_config = ConfigDict( + extra="allow", + ) + call_sid: str | None = None + """ + Unique identifier for this call. + """ + account_sid: str | None = None + """ + Account identifier. 
+ """ + application_sid: str | None = None + """ + Application identifier. + """ + direction: Direction | None = None + """ + Call direction. + """ + from_: str | None = Field(default=None, alias="from") + """ + Caller phone number or SIP URI. + """ + to: str | None = None + """ + Called phone number or SIP URI. + """ + call_id: str | None = None + """ + SIP Call-ID. + """ + sbc_callid: str | None = None + """ + SBC-level Call-ID. + """ + call_status: CallStatus | None = None + """ + Current call state. + """ + sip_status: int | None = None + """ + SIP response code (e.g. 200, 486). + """ + sip_reason: str | None = None + """ + SIP reason phrase (e.g. 'OK', 'Busy Here'). + """ + trace_id: str | None = None + """ + Distributed tracing identifier for correlating logs across jambonz components. + """ + originating_sip_ip: str | None = None + """ + IP address of the originating SIP trunk. + """ + originating_sip_trunk_name: str | None = None + """ + Name of the originating SIP trunk as configured in jambonz. + """ + api_base_url: str | None = None + """ + jambonz REST API base URL. Use this for mid-call control via the REST API. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/call_status.py b/src/jambonz_sdk/_models/_generated/callbacks/call_status.py new file mode 100644 index 0000000..995a2fb --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/call_status.py @@ -0,0 +1,38 @@ +# generated by datamodel-codegen: +# filename: callbacks/call-status +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class CallTerminationBy(Enum): + """ + Who terminated the call. 'caller' if the remote party hung up, 'jambonz' if the call was ended by the application (e.g. hangup verb, REST API). Present only on the final status event (completed/failed). 
+ """ + + caller = "caller" + jambonz = "jambonz" + + +class CallStatusWebhookPayload(ActionhookBasePayload): + """ + Payload sent to the call status webhook URL whenever the call state changes (e.g. trying, in-progress, completed). The status webhook is configured at the application level in jambonz. Multiple status events are sent over the life of a call. The final event (completed or failed) includes additional fields like duration and termination cause. + """ + + model_config = ConfigDict( + extra="allow", + ) + call_termination_by: CallTerminationBy | None = None + """ + Who terminated the call. 'caller' if the remote party hung up, 'jambonz' if the call was ended by the application (e.g. hangup verb, REST API). Present only on the final status event (completed/failed). + """ + duration: int | None = None + """ + Call duration in seconds. Present only on the final status event (completed/failed). + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/conference.py b/src/jambonz_sdk/_models/_generated/callbacks/conference.py new file mode 100644 index 0000000..007d59a --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/conference.py @@ -0,0 +1,19 @@ +# generated by datamodel-codegen: +# filename: callbacks/conference +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class ConferenceActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the conference verb's actionHook when the participant leaves or the conference ends. Contains only the base call info. 
+ """ + + model_config = ConfigDict( + extra="allow", + ) diff --git a/src/jambonz_sdk/_models/_generated/callbacks/conference_status.py b/src/jambonz_sdk/_models/_generated/callbacks/conference_status.py new file mode 100644 index 0000000..c1e6293 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/conference_status.py @@ -0,0 +1,58 @@ +# generated by datamodel-codegen: +# filename: callbacks/conference-status +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import AwareDatetime, ConfigDict + +from .base import ActionhookBasePayload + + +class Event(Enum): + """ + The conference event that occurred. + """ + + start = "start" + end = "end" + join = "join" + leave = "leave" + start_talking = "start-talking" + stop_talking = "stop-talking" + + +class ConferenceStatushookPayload(ActionhookBasePayload): + """ + Payload sent to the conference statusHook when a conference event occurs. + """ + + model_config = ConfigDict( + extra="allow", + ) + event: Event + """ + The conference event that occurred. + """ + conference_sid: str + """ + Conference identifier (format: conf::). + """ + friendly_name: str | None = None + """ + The conference name as specified in the conference verb. + """ + duration: float | None = None + """ + Time in seconds since the conference started. + """ + members: int | None = None + """ + Current number of participants in the conference. + """ + time: AwareDatetime | None = None + """ + ISO 8601 timestamp of when the event occurred. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/conference_wait.py b/src/jambonz_sdk/_models/_generated/callbacks/conference_wait.py new file mode 100644 index 0000000..9473c6e --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/conference_wait.py @@ -0,0 +1,19 @@ +# generated by datamodel-codegen: +# filename: callbacks/conference-wait +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class ConferenceWaithookPayload(ActionhookBasePayload): + """ + Payload sent to the conference waitHook while a participant is waiting for the conference to start. The response should contain an array of say, play, and/or pause verbs to play while waiting. + """ + + model_config = ConfigDict( + extra="allow", + ) diff --git a/src/jambonz_sdk/_models/_generated/callbacks/dequeue.py b/src/jambonz_sdk/_models/_generated/callbacks/dequeue.py new file mode 100644 index 0000000..4a51fc6 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/dequeue.py @@ -0,0 +1,35 @@ +# generated by datamodel-codegen: +# filename: callbacks/dequeue +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class DequeueResult(Enum): + """ + Outcome of the dequeue. 'complete' — successfully dequeued and bridged; 'timeout' — no queued caller found; 'hangup' — call hung up during bridge. + """ + + complete = "complete" + timeout = "timeout" + hangup = "hangup" + + +class DequeueActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the dequeue verb's actionHook when the dequeue operation completes. + """ + + model_config = ConfigDict( + extra="allow", + ) + dequeue_result: DequeueResult + """ + Outcome of the dequeue. 
'complete' — successfully dequeued and bridged; 'timeout' — no queued caller found; 'hangup' — call hung up during bridge. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/dial.py b/src/jambonz_sdk/_models/_generated/callbacks/dial.py new file mode 100644 index 0000000..db0145f --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/dial.py @@ -0,0 +1,52 @@ +# generated by datamodel-codegen: +# filename: callbacks/dial +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict, Field + +from .base import ActionhookBasePayload + + +class DialCallStatus(Enum): + """ + Final status of the outbound (dialed) call leg. + """ + + completed = "completed" + failed = "failed" + busy = "busy" + no_answer = "no-answer" + trying = "trying" + ringing = "ringing" + early_media = "early-media" + in_progress = "in-progress" + + +class DialActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the actionHook when a dial verb completes, either because the dialed party hung up, the call was not answered, or an error occurred. + """ + + model_config = ConfigDict( + extra="allow", + ) + dial_call_status: DialCallStatus | None = None + """ + Final status of the outbound (dialed) call leg. + """ + dial_sip_status: int | None = Field(default=None, examples=[200, 486, 487]) + """ + SIP response code from the dialed party (e.g. 200, 486, 487). + """ + dial_call_sid: str | None = None + """ + Call SID of the outbound (dialed) call leg. + """ + dial_sbc_callid: str | None = None + """ + SBC-level Call-ID for the outbound (dialed) call leg. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/dial_dtmf.py b/src/jambonz_sdk/_models/_generated/callbacks/dial_dtmf.py new file mode 100644 index 0000000..00ad31c --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/dial_dtmf.py @@ -0,0 +1,23 @@ +# generated by datamodel-codegen: +# filename: callbacks/dial-dtmf +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class DialDtmfhookPayload(ActionhookBasePayload): + """ + Payload sent to the dial dtmfHook when DTMF digits matching the configured pattern are detected during a dial. + """ + + model_config = ConfigDict( + extra="allow", + ) + dtmf: str + """ + The DTMF digit sequence that matched the configured dtmfCapture pattern. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/dial_hold.py b/src/jambonz_sdk/_models/_generated/callbacks/dial_hold.py new file mode 100644 index 0000000..f5cb87b --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/dial_hold.py @@ -0,0 +1,39 @@ +# generated by datamodel-codegen: +# filename: callbacks/dial-hold +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict, Field + +from .base import ActionhookBasePayload +from jambonz_sdk._models.base import JambonzModel + + +class HoldDetail(JambonzModel): + """ + Details of the hold event. + """ + + from_: str | None = Field(default=None, alias="from") + """ + SIP From header value. + """ + to: str | None = None + """ + SIP To header value. + """ + + +class DialOnholdhookPayload(ActionhookBasePayload): + """ + Payload sent to the dial onHoldHook when the remote party places the call on hold. The response should contain say, play, and/or pause verbs to play as hold music. The hook is called repeatedly until the hold ends. 
+ """ + + model_config = ConfigDict( + extra="allow", + ) + hold_detail: HoldDetail + """ + Details of the hold event. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/dial_refer.py b/src/jambonz_sdk/_models/_generated/callbacks/dial_refer.py new file mode 100644 index 0000000..588e5e1 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/dial_refer.py @@ -0,0 +1,62 @@ +# generated by datamodel-codegen: +# filename: callbacks/dial-refer +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload +from jambonz_sdk._models.base import JambonzModel + + +class ReferDetails(JambonzModel): + """ + Details of the SIP REFER request. Any custom X-* headers on the REFER are also included as snake_cased properties (e.g. X-Override-Number becomes x_override_number). + """ + + model_config = ConfigDict( + extra="allow", + ) + sip_refer_to: str | None = None + """ + Full SIP Refer-To header value. + """ + refer_to_user: str | None = None + """ + User part of the Refer-To URI (phone number or SIP user). + """ + sip_referred_by: str | None = None + """ + SIP Referred-By header value, if present. + """ + referred_by_user: str | None = None + """ + User part of the Referred-By URI, if present. + """ + sip_user_agent: str | None = None + """ + User-Agent header from the REFER request. + """ + referring_call_sid: str | None = None + """ + Call SID of the leg that sent the REFER. + """ + referred_call_sid: str | None = None + """ + Call SID of the leg being referred. + """ + + +class DialReferhookPayload(ActionhookBasePayload): + """ + Payload sent to the dial referHook when a SIP REFER is received during an active dial. The response can contain new verbs to execute. + """ + + model_config = ConfigDict( + extra="allow", + ) + refer_details: ReferDetails + """ + Details of the SIP REFER request. 
Any custom X-* headers on the REFER are also included as snake_cased properties (e.g. X-Override-Number becomes x_override_number). + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/enqueue.py b/src/jambonz_sdk/_models/_generated/callbacks/enqueue.py new file mode 100644 index 0000000..4341e62 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/enqueue.py @@ -0,0 +1,44 @@ +# generated by datamodel-codegen: +# filename: callbacks/enqueue +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class QueueResult(Enum): + """ + Outcome of the enqueue. 'bridged' — call was dequeued and connected; 'hangup' — caller hung up while waiting; 'leave' — caller executed a leave verb from the waitHook; 'error' — an error occurred during bridging. + """ + + bridged = "bridged" + hangup = "hangup" + leave = "leave" + error = "error" + + +class EnqueueActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the enqueue actionHook when the enqueue verb completes — i.e. the call was bridged, abandoned, left the queue, or an error occurred. + """ + + model_config = ConfigDict( + extra="allow", + ) + queue_sid: str + """ + Queue identifier in the format 'queue:{account_sid}:{queue_name}'. + """ + queue_time: int + """ + Time in seconds the caller spent waiting in the queue. + """ + queue_result: QueueResult + """ + Outcome of the enqueue. 'bridged' — call was dequeued and connected; 'hangup' — caller hung up while waiting; 'leave' — caller executed a leave verb from the waitHook; 'error' — an error occurred during bridging. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/enqueue_wait.py b/src/jambonz_sdk/_models/_generated/callbacks/enqueue_wait.py new file mode 100644 index 0000000..1ccc6d7 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/enqueue_wait.py @@ -0,0 +1,42 @@ +# generated by datamodel-codegen: +# filename: callbacks/enqueue-wait +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict +from jambonz_sdk._models.base import JambonzModel + + +class EnqueueWaithookPayload(JambonzModel): + """ + Payload sent to the enqueue waitHook while a caller is waiting in the queue. The response should contain an array of say, play, pause, and/or leave verbs. Note: this payload is sparse — it contains queue-specific fields plus call_sid and call_id, but NOT the full base payload fields. + """ + + model_config = ConfigDict( + extra="allow", + ) + queue_sid: str + """ + Queue identifier in the format 'queue:{account_sid}:{queue_name}'. + """ + queue_time: int + """ + Time in seconds the caller has been waiting in the queue. + """ + queue_size: int | None = None + """ + Total number of callers currently in the queue. + """ + queue_position: int | None = None + """ + Caller's current position in the queue (0-based). + """ + call_sid: str | None = None + """ + Unique identifier for this call. + """ + call_id: str | None = None + """ + SIP Call-ID. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/gather.py b/src/jambonz_sdk/_models/_generated/callbacks/gather.py new file mode 100644 index 0000000..880f647 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/gather.py @@ -0,0 +1,104 @@ +# generated by datamodel-codegen: +# filename: callbacks/gather +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload +from jambonz_sdk._models.base import JambonzModel + + +class Reason(Enum): + """ + The reason the gather completed. + """ + + speech_detected = "speechDetected" + dtmf_detected = "dtmfDetected" + timeout = "timeout" + error = "error" + stt_low_confidence = "stt-low-confidence" + + +class Alternative(JambonzModel): + transcript: str | None = None + """ + The recognized transcript. + """ + confidence: float | None = None + """ + Confidence score between 0 and 1. + """ + + +class Vendor(JambonzModel): + """ + Vendor-specific STT data. Structure varies by provider. + """ + + name: str | None = None + """ + STT vendor name (e.g. 'deepgram', 'google', 'aws'). + """ + evt: dict[str, Any] | list[dict[str, Any]] | None = None + """ + Raw vendor-specific event payload. A single object for simple utterances, or an array of objects when jambonz assembles the final transcript from multiple STT segments. Contains provider-specific fields like word-level timestamps, model info, etc. + """ + + +class Speech(JambonzModel): + """ + Speech recognition results, present when reason is speechDetected. + """ + + language_code: str | None = None + """ + Language code used for recognition (e.g. 'en-US'). + """ + channel_tag: int | None = None + """ + Audio channel number. + """ + is_final: bool | None = None + """ + Whether this is a final (not interim) recognition result. 
+ """ + alternatives: list[Alternative] | None = None + """ + Array of recognition alternatives, ordered by confidence. + """ + vendor: Vendor | None = None + """ + Vendor-specific STT data. Structure varies by provider. + """ + + +class GatherActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the actionHook when a gather verb completes, either due to speech detected, DTMF detected, or timeout. + """ + + model_config = ConfigDict( + extra="allow", + ) + reason: Reason | None = None + """ + The reason the gather completed. + """ + speech: Speech | None = None + """ + Speech recognition results, present when reason is speechDetected. + """ + digits: str | None = None + """ + DTMF digits collected, present when reason is dtmfDetected. + """ + details: dict[str, Any] | None = None + """ + Error information if reason is error. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/gather_partial.py b/src/jambonz_sdk/_models/_generated/callbacks/gather_partial.py new file mode 100644 index 0000000..c067aa8 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/gather_partial.py @@ -0,0 +1,151 @@ +# generated by datamodel-codegen: +# filename: callbacks/gather-partial +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class Direction(Enum): + """ + Call direction. + """ + + inbound = "inbound" + outbound = "outbound" + + +class Alternative(JambonzModel): + transcript: str | None = None + """ + The partial transcript recognized so far. + """ + confidence: float | None = None + """ + Confidence score between 0 and 1. + """ + + +class Vendor(JambonzModel): + """ + Vendor-specific STT data. Structure varies by provider. + """ + + name: str | None = None + """ + STT vendor name (e.g. 'deepgram', 'google', 'aws'). 
+ """ + evt: dict[str, Any] | None = None + """ + Raw vendor-specific event payload. + """ + + +class Speech(JambonzModel): + """ + Interim speech recognition results. + """ + + language_code: str | None = None + """ + Language code used for recognition (e.g. 'en-US'). + """ + channel_tag: int | None = None + """ + Audio channel number. + """ + is_final: bool | None = None + """ + Always false for partial results. + """ + alternatives: list[Alternative] | None = None + """ + Array of recognition alternatives, ordered by confidence. + """ + vendor: Vendor | None = None + """ + Vendor-specific STT data. Structure varies by provider. + """ + + +class GatherPartialTranscriptPayload(JambonzModel): + """ + Payload sent to the partialResultHook during a gather verb with speech input. Delivers interim (partial) speech recognition results as they arrive, before the gather completes. This hook is informational — the response is ignored and does not replace the verb stack. Note: the base fields use slightly different names than the actionHook payload (e.g. local_sip_address instead of fs_sip_address). + """ + + model_config = ConfigDict( + extra="allow", + ) + call_sid: str | None = None + """ + Unique identifier for this call. + """ + account_sid: str | None = None + """ + Account identifier. + """ + application_sid: str | None = None + """ + Application identifier. + """ + direction: Direction | None = None + """ + Call direction. + """ + from_: str | None = Field(default=None, alias="from") + """ + Caller phone number or SIP URI. + """ + to: str | None = None + """ + Called phone number or SIP URI. + """ + call_id: str | None = None + """ + SIP Call-ID. + """ + sbc_callid: str | None = None + """ + SBC-level Call-ID. + """ + call_status: str | None = None + """ + Current call state. + """ + sip_status: int | None = None + """ + SIP response code. + """ + sip_reason: str | None = None + """ + SIP reason phrase. 
+ """ + trace_id: str | None = None + """ + Distributed tracing identifier. + """ + b3: str | None = None + """ + B3 trace propagation header. + """ + caller_name: str | None = None + """ + Caller display name from SIP, if available. + """ + originating_sip_ip: str | None = None + """ + IP address of the originating SIP trunk. + """ + originating_sip_trunk_name: str | None = None + """ + Name of the originating SIP trunk. + """ + speech: Speech | None = None + """ + Interim speech recognition results. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/listen.py b/src/jambonz_sdk/_models/_generated/callbacks/listen.py new file mode 100644 index 0000000..c6dae0c --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/listen.py @@ -0,0 +1,27 @@ +# generated by datamodel-codegen: +# filename: callbacks/listen +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class ListenActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the listen verb's actionHook when the listen verb completes. + """ + + model_config = ConfigDict( + extra="allow", + ) + dial_call_duration: int | None = None + """ + Duration of the listen session in seconds, present if recording was active. + """ + digits: str | None = None + """ + DTMF digit that ended the listen, present if finishOnKey was configured and pressed. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/llm.py b/src/jambonz_sdk/_models/_generated/callbacks/llm.py new file mode 100644 index 0000000..afabd94 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/llm.py @@ -0,0 +1,44 @@ +# generated by datamodel-codegen: +# filename: callbacks/llm +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class CompletionReason(Enum): + """ + Reason the LLM session ended. + """ + + normal_conversation_end = "normal conversation end" + connection_failure = "connection failure" + disconnect_from_remote_end = "disconnect from remote end" + server_failure = "server failure" + server_error = "server error" + client_error_calling_function = "client error calling function" + client_error_calling_mcp_function = "client error calling mcp function" + + +class LlmActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the llm verb's actionHook when the LLM session ends. Applies to all LLM providers (OpenAI, Google, ElevenLabs, Ultravox, and generic voice agents). + """ + + model_config = ConfigDict( + extra="allow", + ) + completion_reason: CompletionReason + """ + Reason the LLM session ended. + """ + error: Any | None = None + """ + Error details, present when completion_reason indicates a failure. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/message.py b/src/jambonz_sdk/_models/_generated/callbacks/message.py new file mode 100644 index 0000000..8e5fddd --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/message.py @@ -0,0 +1,53 @@ +# generated by datamodel-codegen: +# filename: callbacks/message +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class MessageStatus(Enum): + """ + Delivery status of the message. + """ + + success = "success" + failure = "failure" + no_carriers = "no carriers" + smpp_configuration_error = "smpp configuration error" + system_error = "system error" + + +class MessageActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the message verb's actionHook with the delivery status of an outbound SMS. + """ + + model_config = ConfigDict( + extra="allow", + ) + message_sid: str + """ + Unique identifier for the message. + """ + message_status: MessageStatus + """ + Delivery status of the message. + """ + carrier: str | None = None + """ + Name of the carrier used to send the message. Present on success or carrier-level failure. + """ + carrier_message_id: str | None = None + """ + Message ID returned by the carrier. Present on success or carrier-level failure. + """ + message_failure_reason: str | None = None + """ + Reason for failure. Present when message_status is 'failure' or 'system error'. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/play.py b/src/jambonz_sdk/_models/_generated/callbacks/play.py new file mode 100644 index 0000000..04b5f5e --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/play.py @@ -0,0 +1,58 @@ +# generated by datamodel-codegen: +# filename: callbacks/play +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class Reason(Enum): + """ + Outcome of playback. 'playCompleted' on success, 'playFailed' on file-not-found or other error. + """ + + play_completed = "playCompleted" + play_failed = "playFailed" + + +class Status(Enum): + """ + Set to 'fail' when playback failed (e.g. file not found). + """ + + fail = "fail" + + +class PlayActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the play verb's actionHook when playback completes or fails. + """ + + model_config = ConfigDict( + extra="allow", + ) + reason: Reason + """ + Outcome of playback. 'playCompleted' on success, 'playFailed' on file-not-found or other error. + """ + playback_seconds: int | None = None + """ + Total playback duration in seconds. Present when reason is 'playCompleted'. + """ + playback_milliseconds: int | None = None + """ + Total playback duration in milliseconds. Present when reason is 'playCompleted'. + """ + playback_last_offset_pos: str | None = None + """ + Last offset position in the audio stream. Present when reason is 'playCompleted'. + """ + status: Status | None = None + """ + Set to 'fail' when playback failed (e.g. file not found). 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/session_new.py b/src/jambonz_sdk/_models/_generated/callbacks/session_new.py new file mode 100644 index 0000000..0f750e6 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/session_new.py @@ -0,0 +1,189 @@ +# generated by datamodel-codegen: +# filename: callbacks/session-new +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import ConfigDict, Field + +from .base import ActionhookBasePayload +from jambonz_sdk._models.base import JambonzModel + + +class Type(Enum): + """ + Always 'request' for an incoming INVITE. + """ + + request = "request" + + +class Source(Enum): + """ + Origin of the SIP message. + """ + + network = "network" + application = "application" + + +class Sip(JambonzModel): + """ + The raw SIP INVITE message (drachtio SipRequest object, serialized). Only present for WebSocket and HTTP POST transports (not for HTTP GET). See https://drachtio.org/api#sip-request for the full drachtio SipRequest API. + """ + + headers: dict[str, str] | None = Field( + default=None, + examples=[ + { + "X-Authenticated-User": "retell@sip.example.com", + "X-Override-Number": "+15551234567", + "From": ";tag=abc123", + "To": "", + "Call-ID": "abc123@10.0.0.1", + } + ], + ) + """ + SIP headers from the INVITE as key-value pairs. Custom headers (X-* headers) from the originating carrier or SIP client are included here. Standard SIP headers like From, To, Contact, Call-ID, Via, etc. are also present. + """ + body: str | None = None + """ + SIP message body (typically SDP for INVITE). + """ + method: str | None = None + """ + SIP method (always 'INVITE' for session:new). + """ + uri: str | None = None + """ + Request-URI from the SIP INVITE. + """ + called_number: str | None = Field(default=None, alias="calledNumber") + """ + Phone number extracted from the Request-URI. 
+ """ + calling_number: str | None = Field(default=None, alias="callingNumber") + """ + Calling phone number extracted from P-Asserted-Identity or From header. + """ + type: Type | None = None + """ + Always 'request' for an incoming INVITE. + """ + source: Source | None = None + """ + Origin of the SIP message. + """ + source_address: str | None = None + """ + IP address of the sender. + """ + source_port: str | int | None = None + """ + Port of the sender. + """ + protocol: str | None = None + """ + Transport protocol (e.g. 'udp', 'tcp', 'tls', 'wss'). + """ + payload: list[Any] | None = None + """ + Message body organized into parts; useful for multipart content. + """ + + +class Synthesizer(JambonzModel): + """ + Default TTS settings. + """ + + vendor: str | None = None + language: str | None = None + voice: str | None = None + + +class Recognizer(JambonzModel): + """ + Default STT settings. + """ + + vendor: str | None = None + language: str | None = None + + +class Defaults(JambonzModel): + """ + Default speech settings for the account (synthesizer and recognizer defaults). + """ + + synthesizer: Synthesizer | None = None + """ + Default TTS settings. + """ + recognizer: Recognizer | None = None + """ + Default STT settings. + """ + + +class SessionNewPayload(ActionhookBasePayload): + """ + Payload delivered when a new call arrives. For webhook apps this is the initial POST body; for WebSocket apps it is the `data` property of the first `session:new` message. In the @jambonz/sdk WebSocket transport, this data is available as `session.data`. + """ + + model_config = ConfigDict( + extra="allow", + ) + caller_name: str | None = None + """ + Caller display name from the SIP From header. + """ + caller_id: str | None = None + """ + Caller ID value (phone number or SIP user). + """ + service_provider_sid: str | None = None + """ + Service provider identifier, if applicable. 
+ """ + parent_call_sid: str | None = None + """ + Call SID of the parent call, present when this session was created via the REST API dial (adulting) or when an outbound call leg is promoted to its own session. + """ + fs_sip_address: str | None = None + """ + Internal SIP address of the FreeSWITCH media server handling this call. + """ + fs_public_ip: str | None = None + """ + Public IP address of the FreeSWITCH media server, if available. + """ + sip: Sip | None = None + """ + The raw SIP INVITE message (drachtio SipRequest object, serialized). Only present for WebSocket and HTTP POST transports (not for HTTP GET). See https://drachtio.org/api#sip-request for the full drachtio SipRequest API. + """ + env_vars: dict[str, Any] | None = Field( + default=None, + examples=[ + { + "RETELL_TRUNK_NAME": "retell-hosted", + "PSTN_TRUNK_NAME": "my-carrier", + "DEFAULT_COUNTRY": "US", + } + ], + ) + """ + Application environment variables configured in the jambonz portal. These are the key-value pairs defined in the application's environment variable schema. In the @jambonz/sdk, access via `session.data.env_vars`. + """ + defaults: Defaults | None = None + """ + Default speech settings for the account (synthesizer and recognizer defaults). + """ + customer_data: dict[str, Any] | None = Field(default=None, alias="customerData") + """ + Custom data attached to the call via the REST API when creating an outbound call. Preserved as-is (not snake-cased). 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/session_reconnect.py b/src/jambonz_sdk/_models/_generated/callbacks/session_reconnect.py new file mode 100644 index 0000000..c395e0b --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/session_reconnect.py @@ -0,0 +1,13 @@ +# generated by datamodel-codegen: +# filename: callbacks/session-reconnect +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from .session_new import SessionNewPayload + + +class SessionReconnectPayload(SessionNewPayload): + """ + Payload delivered when a WebSocket client reconnects after a disconnection. The payload is identical to the original session:new payload — it is cached from the initial session setup and replayed on reconnect. This allows the application to restore state without needing to re-fetch call details. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/session_redirect.py b/src/jambonz_sdk/_models/_generated/callbacks/session_redirect.py new file mode 100644 index 0000000..f682662 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/session_redirect.py @@ -0,0 +1,45 @@ +# generated by datamodel-codegen: +# filename: callbacks/session-redirect +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any + +from pydantic import ConfigDict, Field + +from .base import ActionhookBasePayload + + +class SessionRedirectPayload(ActionhookBasePayload): + """ + Payload delivered when a redirect verb transfers the call to a new application or WebSocket endpoint. Contains only the current call state information — unlike session:new, it does not include defaults, env_vars, sip, or service_provider_sid. + """ + + model_config = ConfigDict( + extra="allow", + ) + caller_name: str | None = None + """ + Caller display name from the SIP From header. + """ + caller_id: str | None = None + """ + Caller ID value (phone number or SIP user). 
+ """ + parent_call_sid: str | None = None + """ + Call SID of the parent call, if applicable. + """ + fs_sip_address: str | None = None + """ + Internal SIP address of the FreeSWITCH media server handling this call. + """ + fs_public_ip: str | None = None + """ + Public IP address of the FreeSWITCH media server, if available. + """ + customer_data: dict[str, Any] | None = Field(default=None, alias="customerData") + """ + Custom data attached to the call via the REST API. Preserved as-is (not snake-cased). + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/sip_refer.py b/src/jambonz_sdk/_models/_generated/callbacks/sip_refer.py new file mode 100644 index 0000000..8e17ce0 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/sip_refer.py @@ -0,0 +1,27 @@ +# generated by datamodel-codegen: +# filename: callbacks/sip-refer +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class SipreferActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the sip-refer verb's actionHook when the REFER completes. Sent once the REFER response is received, or after a final NOTIFY arrives. + """ + + model_config = ConfigDict( + extra="allow", + ) + refer_status: int + """ + SIP status code from the REFER response (e.g. 202 for accepted, 4xx/5xx for failure). + """ + final_referred_call_status: int | None = None + """ + Final SIP status of the referred call, extracted from a NOTIFY sipfrag. Present only when the REFER was accepted (202) and a final NOTIFY was received. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/sip_refer_event.py b/src/jambonz_sdk/_models/_generated/callbacks/sip_refer_event.py new file mode 100644 index 0000000..fe84b7f --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/sip_refer_event.py @@ -0,0 +1,28 @@ +# generated by datamodel-codegen: +# filename: callbacks/sip-refer-event +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import ConfigDict +from jambonz_sdk._models.base import JambonzModel + + +class SipReferEventhookPayload(JambonzModel): + """ + Payload sent to the sip-refer eventHook when a SIP NOTIFY is received with transfer status updates. + """ + + model_config = ConfigDict( + extra="allow", + ) + event: Literal["transfer-status"] + """ + Event type — always 'transfer-status' for REFER notifications. + """ + call_status: int + """ + SIP status code from the NOTIFY sipfrag body (e.g. 100 for trying, 180 for ringing, 200 for success). + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/sip_request.py b/src/jambonz_sdk/_models/_generated/callbacks/sip_request.py new file mode 100644 index 0000000..97d280c --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/sip_request.py @@ -0,0 +1,42 @@ +# generated by datamodel-codegen: +# filename: callbacks/sip-request +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload + + +class Result(Enum): + """ + Whether the SIP request succeeded or failed. + """ + + success = "success" + failed = "failed" + + +class SiprequestActionhookPayload(ActionhookBasePayload): + """ + Payload sent to the sip-request verb's actionHook after a SIP request (e.g. INFO, NOTIFY) is sent and a response is received. 
+ """ + + model_config = ConfigDict( + extra="allow", + ) + result: Result + """ + Whether the SIP request succeeded or failed. + """ + sip_status: int | None = None + """ + SIP response status code. Present when result is 'success'. + """ + err: str | None = None + """ + Error message. Present when result is 'failed'. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/transcribe.py b/src/jambonz_sdk/_models/_generated/callbacks/transcribe.py new file mode 100644 index 0000000..378c6c8 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/transcribe.py @@ -0,0 +1,98 @@ +# generated by datamodel-codegen: +# filename: callbacks/transcribe +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload +from jambonz_sdk._models.base import JambonzModel + + +class Alternative(JambonzModel): + transcript: str | None = None + """ + The recognized transcript. + """ + confidence: float | None = None + """ + Confidence score between 0 and 1. + """ + + +class Speech(JambonzModel): + """ + Speech recognition results, present when the STT engine returns alternatives. + """ + + language_code: str | None = None + """ + Language code used for recognition (e.g. 'en-US'). + """ + channel_tag: int | None = None + """ + Audio channel number. + """ + is_final: bool | None = None + """ + Whether this is a final (not interim) recognition result. + """ + alternatives: list[Alternative] | None = None + """ + Array of recognition alternatives, ordered by confidence. + """ + + +class SpeechEvent(JambonzModel): + """ + Speech event data, present when the STT engine returns a typed event (e.g. end of utterance). + """ + + model_config = ConfigDict( + extra="allow", + ) + type: str | None = None + """ + Event type from the STT vendor. 
+ """ + + +class TranscribeTranscriptionhookPayload(ActionhookBasePayload): + """ + Payload sent to the transcriptionHook when a transcription result is received from the STT engine. + """ + + model_config = ConfigDict( + extra="allow", + ) + speech: Speech | None = None + """ + Speech recognition results, present when the STT engine returns alternatives. + """ + speech_event: SpeechEvent | None = None + """ + Speech event data, present when the STT engine returns a typed event (e.g. end of utterance). + """ + stt_latency_ms: str | None = None + """ + STT latency in milliseconds. + """ + stt_talkspurts: str | None = None + """ + JSON-encoded array of talkspurt timing data. + """ + stt_start_time: str | None = None + """ + STT recognition start time. + """ + stt_stop_time: str | None = None + """ + STT recognition stop time. + """ + stt_usage: Any | None = None + """ + STT usage data from the vendor. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/transcribe_translation.py b/src/jambonz_sdk/_models/_generated/callbacks/transcribe_translation.py new file mode 100644 index 0000000..f3444db --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/transcribe_translation.py @@ -0,0 +1,43 @@ +# generated by datamodel-codegen: +# filename: callbacks/transcribe-translation +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict + +from .base import ActionhookBasePayload +from jambonz_sdk._models.base import JambonzModel + + +class Translation(JambonzModel): + """ + Translation result data. + """ + + channel: int + """ + Audio channel number (1 or 2). + """ + language: str + """ + Target language code for the translation. + """ + translation: str + """ + The translated text. + """ + + +class TranscribeTranslationhookPayload(ActionhookBasePayload): + """ + Payload sent to the translationHook when a translation result is received. 
+ """ + + model_config = ConfigDict( + extra="allow", + ) + translation: Translation + """ + Translation result data. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/tts_streaming_event.py b/src/jambonz_sdk/_models/_generated/callbacks/tts_streaming_event.py new file mode 100644 index 0000000..c10c9a1 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/tts_streaming_event.py @@ -0,0 +1,125 @@ +# generated by datamodel-codegen: +# filename: callbacks/tts-streaming-event +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import ConfigDict, Field, RootModel +from jambonz_sdk._models.base import JambonzModel + + +class TtsStreamingEvent1(JambonzModel): + """ + Events sent to the '/streaming-event' WebSocket endpoint during TTS streaming. These are sent as 'tts:streaming-event' messages. The tts_spoken event is only sent when trackTtsPlayout is enabled via the config verb. + """ + + model_config = ConfigDict( + extra="forbid", + ) + event_type: Literal["stream_open"] + """ + The TTS streaming connection has been established. + """ + + +class TtsStreamingEvent2(JambonzModel): + """ + Events sent to the '/streaming-event' WebSocket endpoint during TTS streaming. These are sent as 'tts:streaming-event' messages. The tts_spoken event is only sent when trackTtsPlayout is enabled via the config verb. + """ + + model_config = ConfigDict( + extra="forbid", + ) + event_type: Literal["stream_closed"] + """ + The TTS streaming connection has been closed. + """ + + +class TtsStreamingEvent3(JambonzModel): + """ + Events sent to the '/streaming-event' WebSocket endpoint during TTS streaming. These are sent as 'tts:streaming-event' messages. The tts_spoken event is only sent when trackTtsPlayout is enabled via the config verb. + """ + + model_config = ConfigDict( + extra="forbid", + ) + event_type: Literal["stream_paused"] + """ + TTS streaming has been paused. 
+ """ + + +class TtsStreamingEvent4(JambonzModel): + """ + Events sent to the '/streaming-event' WebSocket endpoint during TTS streaming. These are sent as 'tts:streaming-event' messages. The tts_spoken event is only sent when trackTtsPlayout is enabled via the config verb. + """ + + model_config = ConfigDict( + extra="forbid", + ) + event_type: Literal["stream_resumed"] + """ + TTS streaming has been resumed. + """ + + +class TtsStreamingEvent5(JambonzModel): + """ + Events sent to the '/streaming-event' WebSocket endpoint during TTS streaming. These are sent as 'tts:streaming-event' messages. The tts_spoken event is only sent when trackTtsPlayout is enabled via the config verb. + """ + + model_config = ConfigDict( + extra="forbid", + ) + event_type: Literal["user_interruption"] + """ + The user interrupted (barged in) during TTS playout, causing the stream to be cleared. + """ + + +class TtsStreamingEvent6(JambonzModel): + """ + Events sent to the '/streaming-event' WebSocket endpoint during TTS streaming. These are sent as 'tts:streaming-event' messages. The tts_spoken event is only sent when trackTtsPlayout is enabled via the config verb. + """ + + model_config = ConfigDict( + extra="forbid", + ) + event_type: Literal["tts_spoken"] + """ + Reports the actual text that was spoken via TTS. Sent on utterance completion or when the user barges in. Only sent when trackTtsPlayout is enabled via the config verb. Requires a TTS vendor that supports alignment data (e.g. ElevenLabs). + """ + text: str + """ + The text that was actually spoken before completion or interruption. + """ + bargein: bool + """ + True if the user barged in (interrupted) before the TTS finished speaking. False if the utterance completed normally. 
+ """ + + +class TtsStreamingEvent( + RootModel[ + TtsStreamingEvent1 + | TtsStreamingEvent2 + | TtsStreamingEvent3 + | TtsStreamingEvent4 + | TtsStreamingEvent5 + | TtsStreamingEvent6 + ] +): + root: ( + TtsStreamingEvent1 + | TtsStreamingEvent2 + | TtsStreamingEvent3 + | TtsStreamingEvent4 + | TtsStreamingEvent5 + | TtsStreamingEvent6 + ) = Field(..., title="TTS Streaming Event") + """ + Events sent to the '/streaming-event' WebSocket endpoint during TTS streaming. These are sent as 'tts:streaming-event' messages. The tts_spoken event is only sent when trackTtsPlayout is enabled via the config verb. + """ diff --git a/src/jambonz_sdk/_models/_generated/callbacks/verb_status.py b/src/jambonz_sdk/_models/_generated/callbacks/verb_status.py new file mode 100644 index 0000000..d61223c --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/callbacks/verb_status.py @@ -0,0 +1,71 @@ +# generated by datamodel-codegen: +# filename: callbacks/verb-status +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict +from jambonz_sdk._models.base import JambonzModel + + +class Event(Enum): + """ + The verb lifecycle event. + """ + + starting = "starting" + finished = "finished" + start_playback = "start-playback" + stop_playback = "stop-playback" + kill_playback = "kill-playback" + dtmf_bargein_detected = "dtmf-bargein-detected" + speech_bargein_detected = "speech-bargein-detected" + synthesized_audio = "synthesized-audio" + + +class VerbStatusEvent(JambonzModel): + """ + Real-time verb lifecycle events sent over WebSocket when notifyEvents is enabled on the session. These are informational — no response is expected. + """ + + model_config = ConfigDict( + extra="allow", + ) + event: Event + """ + The verb lifecycle event. + """ + verb: str | None = None + """ + The verb name (e.g. 'say', 'play', 'gather'). 
Present on synthesized-audio, start-playback, stop-playback, kill-playback, and dtmf/speech-bargein events. + """ + name: str | None = None + """ + The verb name. Present on 'starting' and 'finished' events (these use 'name' instead of 'verb'). + """ + id: str | None = None + """ + The verb instance id, if one was assigned by the application. + """ + vendor: str | None = None + """ + TTS vendor name. Present on synthesized-audio events. + """ + language: str | None = None + """ + TTS language code. Present on synthesized-audio events. + """ + characters: int | None = None + """ + Number of characters synthesized. Present on synthesized-audio events when not served from cache. + """ + elapsed_time: float | None = None + """ + TTS round-trip time in milliseconds. Present on synthesized-audio events when not served from cache. + """ + served_from_cache: bool | None = None + """ + Whether the TTS audio was served from cache. Present on synthesized-audio events. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/__init__.py b/src/jambonz_sdk/_models/_generated/components/__init__.py new file mode 100644 index 0000000..1a9201e --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/__init__.py @@ -0,0 +1,3 @@ +# generated by datamodel-codegen: +# filename: schema +# timestamp: 2026-04-22T10:54:53+00:00 diff --git a/src/jambonz_sdk/_models/_generated/components/actionHook.py b/src/jambonz_sdk/_models/_generated/components/actionHook.py new file mode 100644 index 0000000..ad68c06 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/actionHook.py @@ -0,0 +1,47 @@ +# generated by datamodel-codegen: +# filename: components/actionHook +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import Field, RootModel + +from . import auth +from jambonz_sdk._models.base import JambonzModel + + +class Method(Enum): + """ + The HTTP method to use. Only applies to webhook applications. 
+ """ + + get = "GET" + post = "POST" + + +class ActionHook1(JambonzModel): + """ + A hook specification with URL and additional options. + """ + + url: str + """ + The URL to invoke. + """ + method: Method | None = "POST" + """ + The HTTP method to use. Only applies to webhook applications. + """ + basic_auth: auth.Auth | None = Field(default=None, alias="basicAuth") + """ + Basic authentication credentials to include in the request. + """ + + +class ActionHook(RootModel[str | ActionHook1]): + root: str | ActionHook1 = Field(..., title="ActionHook") + """ + A webhook or websocket callback that jambonz invokes when a verb completes. Reports verb results (e.g. speech recognition from 'gather', dial outcome) and receives the next verbs to execute. In webhook mode: jambonz POSTs to this URL and the HTTP response body is the next verb array. In WebSocket mode: this value becomes an event name emitted on the session — bind session.on('/hookName', (evt) => {...}) and respond with session.reply() (NOT session.send()). The callback payload always includes 'reason' plus verb-specific fields (e.g. 'speech', 'digits' for gather). Can be a simple URL/path string or an object with additional options. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/actionHookDelayAction.py b/src/jambonz_sdk/_models/_generated/components/actionHookDelayAction.py new file mode 100644 index 0000000..92c3eac --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/actionHookDelayAction.py @@ -0,0 +1,45 @@ +# generated by datamodel-codegen: +# filename: components/actionHookDelayAction +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any + +from pydantic import Field +from jambonz_sdk._models.base import JambonzModel + + +class ActionHookDelayAction(JambonzModel): + """ + Configuration for what to do when an actionHook (webhook) takes a long time to respond. Allows playing interim content (e.g. 
'please wait' messages, hold music) while waiting for the webhook response, with configurable retry and give-up behavior. + """ + + enabled: bool | None = None + """ + Whether to enable delay handling for actionHooks. + """ + no_response_timeout: float | None = Field( + default=None, alias="noResponseTimeout", examples=[3, 5] + ) + """ + Time in seconds to wait before executing the delay actions. If the webhook responds before this timeout, the delay actions are skipped. + """ + no_response_give_up_timeout: float | None = Field( + default=None, alias="noResponseGiveUpTimeout", examples=[30, 60] + ) + """ + Total time in seconds to wait for a webhook response before giving up and executing the giveUpActions. + """ + retries: float | None = None + """ + Number of times to retry the delay actions while still waiting for the webhook response. + """ + actions: list[dict[str, Any]] | None = None + """ + An array of jambonz verbs to execute while waiting for the webhook response. Typically 'say' or 'play' verbs with messages like 'please hold'. + """ + give_up_actions: list[dict[str, Any]] | None = Field(default=None, alias="giveUpActions") + """ + An array of jambonz verbs to execute if the webhook never responds within the noResponseGiveUpTimeout. Typically an error message and/or hangup. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/amd.py b/src/jambonz_sdk/_models/_generated/components/amd.py new file mode 100644 index 0000000..d295345 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/amd.py @@ -0,0 +1,66 @@ +# generated by datamodel-codegen: +# filename: components/amd +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict, Field + +from . import actionHook +from . import recognizer as recognizer_1 +from jambonz_sdk._models.base import JambonzModel + + +class Timers(JambonzModel): + """ + Timer settings controlling AMD detection windows. 
+ """ + + model_config = ConfigDict( + extra="forbid", + ) + no_speech_timeout_ms: float | None = Field(default=5000, alias="noSpeechTimeoutMs") + """ + Milliseconds to wait for any speech before returning amd_no_speech_detected. + """ + decision_timeout_ms: float | None = Field(default=15000, alias="decisionTimeoutMs") + """ + Milliseconds before returning amd_decision_timeout if no determination is made. + """ + tone_timeout_ms: float | None = Field(default=20000, alias="toneTimeoutMs") + """ + Milliseconds to wait for beep/tone detection. + """ + greeting_completion_timeout_ms: float | None = Field( + default=2000, alias="greetingCompletionTimeoutMs" + ) + """ + Milliseconds of silence after speech before determining the machine greeting is complete. Automatically reduced to 1000ms if a beep is detected. + """ + + +class AnsweringMachineDetection(JambonzModel): + """ + Configuration for answering machine detection (AMD). Detects whether an outbound or inbound call was answered by a human or a machine. Used as a nested property on the 'config' or 'dial' verb. IMPORTANT: AMD runs asynchronously in the background. When using AMD with the 'config' verb, you MUST follow it with a 'pause' verb (e.g. pause({ length: 25 })) to keep the call alive while AMD detection runs. Without a pause, the call will end immediately after config completes. + """ + + action_hook: actionHook.ActionHook = Field(..., alias="actionHook") + """ + Webhook to receive AMD events (amd_human_detected, amd_machine_detected, amd_no_speech_detected, amd_decision_timeout, amd_machine_stopped_speaking, amd_tone_detected, amd_error, amd_stopped). + """ + threshold_word_count: float | None = Field(default=9, alias="thresholdWordCount") + """ + Number of spoken words in a greeting that triggers an amd_machine_detected result. + """ + digit_count: float | None = Field(default=0, alias="digitCount") + """ + Number of digits in a greeting to trigger detection. 0 disables digit-based detection. 
+ """ + timers: Timers | None = None + """ + Timer settings controlling AMD detection windows. + """ + recognizer: recognizer_1.Recognizer | None = None + """ + Override the STT recognizer used for AMD speech detection. When omitted, AMD uses the session default recognizer with enhancedModel enabled. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/auth.py b/src/jambonz_sdk/_models/_generated/components/auth.py new file mode 100644 index 0000000..f3df6c7 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/auth.py @@ -0,0 +1,21 @@ +# generated by datamodel-codegen: +# filename: components/auth +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations +from jambonz_sdk._models.base import JambonzModel + + +class Auth(JambonzModel): + """ + Basic authentication credentials, used for authenticating with external services such as websocket endpoints or SIP registrars. + """ + + username: str + """ + The username for authentication. + """ + password: str + """ + The password for authentication. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/bidirectionalAudio.py b/src/jambonz_sdk/_models/_generated/components/bidirectionalAudio.py new file mode 100644 index 0000000..cb1be5f --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/bidirectionalAudio.py @@ -0,0 +1,29 @@ +# generated by datamodel-codegen: +# filename: components/bidirectionalAudio +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import Field +from jambonz_sdk._models.base import JambonzModel + + +class BidirectionalAudio(JambonzModel): + """ + Configuration for bidirectional audio streaming over a websocket connection. When enabled, the remote websocket endpoint can send audio back to jambonz to be played to the caller. + """ + + enabled: bool | None = None + """ + Whether to enable bidirectional audio on the websocket connection. 
+ """ + streaming: bool | None = None + """ + If true, audio is streamed continuously rather than sent as complete messages. + """ + sample_rate: float | None = Field( + default=None, alias="sampleRate", examples=[8000, 16000, 24000] + ) + """ + The sample rate in Hz for bidirectional audio. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/fillerNoise.py b/src/jambonz_sdk/_models/_generated/components/fillerNoise.py new file mode 100644 index 0000000..004d7aa --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/fillerNoise.py @@ -0,0 +1,27 @@ +# generated by datamodel-codegen: +# filename: components/fillerNoise +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import Field +from jambonz_sdk._models.base import JambonzModel + + +class FillerNoise(JambonzModel): + """ + Configuration for playing background filler noise (e.g. keyboard typing, hold music) while the application is processing and the caller would otherwise hear silence. Commonly used during LLM response generation to indicate the system is working. + """ + + enable: bool + """ + Whether to enable filler noise. + """ + url: str | None = Field(default=None, examples=["https://example.com/sounds/typing.wav"]) + """ + URL of the audio file to play as filler noise. Should be a short, loopable audio clip. + """ + start_delay_secs: float | None = Field(default=None, alias="startDelaySecs", examples=[1, 2]) + """ + Number of seconds to wait before starting filler noise. Prevents filler noise from playing during brief processing pauses. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/components/llm_base.py b/src/jambonz_sdk/_models/_generated/components/llm_base.py new file mode 100644 index 0000000..455822f --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/llm_base.py @@ -0,0 +1,120 @@ +# generated by datamodel-codegen: +# filename: components/llm-base +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any + +from pydantic import ConfigDict, Field + +from . import actionHook +from jambonz_sdk._models.base import JambonzModel + + +class Auth(JambonzModel): + """ + Authentication credentials for the LLM vendor API. + """ + + model_config = ConfigDict( + extra="allow", + ) + api_key: str | None = Field(default=None, alias="apiKey") + """ + The API key for the LLM vendor. + """ + + +class McpServer(JambonzModel): + url: str + """ + The URL of the MCP server. + """ + auth: dict[str, Any] | None = None + """ + Authentication for the MCP server. + """ + roots: list[dict[str, Any]] | None = None + """ + MCP root definitions. + """ + + +class LlmBaseProperties(JambonzModel): + """ + Shared properties for llm, s2s, and vendor-specific s2s verb schemas. + """ + + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + vendor: str | None = Field( + default=None, + examples=[ + "openai", + "anthropic", + "google", + "groq", + "deepseek", + "deepgram", + "ultravox", + "custom", + ], + ) + """ + The LLM vendor to use. + """ + model: str | None = Field( + default=None, examples=["gpt-4o", "claude-sonnet-4-20250514", "gemini-2.0-flash"] + ) + """ + The specific model to use from the vendor. + """ + auth: Auth | None = None + """ + Authentication credentials for the LLM vendor API. + """ + connect_options: dict[str, Any] | None = Field(default=None, alias="connectOptions") + """ + Additional connection options for the LLM vendor, such as custom base URLs or API versions. 
+ """ + llm_options: dict[str, Any] | None = Field( + default=None, + alias="llmOptions", + examples=[ + { + "messages": [ + { + "role": "system", + "content": "You are a helpful customer service agent for Acme Corp.", + } + ], + "temperature": 0.7, + } + ], + ) + """ + Configuration passed to the LLM including the system prompt, temperature, tools/functions, and other model parameters. The structure varies by vendor but typically includes 'messages' (conversation history), 'temperature', 'tools' (function definitions), and 'maxTokens'. + """ + mcp_servers: list[McpServer] | None = Field(default=None, alias="mcpServers") + """ + Model Context Protocol servers to connect to. MCP servers provide tools that the LLM can invoke during the conversation. + """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + """ + A webhook invoked when the LLM conversation ends. Receives conversation details and should return the next verbs to execute. + """ + event_hook: actionHook.ActionHook | None = Field(default=None, alias="eventHook") + """ + A webhook invoked for real-time events during the LLM conversation (e.g. tool calls, transcription events). + """ + tool_hook: actionHook.ActionHook | None = Field(default=None, alias="toolHook") + """ + A webhook invoked when the LLM calls a tool/function. Receives the tool name and arguments, and should return the tool result. + """ + events: list[str] | None = None + """ + List of event types to receive via the eventHook. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer.py b/src/jambonz_sdk/_models/_generated/components/recognizer.py new file mode 100644 index 0000000..ac294b3 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer.py @@ -0,0 +1,331 @@ +# generated by datamodel-codegen: +# filename: components/recognizer +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import Field, confloat + +from . import ( + recognizer_assemblyAiOptions, + recognizer_awsOptions, + recognizer_azureOptions, + recognizer_cobaltOptions, + recognizer_customOptions, + recognizer_deepgramOptions, + recognizer_elevenlabsOptions, + recognizer_gladiaOptions, + recognizer_googleOptions, + recognizer_houndifyOptions, + recognizer_ibmOptions, + recognizer_nuanceOptions, + recognizer_nvidiaOptions, + recognizer_openaiOptions, + recognizer_sonioxOptions, + recognizer_speechmaticsOptions, + recognizer_verbioOptions, +) +from . import vad as vad_1 +from jambonz_sdk._models.base import JambonzModel + + +class Hints(JambonzModel): + phrase: str + boost: float | None = None + + +class InteractionType(Enum): + """ + A hint to the recognizer about the type of interaction, which can improve accuracy. + """ + + unspecified = "unspecified" + discussion = "discussion" + presentation = "presentation" + phone_call = "phone_call" + voicemail = "voicemail" + voice_search = "voice_search" + voice_command = "voice_command" + dictation = "dictation" + + +class FilterMethod(Enum): + """ + How filtered vocabulary words should be handled in the transcript. + """ + + remove = "remove" + mask = "mask" + tag = "tag" + + +class OutputFormat(Enum): + """ + The level of detail in recognition results. + """ + + simple = "simple" + detailed = "detailed" + + +class ProfanityOption(Enum): + """ + How profanity should be handled in results. 
+ """ + + masked = "masked" + removed = "removed" + raw = "raw" + + +class Recognizer(JambonzModel): + """ + Configuration for speech-to-text recognition. Specifies the STT vendor, language, and vendor-specific options. Can be set at the session level via the 'config' verb or overridden per-verb (e.g. on 'gather'). + """ + + vendor: str = Field( + ..., + examples=[ + "google", + "aws", + "microsoft", + "deepgram", + "nuance", + "ibm", + "nvidia", + "soniox", + "cobalt", + "assemblyai", + "speechmatics", + "openai", + "houndify", + "gladia", + "elevenlabs", + "verbio", + "custom", + ], + ) + """ + The STT vendor to use. Must match a vendor configured in the jambonz platform. + """ + label: str | None = None + """ + An optional label identifying a specific credential set for this vendor. Used when multiple credentials are configured for the same vendor. + """ + language: str | None = Field(default=None, examples=["en-US", "en-GB", "es-ES", "fr-FR"]) + """ + The language code for speech recognition, in BCP-47 format. + """ + fallback_vendor: str | None = Field(default=None, alias="fallbackVendor") + """ + A backup STT vendor to use if the primary vendor fails or is unavailable. + """ + fallback_label: str | None = Field(default=None, alias="fallbackLabel") + """ + Credential label for the fallback vendor. + """ + fallback_language: str | None = Field(default=None, alias="fallbackLanguage") + """ + Language code to use with the fallback vendor. + """ + vad: vad_1.VAD | None = None + """ + Voice activity detection settings for this recognizer. + """ + autogenerate_prompt: bool | None = Field(default=None, alias="autogeneratePrompt") + """ + If true, automatically generate a prompt for the STT vendor based on context (e.g. TTS voice, language). Supported by vendors that accept prompts for recognition guidance. 
+ """ + hints: list[str | Hints] | None = Field( + default=None, + examples=[ + ["jambonz", "drachtio", "SIP", "WebRTC"], + [{"phrase": "jambonz", "boost": 20}, {"phrase": "drachtio", "boost": 10}], + ], + ) + """ + An array of words or phrases that the recognizer should favor. Each item can be a plain string or an object with 'phrase' and optional 'boost' properties. + """ + hints_boost: float | None = Field(default=None, alias="hintsBoost") + """ + A boost factor for hint words. Higher values increase the likelihood of recognizing hinted words. Vendor-specific range. + """ + alt_languages: list[str] | None = Field( + default=None, alias="altLanguages", examples=[["es-ES", "fr-FR"]] + ) + """ + Additional languages the recognizer should listen for simultaneously. Enables multilingual recognition. + """ + profanity_filter: bool | None = Field(default=None, alias="profanityFilter") + """ + If true, the vendor will attempt to filter profanity from transcription results. + """ + interim: bool | None = None + """ + If true, return interim (partial) transcription results as they become available, before the utterance is complete. + """ + single_utterance: bool | None = Field(default=None, alias="singleUtterance") + """ + If true, recognition stops after the first complete utterance is detected. + """ + dual_channel: bool | None = Field(default=None, alias="dualChannel") + """ + If true, send separate audio channels for each call leg (caller and callee) to the recognizer. + """ + separate_recognition_per_channel: bool | None = Field( + default=None, alias="separateRecognitionPerChannel" + ) + """ + If true, perform independent recognition on each audio channel. Requires dualChannel. + """ + punctuation: bool | None = None + """ + If true, enable automatic punctuation in transcription results. + """ + enhanced_model: bool | None = Field(default=None, alias="enhancedModel") + """ + If true, use an enhanced (premium) recognition model if available from the vendor. 
+ """ + words: bool | None = None + """ + If true, include word-level timing information in transcription results. + """ + diarization: bool | None = None + """ + If true, enable speaker diarization to identify different speakers in the audio. + """ + diarization_min_speakers: float | None = Field(default=None, alias="diarizationMinSpeakers") + """ + Minimum number of speakers expected. Used to guide the diarization algorithm. + """ + diarization_max_speakers: float | None = Field(default=None, alias="diarizationMaxSpeakers") + """ + Maximum number of speakers expected. Used to guide the diarization algorithm. + """ + interaction_type: InteractionType | None = Field(default=None, alias="interactionType") + """ + A hint to the recognizer about the type of interaction, which can improve accuracy. + """ + naics_code: float | None = Field(default=None, alias="naicsCode") + """ + North American Industry Classification System code. Some vendors use this to improve domain-specific accuracy. + """ + identify_channels: bool | None = Field(default=None, alias="identifyChannels") + """ + If true, identify and label which channel each transcription segment came from. + """ + vocabulary_name: str | None = Field(default=None, alias="vocabularyName") + """ + Name of a custom vocabulary resource configured at the vendor for improved recognition of specialized terms. + """ + vocabulary_filter_name: str | None = Field(default=None, alias="vocabularyFilterName") + """ + Name of a vocabulary filter configured at the vendor for masking or removing specific words. + """ + filter_method: FilterMethod | None = Field(default=None, alias="filterMethod") + """ + How filtered vocabulary words should be handled in the transcript. + """ + model: str | None = Field( + default=None, examples=["latest_long", "phone_call", "nova-2", "chirp"] + ) + """ + The specific recognition model to use. Model names are vendor-specific. 
+ """ + output_format: OutputFormat | None = Field(default=None, alias="outputFormat") + """ + The level of detail in recognition results. + """ + profanity_option: ProfanityOption | None = Field(default=None, alias="profanityOption") + """ + How profanity should be handled in results. + """ + request_snr: bool | None = Field(default=None, alias="requestSnr") + """ + If true, request signal-to-noise ratio information in results. + """ + initial_speech_timeout_ms: float | None = Field( + default=None, alias="initialSpeechTimeoutMs", examples=[5000] + ) + """ + Time in milliseconds to wait for initial speech before timing out. + """ + azure_service_endpoint: str | None = Field(default=None, alias="azureServiceEndpoint") + """ + Custom Azure Speech Services endpoint URL. Only applies when vendor is 'microsoft'. + """ + azure_stt_endpoint_id: str | None = Field(default=None, alias="azureSttEndpointId") + """ + Azure custom speech endpoint ID for using a custom-trained model. Only applies when vendor is 'microsoft'. + """ + asr_dtmf_termination_digit: str | None = Field( + default=None, alias="asrDtmfTerminationDigit", examples=["#"] + ) + """ + A DTMF digit that terminates speech recognition when pressed. + """ + asr_timeout: float | None = Field(default=None, alias="asrTimeout") + """ + Maximum time in seconds to wait for a complete recognition result. + """ + fast_recognition_timeout: float | None = Field(default=None, alias="fastRecognitionTimeout") + """ + Timeout in seconds for fast recognition mode. Shorter timeout for quick responses. + """ + min_confidence: confloat(ge=0.0, le=1.0) | None = Field(default=None, alias="minConfidence") + """ + Minimum confidence score (0-1) required to accept a recognition result. Results below this threshold are discarded. 
+ """ + deepgram_options: recognizer_deepgramOptions.DeepgramRecognizerOptions | None = Field( + default=None, alias="deepgramOptions" + ) + google_options: recognizer_googleOptions.GoogleRecognizerOptions | None = Field( + default=None, alias="googleOptions" + ) + aws_options: recognizer_awsOptions.AwsRecognizerOptions | None = Field( + default=None, alias="awsOptions" + ) + azure_options: recognizer_azureOptions.AzureRecognizerOptions | None = Field( + default=None, alias="azureOptions" + ) + nuance_options: recognizer_nuanceOptions.NuanceRecognizerOptions | None = Field( + default=None, alias="nuanceOptions" + ) + ibm_options: recognizer_ibmOptions.IbmRecognizerOptions | None = Field( + default=None, alias="ibmOptions" + ) + nvidia_options: recognizer_nvidiaOptions.NvidiaRecognizerOptions | None = Field( + default=None, alias="nvidiaOptions" + ) + soniox_options: recognizer_sonioxOptions.SonioxRecognizerOptions | None = Field( + default=None, alias="sonioxOptions" + ) + cobalt_options: recognizer_cobaltOptions.CobaltRecognizerOptions | None = Field( + default=None, alias="cobaltOptions" + ) + assembly_ai_options: recognizer_assemblyAiOptions.AssemblyaiRecognizerOptions | None = Field( + default=None, alias="assemblyAiOptions" + ) + speechmatics_options: recognizer_speechmaticsOptions.SpeechmaticsRecognizerOptions | None = ( + Field(default=None, alias="speechmaticsOptions") + ) + openai_options: recognizer_openaiOptions.OpenaiRecognizerOptions | None = Field( + default=None, alias="openaiOptions" + ) + houndify_options: recognizer_houndifyOptions.HoundifyRecognizerOptions | None = Field( + default=None, alias="houndifyOptions" + ) + gladia_options: recognizer_gladiaOptions.GladiaRecognizerOptions | None = Field( + default=None, alias="gladiaOptions" + ) + elevenlabs_options: recognizer_elevenlabsOptions.ElevenlabsRecognizerOptions | None = Field( + default=None, alias="elevenlabsOptions" + ) + verbio_options: recognizer_verbioOptions.VerbioRecognizerOptions | 
None = Field( + default=None, alias="verbioOptions" + ) + custom_options: recognizer_customOptions.CustomRecognizerOptions | None = Field( + default=None, alias="customOptions" + ) diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_assemblyAiOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_assemblyAiOptions.py new file mode 100644 index 0000000..cfc9288 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_assemblyAiOptions.py @@ -0,0 +1,85 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-assemblyAiOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class ServiceVersion(Enum): + """ + AssemblyAI streaming API version. + """ + + v2 = "v2" + v3 = "v3" + + +class AssemblyaiRecognizerOptions(JambonzModel): + """ + AssemblyAI-specific STT options. Only applies when recognizer vendor is 'assemblyai'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + api_key: str | None = Field(default=None, alias="apiKey") + """ + AssemblyAI API key. Overrides credentials configured in jambonz. + """ + service_version: ServiceVersion | None = Field(default=None, alias="serviceVersion") + """ + AssemblyAI streaming API version. + """ + speech_model: str | None = Field(default=None, alias="speechModel") + """ + AssemblyAI speech model to use for recognition. + """ + format_turns: bool | None = Field(default=None, alias="formatTurns") + """ + Enable turn-level formatting. + """ + end_of_turn_confidence_threshold: float | None = Field( + default=None, alias="endOfTurnConfidenceThreshold" + ) + """ + Confidence threshold for end-of-turn detection. 
+ """ + min_end_of_turn_silence_when_confident: float | None = Field( + default=None, alias="minEndOfTurnSilenceWhenConfident" + ) + """ + Minimum silence duration (seconds) to trigger end-of-turn when confidence is met. + """ + max_turn_silence: float | None = Field(default=None, alias="maxTurnSilence") + """ + Maximum silence duration (seconds) before forcing end-of-turn. + """ + min_turn_silence: float | None = Field(default=None, alias="minTurnSilence") + """ + Minimum silence duration (seconds) before allowing end-of-turn. + """ + keyterms: list[str] | None = None + """ + List of key terms to boost in recognition. + """ + prompt: str | None = None + """ + Prompt to guide the recognition model. + """ + language_detection: bool | None = Field(default=None, alias="languageDetection") + """ + Enable automatic language detection. + """ + vad_threshold: float | None = Field(default=None, alias="vadThreshold") + """ + Voice activity detection threshold. + """ + inactivity_timeout: float | None = Field(default=None, alias="inactivityTimeout") + """ + Timeout (seconds) for inactivity before closing the stream. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_awsOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_awsOptions.py new file mode 100644 index 0000000..034201f --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_awsOptions.py @@ -0,0 +1,72 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-awsOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class VocabularyFilterMethod(Enum): + """ + How filtered vocabulary words should be handled. + """ + + remove = "remove" + mask = "mask" + tag = "tag" + + +class AwsRecognizerOptions(JambonzModel): + """ + AWS Transcribe specific options. 
Only applies when recognizer vendor is 'aws'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + access_key: str | None = Field(default=None, alias="accessKey") + """ + AWS access key ID. Overrides credentials configured in jambonz. + """ + secret_key: str | None = Field(default=None, alias="secretKey") + """ + AWS secret access key. + """ + security_token: str | None = Field(default=None, alias="securityToken") + """ + AWS temporary security token (for STS/assumed roles). + """ + region: str | None = None + """ + AWS region for the Transcribe service. + """ + vocabulary_name: str | None = Field(default=None, alias="vocabularyName") + """ + Name of a custom vocabulary to use. + """ + vocabulary_filter_name: str | None = Field(default=None, alias="vocabularyFilterName") + """ + Name of a vocabulary filter to apply. + """ + vocabulary_filter_method: VocabularyFilterMethod | None = Field( + default=None, alias="vocabularyFilterMethod" + ) + """ + How filtered vocabulary words should be handled. + """ + language_model_name: str | None = Field(default=None, alias="languageModelName") + """ + Name of a custom language model. + """ + pii_entity_types: list[str] | None = Field(default=None, alias="piiEntityTypes") + """ + PII entity types to identify (e.g. 'BANK_ACCOUNT_NUMBER', 'CREDIT_DEBIT_NUMBER'). + """ + pii_identify_entities: bool | None = Field(default=None, alias="piiIdentifyEntities") + """ + Enable PII entity identification. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_azureOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_azureOptions.py new file mode 100644 index 0000000..cfb100b --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_azureOptions.py @@ -0,0 +1,63 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-azureOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class LanguageIdMode(Enum): + """ + Language identification mode when using multiple languages. + """ + + at_start = "AtStart" + continuous = "Continuous" + + +class SpeechRecognitionMode(Enum): + """ + Speech recognition mode optimized for the interaction type. + """ + + conversation = "CONVERSATION" + dictation = "DICTATION" + interactive = "INTERACTIVE" + + +class AzureRecognizerOptions(JambonzModel): + """ + Azure Speech Services specific options. Only applies when recognizer vendor is 'microsoft'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + speech_segmentation_silence_timeout_ms: float | None = Field( + default=None, alias="speechSegmentationSilenceTimeoutMs" + ) + """ + Silence timeout in milliseconds for speech segmentation. + """ + post_processing: str | None = Field(default=None, alias="postProcessing") + """ + Post-processing mode for transcription results. + """ + audio_logging: bool | None = Field(default=None, alias="audioLogging") + """ + Enable audio logging for diagnostics. + """ + language_id_mode: LanguageIdMode | None = Field(default=None, alias="languageIdMode") + """ + Language identification mode when using multiple languages. + """ + speech_recognition_mode: SpeechRecognitionMode | None = Field( + default=None, alias="speechRecognitionMode" + ) + """ + Speech recognition mode optimized for the interaction type. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_cobaltOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_cobaltOptions.py new file mode 100644 index 0000000..5c7fa27 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_cobaltOptions.py @@ -0,0 +1,42 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-cobaltOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class CobaltRecognizerOptions(JambonzModel): + """ + Cobalt-specific STT options. Only applies when recognizer vendor is 'cobalt'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + server_uri: str | None = Field(default=None, alias="serverUri") + """ + Cobalt server URI. + """ + enable_confusion_network: bool | None = Field(default=None, alias="enableConfusionNetwork") + """ + Enable confusion network output. + """ + metadata: str | None = None + """ + Metadata string to pass to the server. + """ + compiled_context_data: str | None = Field(default=None, alias="compiledContextData") + """ + Compiled context data for biasing recognition. + """ + word_time_offsets: bool | None = Field(default=None, alias="wordTimeOffsets") + """ + Include word-level timestamps. + """ + context_token: str | None = Field(default=None, alias="contextToken") + """ + Context token for server-side context. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_customOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_customOptions.py new file mode 100644 index 0000000..f31c594 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_customOptions.py @@ -0,0 +1,36 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-customOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class CustomRecognizerOptions(JambonzModel): + """ + Options for custom STT vendors. Only applies when recognizer vendor is 'custom'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + auth_token: str | None = Field(default=None, alias="authToken") + """ + Authentication token for the custom STT service. + """ + uri: str | None = None + """ + WebSocket URI of the custom STT service. + """ + sample_rate: float | None = Field(default=None, alias="sampleRate") + """ + Audio sample rate in Hz. + """ + options: dict[str, Any] | None = None + """ + Additional vendor-specific options passed through to the custom service. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_deepgramOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_deepgramOptions.py new file mode 100644 index 0000000..a730d02 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_deepgramOptions.py @@ -0,0 +1,163 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-deepgramOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class Redact(Enum): + """ + Redact sensitive information from transcripts. 
+ """ + + pci = "pci" + numbers = "numbers" + true = "true" + ssn = "ssn" + + +class DeepgramRecognizerOptions(JambonzModel): + """ + Deepgram-specific STT options. Only applies when recognizer vendor is 'deepgram'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + deepgram_stt_uri: str | None = Field(default=None, alias="deepgramSttUri") + """ + Custom Deepgram STT endpoint URI. + """ + deepgram_stt_use_tls: bool | None = Field(default=None, alias="deepgramSttUseTls") + """ + Whether to use TLS when connecting to the Deepgram STT endpoint. + """ + api_key: str | None = Field(default=None, alias="apiKey") + """ + Deepgram API key. Overrides the key configured in jambonz. + """ + tier: str | None = None + """ + Deepgram model tier. + """ + model: str | None = None + """ + Deepgram model name (e.g. 'nova-2', 'nova-2-general'). + """ + custom_model: str | None = Field(default=None, alias="customModel") + """ + ID of a custom-trained Deepgram model. + """ + version: str | None = None + """ + Model version. + """ + punctuate: bool | None = None + """ + Enable automatic punctuation. + """ + smart_formatting: bool | None = Field(default=None, alias="smartFormatting") + """ + Enable Deepgram smart formatting (dates, numbers, etc.). + """ + no_delay: bool | None = Field(default=None, alias="noDelay") + """ + Disable Deepgram's internal buffering for lower latency. + """ + profanity_filter: bool | None = Field(default=None, alias="profanityFilter") + """ + Filter profanity from transcripts. + """ + redact: Redact | None = None + """ + Redact sensitive information from transcripts. + """ + diarize: bool | None = None + """ + Enable speaker diarization. + """ + diarize_version: str | None = Field(default=None, alias="diarizeVersion") + """ + Diarization model version. + """ + ner: bool | None = None + """ + Enable named entity recognition. + """ + multichannel: bool | None = None + """ + Enable multichannel processing. 
+ """ + alternatives: float | None = None + """ + Number of alternative transcripts to return. + """ + numerals: bool | None = None + """ + Convert spoken numbers to digits. + """ + search: list[str] | None = None + """ + Terms to search for in the transcript. + """ + replace: list[str] | None = None + """ + Terms to replace in the transcript. + """ + keywords: list[str] | None = None + """ + Keywords to boost recognition for. + """ + keyterms: list[str] | None = None + """ + Key terms to boost recognition for. + """ + endpointing: bool | float | None = None + """ + Endpointing sensitivity. Boolean to enable/disable, or number of milliseconds. + """ + utterance_end_ms: float | None = Field(default=None, alias="utteranceEndMs") + """ + Milliseconds of silence to detect end of utterance. + """ + short_utterance: bool | None = Field(default=None, alias="shortUtterance") + """ + Optimize for short utterances. + """ + vad_turnoff: float | None = Field(default=None, alias="vadTurnoff") + """ + Milliseconds of silence before VAD turns off. + """ + tag: str | None = None + """ + Tag to associate with the request for tracking. + """ + filler_words: bool | None = Field(default=None, alias="fillerWords") + """ + Include filler words (um, uh) in transcript. + """ + eot_threshold: float | None = Field(default=None, alias="eotThreshold") + """ + End-of-turn confidence threshold (0-1). + """ + eot_timeout_ms: float | None = Field(default=None, alias="eotTimeoutMs") + """ + End-of-turn timeout in milliseconds. + """ + mip_opt_out: bool | None = Field(default=None, alias="mipOptOut") + """ + Opt out of Deepgram's model improvement program. + """ + entity_prompt: str | None = Field(default=None, alias="entityPrompt") + """ + Prompt to guide entity detection. + """ + eager_eot_threshold: float | None = Field(default=None, alias="eagerEotThreshold") + """ + Eager end-of-turn threshold for faster response. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_elevenlabsOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_elevenlabsOptions.py new file mode 100644 index 0000000..721e039 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_elevenlabsOptions.py @@ -0,0 +1,57 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-elevenlabsOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class CommitStrategy(Enum): + """ + How audio chunks are committed. 'manual' for explicit commits, 'vad' for voice activity detection. + """ + + manual = "manual" + vad = "vad" + + +class ElevenlabsRecognizerOptions(JambonzModel): + """ + ElevenLabs-specific STT options. Only applies when recognizer vendor is 'elevenlabs'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + include_timestamps: bool | None = Field(default=None, alias="includeTimestamps") + """ + Include word-level timestamps in results. + """ + commit_strategy: CommitStrategy | None = Field(default=None, alias="commitStrategy") + """ + How audio chunks are committed. 'manual' for explicit commits, 'vad' for voice activity detection. + """ + vad_silence_threshold_secs: float | None = Field(default=None, alias="vadSilenceThresholdSecs") + """ + Silence duration in seconds to trigger VAD commit. + """ + vad_threshold: float | None = Field(default=None, alias="vadThreshold") + """ + VAD activation threshold. + """ + min_speech_duration_ms: float | None = Field(default=None, alias="minSpeechDurationMs") + """ + Minimum speech duration in milliseconds to accept. + """ + min_silence_duration_ms: float | None = Field(default=None, alias="minSilenceDurationMs") + """ + Minimum silence duration in milliseconds to trigger end of speech. 
+ """ + enable_logging: bool | None = Field(default=None, alias="enableLogging") + """ + Enable server-side logging. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_gladiaOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_gladiaOptions.py new file mode 100644 index 0000000..ab2a671 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_gladiaOptions.py @@ -0,0 +1,18 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-gladiaOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict +from jambonz_sdk._models.base import JambonzModel + + +class GladiaRecognizerOptions(JambonzModel): + """ + Gladia-specific STT options. Only applies when recognizer vendor is 'gladia'. See Gladia API documentation for available options. + """ + + model_config = ConfigDict( + extra="forbid", + ) diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_googleOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_googleOptions.py new file mode 100644 index 0000000..6fc9b20 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_googleOptions.py @@ -0,0 +1,58 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-googleOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class ServiceVersion(Enum): + """ + Google Speech-to-Text API version. + """ + + v1 = "v1" + v2 = "v2" + + +class GoogleRecognizerOptions(JambonzModel): + """ + Google Speech-to-Text specific options. Only applies when recognizer vendor is 'google'. 
+ """ + + model_config = ConfigDict( + extra="forbid", + ) + service_version: ServiceVersion | None = Field(default=None, alias="serviceVersion") + """ + Google Speech-to-Text API version. + """ + recognizer_id: str | None = Field(default=None, alias="recognizerId") + """ + ID of a Google Speech recognizer resource (v2 only). + """ + speech_start_timeout_ms: float | None = Field(default=None, alias="speechStartTimeoutMs") + """ + Timeout in milliseconds to wait for speech to start. + """ + speech_end_timeout_ms: float | None = Field(default=None, alias="speechEndTimeoutMs") + """ + Timeout in milliseconds to detect end of speech. + """ + enable_voice_activity_events: bool | None = Field( + default=None, alias="enableVoiceActivityEvents" + ) + """ + Enable voice activity detection events. + """ + transcript_normalization: list[Any] | None = Field( + default=None, alias="transcriptNormalization" + ) + """ + Array of transcript normalization rules. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_houndifyOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_houndifyOptions.py new file mode 100644 index 0000000..0033f21 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_houndifyOptions.py @@ -0,0 +1,168 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-houndifyOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class HoundifyRecognizerOptions(JambonzModel): + """ + Houndify-specific STT options. Only applies when recognizer vendor is 'houndify'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + request_info: dict[str, Any] | None = Field(default=None, alias="requestInfo") + """ + Houndify RequestInfo object with context data. 
+ """ + sample_rate: float | None = Field(default=None, alias="sampleRate") + """ + Audio sample rate in Hz. + """ + latitude: float | None = None + """ + User latitude for location-aware queries. + """ + longitude: float | None = None + """ + User longitude for location-aware queries. + """ + city: str | None = None + """ + User city. + """ + state: str | None = None + """ + User state. + """ + country: str | None = None + """ + User country. + """ + time_zone: str | None = Field(default=None, alias="timeZone") + """ + User timezone. + """ + domain: str | None = None + """ + Houndify domain. + """ + audio_endpoint: str | None = Field(default=None, alias="audioEndpoint") + """ + Custom audio endpoint URL. + """ + max_silence_seconds: float | None = Field(default=None, alias="maxSilenceSeconds") + """ + Maximum silence before stopping. + """ + max_silence_after_full_query_seconds: float | None = Field( + default=None, alias="maxSilenceAfterFullQuerySeconds" + ) + """ + Silence timeout after a complete query. + """ + max_silence_after_partial_query_seconds: float | None = Field( + default=None, alias="maxSilenceAfterPartialQuerySeconds" + ) + """ + Silence timeout after a partial query. + """ + vad_sensitivity: float | None = Field(default=None, alias="vadSensitivity") + """ + VAD sensitivity level. + """ + vad_timeout: float | None = Field(default=None, alias="vadTimeout") + """ + VAD timeout in milliseconds. + """ + vad_mode: str | None = Field(default=None, alias="vadMode") + """ + VAD mode. + """ + vad_voice_ms: float | None = Field(default=None, alias="vadVoiceMs") + """ + Milliseconds of voice to trigger VAD. + """ + vad_silence_ms: float | None = Field(default=None, alias="vadSilenceMs") + """ + Milliseconds of silence to trigger VAD. + """ + vad_debug: bool | None = Field(default=None, alias="vadDebug") + """ + Enable VAD debug logging. + """ + audio_format: str | None = Field(default=None, alias="audioFormat") + """ + Audio format. 
+ """ + enable_noise_reduction: bool | None = Field(default=None, alias="enableNoiseReduction") + """ + Enable noise reduction. + """ + enable_profanity_filter: bool | None = Field(default=None, alias="enableProfanityFilter") + """ + Filter profanity. + """ + enable_punctuation: bool | None = Field(default=None, alias="enablePunctuation") + """ + Enable punctuation. + """ + enable_capitalization: bool | None = Field(default=None, alias="enableCapitalization") + """ + Enable capitalization. + """ + confidence_threshold: float | None = Field(default=None, alias="confidenceThreshold") + """ + Minimum confidence threshold. + """ + enable_disfluency_filter: bool | None = Field(default=None, alias="enableDisfluencyFilter") + """ + Filter disfluencies (um, uh). + """ + max_results: float | None = Field(default=None, alias="maxResults") + """ + Maximum number of results. + """ + enable_word_timestamps: bool | None = Field(default=None, alias="enableWordTimestamps") + """ + Include word timestamps. + """ + max_alternatives: float | None = Field(default=None, alias="maxAlternatives") + """ + Maximum alternative transcripts. + """ + partial_transcript_interval: float | None = Field( + default=None, alias="partialTranscriptInterval" + ) + """ + Interval for partial transcript delivery. + """ + session_timeout: float | None = Field(default=None, alias="sessionTimeout") + """ + Session timeout. + """ + connection_timeout: float | None = Field(default=None, alias="connectionTimeout") + """ + Connection timeout. + """ + custom_vocabulary: list[str] | None = Field(default=None, alias="customVocabulary") + """ + Custom vocabulary terms. + """ + language_model: str | None = Field(default=None, alias="languageModel") + """ + Language model to use. + """ + audio_query_absolute_timeout: float | None = Field( + default=None, alias="audioQueryAbsoluteTimeout" + ) + """ + Absolute timeout for audio queries. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_ibmOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_ibmOptions.py new file mode 100644 index 0000000..c7b1c09 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_ibmOptions.py @@ -0,0 +1,62 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-ibmOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class IbmRecognizerOptions(JambonzModel): + """ + IBM Watson Speech-to-Text specific options. Only applies when recognizer vendor is 'ibm'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + stt_api_key: str | None = Field(default=None, alias="sttApiKey") + """ + IBM STT API key. Overrides credentials configured in jambonz. + """ + stt_region: str | None = Field(default=None, alias="sttRegion") + """ + IBM STT region. + """ + tts_api_key: str | None = Field(default=None, alias="ttsApiKey") + """ + IBM TTS API key. + """ + tts_region: str | None = Field(default=None, alias="ttsRegion") + """ + IBM TTS region. + """ + instance_id: str | None = Field(default=None, alias="instanceId") + """ + IBM Watson instance ID. + """ + model: str | None = None + """ + Recognition model name. + """ + language_customization_id: str | None = Field(default=None, alias="languageCustomizationId") + """ + ID of a custom language model. + """ + acoustic_customization_id: str | None = Field(default=None, alias="acousticCustomizationId") + """ + ID of a custom acoustic model. + """ + base_model_version: str | None = Field(default=None, alias="baseModelVersion") + """ + Base model version to use. + """ + watson_metadata: str | None = Field(default=None, alias="watsonMetadata") + """ + Customer ID metadata for data labeling. 
+ """ + watson_learning_opt_out: bool | None = Field(default=None, alias="watsonLearningOptOut") + """ + Opt out of IBM data collection for service improvements. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_nuanceOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_nuanceOptions.py new file mode 100644 index 0000000..f807c3f --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_nuanceOptions.py @@ -0,0 +1,225 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-nuanceOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class UtteranceDetectionMode(Enum): + """ + How utterance boundaries are detected. + """ + + single = "single" + multiple = "multiple" + disabled = "disabled" + + +class ResultType(Enum): + """ + Type of results to return. + """ + + final = "final" + partial = "partial" + immutable_partial = "immutable_partial" + + +class Formatting(JambonzModel): + """ + Formatting options for recognition results. + """ + + scheme: str + """ + Formatting scheme name. + """ + options: dict[str, Any] + """ + Scheme-specific formatting options. + """ + + +class Type(Enum): + undefined_resource_type = "undefined_resource_type" + wordset = "wordset" + compiled_wordset = "compiled_wordset" + domain_lm = "domain_lm" + speaker_profile = "speaker_profile" + grammar = "grammar" + settings = "settings" + + +class ExternalReference(JambonzModel): + """ + External resource reference. 
+ """ + + type: Type | None = None + uri: str | None = None + max_load_failures: bool | None = Field(default=None, alias="maxLoadFailures") + request_timeout_ms: float | None = Field(default=None, alias="requestTimeoutMs") + headers: dict[str, Any] | None = None + + +class WeightName(Enum): + default_weight = "defaultWeight" + lowest = "lowest" + low = "low" + medium = "medium" + high = "high" + highest = "highest" + + +class Reuse(Enum): + undefined_reuse = "undefined_reuse" + low_reuse = "low_reuse" + high_reuse = "high_reuse" + + +class Resource(JambonzModel): + external_reference: ExternalReference | None = Field(default=None, alias="externalReference") + """ + External resource reference. + """ + inline_wordset: str | None = Field(default=None, alias="inlineWordset") + """ + Inline wordset JSON string. + """ + builtin: str | None = None + """ + Built-in grammar name. + """ + inline_grammar: str | None = Field(default=None, alias="inlineGrammar") + """ + Inline SRGS grammar. + """ + wakeup_word: list[str] | None = Field(default=None, alias="wakeupWord") + """ + Wakeup words. + """ + weight_name: WeightName | None = Field(default=None, alias="weightName") + weight_value: float | None = Field(default=None, alias="weightValue") + reuse: Reuse | None = None + + +class NuanceRecognizerOptions(JambonzModel): + """ + Nuance Mix specific options. Only applies when recognizer vendor is 'nuance'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + client_id: str | None = Field(default=None, alias="clientId") + """ + Nuance Mix client ID. + """ + secret: str | None = None + """ + Nuance Mix client secret. + """ + krypton_endpoint: str | None = Field(default=None, alias="kryptonEndpoint") + """ + Custom Nuance Krypton endpoint URL. + """ + topic: str | None = None + """ + Recognition topic (domain). 
+ """ + utterance_detection_mode: UtteranceDetectionMode | None = Field( + default=None, alias="utteranceDetectionMode" + ) + """ + How utterance boundaries are detected. + """ + punctuation: bool | None = None + """ + Enable automatic punctuation. + """ + profanity_filter: bool | None = Field(default=None, alias="profanityFilter") + """ + Filter profanity from results. + """ + include_tokenization: bool | None = Field(default=None, alias="includeTokenization") + """ + Include tokenization data in results. + """ + discard_speaker_adaptation: bool | None = Field(default=None, alias="discardSpeakerAdaptation") + """ + Discard speaker adaptation data. + """ + suppress_call_recording: bool | None = Field(default=None, alias="suppressCallRecording") + """ + Suppress call recording on the Nuance side. + """ + mask_load_failures: bool | None = Field(default=None, alias="maskLoadFailures") + """ + Mask resource load failures. + """ + suppress_initial_capitalization: bool | None = Field( + default=None, alias="suppressInitialCapitalization" + ) + """ + Suppress initial capitalization of results. + """ + allow_zero_base_lm_weight: bool | None = Field(default=None, alias="allowZeroBaseLmWeight") + """ + Allow zero base language model weight. + """ + filter_wakeup_word: bool | None = Field(default=None, alias="filterWakeupWord") + """ + Filter wakeup words from results. + """ + result_type: ResultType | None = Field(default=None, alias="resultType") + """ + Type of results to return. + """ + no_input_timeout_ms: float | None = Field(default=None, alias="noInputTimeoutMs") + """ + Timeout in milliseconds before no-input event. + """ + recognition_timeout_ms: float | None = Field(default=None, alias="recognitionTimeoutMs") + """ + Maximum recognition duration in milliseconds. + """ + utterance_end_silence_ms: float | None = Field(default=None, alias="utteranceEndSilenceMs") + """ + Silence duration in milliseconds to detect end of utterance. 
+ """ + max_hypotheses: float | None = Field(default=None, alias="maxHypotheses") + """ + Maximum number of recognition hypotheses to return. + """ + speech_domain: str | None = Field(default=None, alias="speechDomain") + """ + Speech domain for optimized recognition. + """ + formatting: Formatting | None = None + """ + Formatting options for recognition results. + """ + client_data: dict[str, Any] | None = Field(default=None, alias="clientData") + """ + Custom client data to pass to Nuance. + """ + user_id: str | None = Field(default=None, alias="userId") + """ + User ID for speaker adaptation. + """ + speech_detection_sensitivity: float | None = Field( + default=None, alias="speechDetectionSensitivity" + ) + """ + Speech detection sensitivity (0-1). + """ + resources: list[Resource] | None = None + """ + Array of Nuance recognition resources (grammars, wordsets, etc.). + """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_nvidiaOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_nvidiaOptions.py new file mode 100644 index 0000000..77efcbd --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_nvidiaOptions.py @@ -0,0 +1,48 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-nvidiaOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class NvidiaRecognizerOptions(JambonzModel): + """ + NVIDIA Riva specific options. Only applies when recognizer vendor is 'nvidia'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + riva_uri: str | None = Field(default=None, alias="rivaUri") + """ + NVIDIA Riva server URI. + """ + max_alternatives: float | None = Field(default=None, alias="maxAlternatives") + """ + Maximum number of alternative transcripts. 
+ """ + profanity_filter: bool | None = Field(default=None, alias="profanityFilter") + """ + Filter profanity from results. + """ + punctuation: bool | None = None + """ + Enable automatic punctuation. + """ + word_time_offsets: bool | None = Field(default=None, alias="wordTimeOffsets") + """ + Include word-level timestamps. + """ + verbatim_transcripts: bool | None = Field(default=None, alias="verbatimTranscripts") + """ + Return verbatim (unformatted) transcripts. + """ + custom_configuration: dict[str, Any] | None = Field(default=None, alias="customConfiguration") + """ + Custom Riva configuration parameters. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_openaiOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_openaiOptions.py new file mode 100644 index 0000000..ecaebf5 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_openaiOptions.py @@ -0,0 +1,122 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-openaiOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class PromptTemplates(JambonzModel): + """ + Templates for dynamic prompt generation. + """ + + hints_template: str | None = Field(default=None, alias="hintsTemplate") + """ + Template for injecting hints into the prompt. + """ + conversation_history_template: str | None = Field( + default=None, alias="conversationHistoryTemplate" + ) + """ + Template for injecting conversation history. + """ + + +class InputAudioNoiseReduction(Enum): + """ + Input audio noise reduction mode. + """ + + near_field = "near_field" + far_field = "far_field" + + +class Type(Enum): + """ + Turn detection strategy. + """ + + none = "none" + server_vad = "server_vad" + semantic_vad = "semantic_vad" + + +class Eagerness(Enum): + """ + How eagerly the model should respond. 
+ """ + + low = "low" + medium = "medium" + high = "high" + auto = "auto" + + +class TurnDetection(JambonzModel): + """ + Turn detection configuration for the OpenAI Realtime API. + """ + + type: Type + """ + Turn detection strategy. + """ + eagerness: Eagerness | None = None + """ + How eagerly the model should respond. + """ + threshold: float | None = None + """ + VAD activation threshold (0-1). + """ + prefix_padding_ms: float | None = None + """ + Milliseconds of audio to include before detected speech. + """ + silence_duration_ms: float | None = None + """ + Milliseconds of silence to detect end of speech. + """ + + +class OpenaiRecognizerOptions(JambonzModel): + """ + OpenAI Whisper/Realtime specific STT options. Only applies when recognizer vendor is 'openai'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + api_key: str | None = Field(default=None, alias="apiKey") + """ + OpenAI API key. Overrides credentials configured in jambonz. + """ + model: str | None = None + """ + OpenAI STT model name. + """ + prompt: str | None = None + """ + Prompt to guide the recognition model. + """ + prompt_templates: PromptTemplates | None = Field(default=None, alias="promptTemplates") + """ + Templates for dynamic prompt generation. + """ + language: str | None = None + """ + Language code for recognition. + """ + input_audio_noise_reduction: InputAudioNoiseReduction | None = None + """ + Input audio noise reduction mode. + """ + turn_detection: TurnDetection | None = None + """ + Turn detection configuration for the OpenAI Realtime API. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_sonioxOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_sonioxOptions.py new file mode 100644 index 0000000..e1ca0e7 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_sonioxOptions.py @@ -0,0 +1,79 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-sonioxOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any + +from pydantic import ConfigDict, Field +from jambonz_sdk._models.base import JambonzModel + + +class Storage(JambonzModel): + """ + Soniox storage configuration for persisting transcripts. + """ + + id: str | None = None + """ + Storage ID. + """ + title: str | None = None + """ + Storage title. + """ + disable_store_audio: bool | None = Field(default=None, alias="disableStoreAudio") + """ + Disable audio storage. + """ + disable_store_transcript: bool | None = Field(default=None, alias="disableStoreTranscript") + """ + Disable transcript storage. + """ + disable_search: bool | None = Field(default=None, alias="disableSearch") + """ + Disable search indexing. + """ + metadata: dict[str, Any] | None = None + """ + Custom metadata. + """ + + +class SonioxRecognizerOptions(JambonzModel): + """ + Soniox-specific STT options. Only applies when recognizer vendor is 'soniox'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + api_key: str | None = Field(default=None, alias="apiKey") + """ + Soniox API key. + """ + model: str | None = None + """ + Soniox recognition model. + """ + endpoint_detection: bool | None = Field(default=None, alias="endpointDetection") + """ + Enable endpoint detection. + """ + profanity_filter: bool | None = Field(default=None, alias="profanityFilter") + """ + Filter profanity from results. + """ + speech_context: str | None = Field(default=None, alias="speechContext") + """ + Speech context for improved recognition. 
+ """ + client_request_reference: str | None = Field(default=None, alias="clientRequestReference") + """ + Client request reference for tracking. + """ + storage: Storage | None = None + """ + Soniox storage configuration for persisting transcripts. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_speechmaticsOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_speechmaticsOptions.py new file mode 100644 index 0000000..5070e32 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_speechmaticsOptions.py @@ -0,0 +1,167 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-speechmaticsOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import ConfigDict +from jambonz_sdk._models.base import JambonzModel + + +class Profile(Enum): + """ + Speechmatics profile. + """ + + adaptive = "adaptive" + agile = "agile" + smart = "smart" + external = "external" + + +class SpeakerDiarizationConfig(JambonzModel): + speaker_sensitivity: float | None = None + max_speakers: float | None = None + + +class ConversationConfig(JambonzModel): + end_of_utterance_silence_trigger: float | None = None + + +class MaxDelayMode(Enum): + """ + Delay mode. + """ + + fixed = "fixed" + flexible = "flexible" + + +class PunctuationOverrides(JambonzModel): + permitted_marks: list[str] | None = None + sensitivity: float | None = None + + +class AudioFilteringConfig(JambonzModel): + volume_threshold: float + + +class TranscriptFilteringConfig(JambonzModel): + remove_disfluencies: bool + + +class TranscriptionConfig(JambonzModel): + """ + Speechmatics transcription configuration. + """ + + language: str | None = None + """ + Language code. + """ + domain: str | None = None + """ + Domain model. + """ + additional_vocab: list[Any] | None = None + """ + Additional vocabulary entries. 
+ """ + diarization: str | None = None + """ + Diarization mode. + """ + speaker_diarization_config: SpeakerDiarizationConfig | None = None + conversation_config: ConversationConfig | None = None + enable_partials: bool | None = None + """ + Enable partial transcripts. + """ + max_delay: float | None = None + """ + Maximum delay in seconds. + """ + max_delay_mode: MaxDelayMode | None = None + """ + Delay mode. + """ + output_locale: str | None = None + """ + Output locale for formatting. + """ + punctuation_overrides: PunctuationOverrides | None = None + operating_point: str | None = None + """ + Operating point (standard or enhanced). + """ + enable_entities: bool | None = None + """ + Enable entity detection. + """ + audio_filtering_config: AudioFilteringConfig | None = None + transcript_filtering_config: TranscriptFilteringConfig | None = None + + +class TranslationConfig(JambonzModel): + """ + Speechmatics translation configuration. + """ + + target_languages: list[str] + """ + Target languages for translation. + """ + enable_partials: bool | None = None + """ + Enable partial translations. + """ + + +class Type(Enum): + applause = "applause" + music = "music" + laughter = "laughter" + + +class AudioEventsConfig(JambonzModel): + """ + Audio event detection configuration. + """ + + types: list[Type] | None = None + """ + Audio event types to detect. + """ + + +class SpeechmaticsRecognizerOptions(JambonzModel): + """ + Speechmatics-specific STT options. Only applies when recognizer vendor is 'speechmatics'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + host: str | None = None + """ + Speechmatics host URL. + """ + profile: Profile | None = None + """ + Speechmatics profile. + """ + transcription_config: TranscriptionConfig | None = None + """ + Speechmatics transcription configuration. + """ + translation_config: TranslationConfig | None = None + """ + Speechmatics translation configuration. 
+ """ + audio_events_config: AudioEventsConfig | None = None + """ + Audio event detection configuration. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/recognizer_verbioOptions.py b/src/jambonz_sdk/_models/_generated/components/recognizer_verbioOptions.py new file mode 100644 index 0000000..a27343f --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/recognizer_verbioOptions.py @@ -0,0 +1,54 @@ +# generated by datamodel-codegen: +# filename: components/recognizer-verbioOptions +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import ConfigDict +from jambonz_sdk._models.base import JambonzModel + + +class VerbioRecognizerOptions(JambonzModel): + """ + Verbio-specific STT options. Only applies when recognizer vendor is 'verbio'. + """ + + model_config = ConfigDict( + extra="forbid", + ) + enable_formatting: bool | None = None + """ + Enable text formatting of results. + """ + enable_diarization: bool | None = None + """ + Enable speaker diarization. + """ + topic: float | None = None + """ + Topic ID for domain-specific recognition. + """ + inline_grammar: str | None = None + """ + Inline SRGS grammar for constrained recognition. + """ + grammar_uri: str | None = None + """ + URI of an external grammar resource. + """ + label: str | None = None + """ + Label for the recognition session. + """ + recognition_timeout: float | None = None + """ + Maximum recognition duration in seconds. + """ + speech_complete_timeout: float | None = None + """ + Silence duration in seconds after complete speech. + """ + speech_incomplete_timeout: float | None = None + """ + Silence duration in seconds after incomplete speech. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/components/synthesizer.py b/src/jambonz_sdk/_models/_generated/components/synthesizer.py new file mode 100644 index 0000000..04ea223 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/synthesizer.py @@ -0,0 +1,104 @@ +# generated by datamodel-codegen: +# filename: components/synthesizer +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import Field +from jambonz_sdk._models.base import JambonzModel + + +class Engine(Enum): + """ + The synthesis engine tier to use. Availability depends on the vendor. + """ + + standard = "standard" + neural = "neural" + generative = "generative" + long_form = "long-form" + + +class Gender(Enum): + """ + Preferred voice gender. Used by some vendors (e.g. Google) when a specific voice is not specified. + """ + + male = "MALE" + female = "FEMALE" + neutral = "NEUTRAL" + + +class Synthesizer(JambonzModel): + """ + Configuration for text-to-speech synthesis. Specifies the TTS vendor, voice, language, and vendor-specific options. Can be set at the session level via the 'config' verb or overridden per-verb (e.g. on 'say'). + """ + + vendor: str = Field( + ..., + examples=[ + "google", + "aws", + "microsoft", + "elevenlabs", + "cartesia", + "deepgram", + "ibm", + "nuance", + "nvidia", + "wellsaid", + "whisper", + "verbio", + "custom", + ], + ) + """ + The TTS vendor to use. Must match a vendor configured in the jambonz platform. + """ + label: str | None = None + """ + An optional label identifying a specific credential set for this vendor. Used when multiple credentials are configured for the same vendor on the jambonz platform. + """ + language: str | None = Field( + default=None, examples=["en-US", "en-GB", "es-ES", "fr-FR", "de-DE"] + ) + """ + The language code for speech synthesis, in BCP-47 format. 
+ """ + voice: str | dict[str, Any] | None = Field( + default=None, examples=["en-US-Wavenet-D", "Joanna", "EXAVITQu4vr4xnSDxMaL"] + ) + """ + The voice to use for synthesis. Format varies by vendor: Google uses voice names like 'en-US-Wavenet-D', AWS Polly uses names like 'Joanna', but ElevenLabs and Cartesia require voice IDs (alphanumeric strings like 'EXAVITQu4vr4xnSDxMaL'), not human-readable names. Some vendors accept an object for more complex voice configuration. + """ + fallback_vendor: str | None = Field(default=None, alias="fallbackVendor") + """ + A backup TTS vendor to use if the primary vendor fails or is unavailable. + """ + fallback_label: str | None = Field(default=None, alias="fallbackLabel") + """ + Credential label for the fallback vendor. + """ + fallback_language: str | None = Field(default=None, alias="fallbackLanguage") + """ + Language code to use with the fallback vendor. + """ + fallback_voice: str | dict[str, Any] | None = Field(default=None, alias="fallbackVoice") + """ + Voice to use with the fallback vendor. + """ + engine: Engine | None = None + """ + The synthesis engine tier to use. Availability depends on the vendor. + """ + gender: Gender | None = None + """ + Preferred voice gender. Used by some vendors (e.g. Google) when a specific voice is not specified. + """ + options: dict[str, Any] | None = None + """ + Vendor-specific options passed through to the TTS provider. The structure depends on the vendor being used. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/target.py b/src/jambonz_sdk/_models/_generated/components/target.py new file mode 100644 index 0000000..352e864 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/target.py @@ -0,0 +1,111 @@ +# generated by datamodel-codegen: +# filename: components/target +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import Field + +from . import actionHook +from . 
import auth as auth_1 +from jambonz_sdk._models.base import JambonzModel + + +class Type(Enum): + """ + The type of target to dial. + """ + + phone = "phone" + sip = "sip" + user = "user" + teams = "teams" + + +class Method(Enum): + """ + The HTTP method to use when invoking the confirmHook. + """ + + get = "GET" + post = "POST" + + +class From(JambonzModel): + """ + Override the From header on the outbound SIP INVITE. + """ + + user: str | None = None + """ + The user part of the SIP From URI. + """ + host: str | None = None + """ + The host part of the SIP From URI. + """ + + +class Target(JambonzModel): + """ + A call target for the 'dial' verb. Specifies who or what to connect the call to: a phone number (PSTN), a SIP endpoint, a registered user, or a Microsoft Teams user. + """ + + type: Type + """ + The type of target to dial. + """ + number: str | None = Field(default=None, examples=["+15085551212"]) + """ + The phone number to dial. Required when type is 'phone'. Use E.164 format. + """ + sip_uri: str | None = Field(default=None, alias="sipUri", examples=["sip:alice@example.com"]) + """ + The SIP URI to dial. Required when type is 'sip'. + """ + name: str | None = None + """ + The registered user name to dial. Required when type is 'user'. Also used as the display name for SIP targets. + """ + tenant: str | None = None + """ + The Microsoft Teams tenant ID. Required when type is 'teams'. + """ + trunk: str | None = None + """ + The SIP trunk to use for the outbound call. When specified, overrides the default carrier routing. + """ + confirm_hook: str | actionHook.ActionHook | None = Field(default=None, alias="confirmHook") + """ + A webhook to invoke when the target answers, before connecting the call. Use this to screen calls, play a whisper prompt, or require the target to press a key to accept. + """ + method: Method | None = "POST" + """ + The HTTP method to use when invoking the confirmHook. 
+ """ + headers: dict[str, str] | None = None + """ + Custom SIP headers to include on the outbound INVITE. Keys are header names, values are header values. + """ + from_: From | None = Field(default=None, alias="from") + """ + Override the From header on the outbound SIP INVITE. + """ + auth: auth_1.Auth | None = None + """ + SIP authentication credentials for the outbound call, if the far end requires digest auth. + """ + vmail: bool | None = None + """ + If true, follow the call into voicemail if the target does not answer. + """ + override_to: str | None = Field(default=None, alias="overrideTo") + """ + Override the Request-URI on the outbound SIP INVITE. Useful when the Request-URI needs to differ from the To header. + """ + proxy: str | None = Field(default=None, examples=["sip:proxy.example.com"]) + """ + A SIP proxy to route the outbound call through, specified as a SIP URI. + """ diff --git a/src/jambonz_sdk/_models/_generated/components/vad.py b/src/jambonz_sdk/_models/_generated/components/vad.py new file mode 100644 index 0000000..d4152e6 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/components/vad.py @@ -0,0 +1,58 @@ +# generated by datamodel-codegen: +# filename: components/vad +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import Field, confloat +from jambonz_sdk._models.base import JambonzModel + + +class Vendor(Enum): + """ + The VAD engine to use. + """ + + webrtc = "webrtc" + silero = "silero" + + +class VAD(JambonzModel): + """ + Voice Activity Detection configuration. Controls how jambonz detects the presence or absence of speech on the audio channel. Used to determine speech start/end boundaries for recognition and barge-in. + """ + + enable: bool | None = None + """ + Whether to enable voice activity detection. 
+ """ + voice_ms: float | None = Field(default=None, alias="voiceMs", examples=[250]) + """ + Duration of voice activity (in milliseconds) required before speech is considered to have started. + """ + silence_ms: float | None = Field(default=None, alias="silenceMs", examples=[1000]) + """ + Duration of silence (in milliseconds) required before speech is considered to have ended. + """ + strategy: str | None = None + """ + The VAD strategy to use. + """ + mode: confloat(ge=0.0, le=3.0) | None = None + """ + WebRTC VAD aggressiveness mode (0-3). Higher values are more aggressive at filtering non-speech. Only applies when vendor is 'webrtc'. + """ + vendor: Vendor | None = None + """ + The VAD engine to use. + """ + threshold: confloat(ge=0.0, le=1.0) | None = None + """ + Speech detection confidence threshold for Silero VAD. Value between 0 and 1, where higher values require greater confidence. Only applies when vendor is 'silero'. + """ + speech_pad_ms: float | None = Field(default=None, alias="speechPadMs") + """ + Padding in milliseconds added before and after detected speech segments. Prevents clipping utterance boundaries. Only applies when vendor is 'silero'. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/jambonz_app.py b/src/jambonz_sdk/_models/_generated/jambonz_app.py new file mode 100644 index 0000000..2867ac3 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/jambonz_app.py @@ -0,0 +1,212 @@ +# generated by datamodel-codegen: +# filename: jambonz-app +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from pydantic import Field, RootModel + +from .verbs import ( + agent, + alert, + answer, + conference, + config, + deepgram_s2s, + dequeue, + dial, + dialogflow, + dtmf, + dub, + elevenlabs_s2s, + enqueue, + gather, + google_s2s, + hangup, + leave, + listen, + llm, + message, + openai_s2s, + pause, + play, + redirect, + rest_dial, + s2s, + say, + sip_decline, + sip_refer, + sip_request, + stream, + tag, + transcribe, + ultravox_s2s, +) + + +class Verb( + RootModel[ + answer.Answer + | alert.Alert + | config.Config + | say.Say + | play.Play + | gather.Gather + | dial.Dial + | listen.Listen + | stream.Stream + | llm.LLM + | s2s.S2S + | openai_s2s.OpenaiS2S + | google_s2s.GoogleS2S + | elevenlabs_s2s.ElevenlabsS2S + | deepgram_s2s.DeepgramS2S + | ultravox_s2s.UltravoxS2S + | dialogflow.Dialogflow + | agent.Agent + | conference.Conference + | transcribe.Transcribe + | enqueue.Enqueue + | dequeue.Dequeue + | dtmf.DTMF + | dub.Dub + | hangup.Hangup + | leave.Leave + | message.Message + | pause.Pause + | redirect.Redirect + | tag.Tag + | sip_decline.SipDecline + | sip_request.SipRequest + | sip_refer.SipRefer + | rest_dial.RestDial + ] +): + root: ( + answer.Answer + | alert.Alert + | config.Config + | say.Say + | play.Play + | gather.Gather + | dial.Dial + | listen.Listen + | stream.Stream + | llm.LLM + | s2s.S2S + | openai_s2s.OpenaiS2S + | google_s2s.GoogleS2S + | elevenlabs_s2s.ElevenlabsS2S + | deepgram_s2s.DeepgramS2S + | ultravox_s2s.UltravoxS2S + | dialogflow.Dialogflow + | agent.Agent + | conference.Conference + | transcribe.Transcribe + | enqueue.Enqueue + | dequeue.Dequeue 
+ | dtmf.DTMF + | dub.Dub + | hangup.Hangup + | leave.Leave + | message.Message + | pause.Pause + | redirect.Redirect + | tag.Tag + | sip_decline.SipDecline + | sip_request.SipRequest + | sip_refer.SipRefer + | rest_dial.RestDial + ) = Field(..., discriminator="verb") + + +class JambonzApplication(RootModel[list[Verb]]): + """ + A jambonz application is an array of verbs that are executed sequentially to control a phone call. Each verb performs an action: speaking text, playing audio, collecting input, dialing a number, connecting to an AI model, etc. When a webhook (actionHook) is invoked, it must return a new verb array to continue call processing. + + The execution model is simple: verbs execute one after another, top to bottom. When a verb with an actionHook completes (e.g. gather collects input), the actionHook is called and its response replaces the remaining verb stack. If the verb array is exhausted without a hangup, the call is terminated. + + There are two transport modes for delivering verb arrays to jambonz: + - **Webhook**: Your HTTP server receives POST/GET requests with call data and returns JSON verb arrays in the response body. + - **WebSocket**: Your server maintains a persistent websocket connection with jambonz and sends/receives verb arrays as JSON messages. Required for real-time features like LLM conversations. + + The verb schemas and JSON structure are identical regardless of transport mode. + """ + + root: list[Verb] = Field( + ..., + examples=[ + [ + { + "verb": "config", + "synthesizer": { + "vendor": "elevenlabs", + "voice": "EXAVITQu4vr4xnSDxMaL", + "language": "en-US", + }, + "recognizer": {"vendor": "deepgram", "language": "en-US"}, + }, + { + "verb": "say", + "text": "Hello! Welcome to Acme Corp. 
How can I help you today?", + }, + { + "verb": "gather", + "input": ["speech"], + "actionHook": "/process-input", + "timeout": 15, + "say": {"text": "I'm listening."}, + }, + ], + [ + {"verb": "say", "text": "Please hold while I connect you to an agent."}, + { + "verb": "dial", + "target": [{"type": "phone", "number": "+15085551212"}], + "answerOnBridge": True, + "timeout": 30, + "actionHook": "/dial-complete", + }, + { + "verb": "say", + "text": "Sorry, the agent is not available. Please try again later.", + }, + {"verb": "hangup"}, + ], + [ + { + "verb": "config", + "synthesizer": {"vendor": "cartesia", "voice": "sonic-english"}, + "recognizer": {"vendor": "deepgram", "language": "en-US"}, + }, + { + "verb": "openai_s2s", + "model": "gpt-4o", + "llmOptions": { + "messages": [ + { + "role": "system", + "content": "You are a helpful customer service agent for Acme Corp. Be concise and friendly.", + } + ], + "temperature": 0.7, + }, + "actionHook": "/llm-complete", + "toolHook": "/llm-tool", + }, + ], + ], + min_length=1, + title="jambonz Application", + ) + """ + A jambonz application is an array of verbs that are executed sequentially to control a phone call. Each verb performs an action: speaking text, playing audio, collecting input, dialing a number, connecting to an AI model, etc. When a webhook (actionHook) is invoked, it must return a new verb array to continue call processing. + + The execution model is simple: verbs execute one after another, top to bottom. When a verb with an actionHook completes (e.g. gather collects input), the actionHook is called and its response replaces the remaining verb stack. If the verb array is exhausted without a hangup, the call is terminated. + + There are two transport modes for delivering verb arrays to jambonz: + - **Webhook**: Your HTTP server receives POST/GET requests with call data and returns JSON verb arrays in the response body. 
+ - **WebSocket**: Your server maintains a persistent websocket connection with jambonz and sends/receives verb arrays as JSON messages. Required for real-time features like LLM conversations. + + The verb schemas and JSON structure are identical regardless of transport mode. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/__init__.py b/src/jambonz_sdk/_models/_generated/verbs/__init__.py new file mode 100644 index 0000000..1a9201e --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/__init__.py @@ -0,0 +1,3 @@ +# generated by datamodel-codegen: +# filename: schema +# timestamp: 2026-04-22T10:54:53+00:00 diff --git a/src/jambonz_sdk/_models/_generated/verbs/agent.py b/src/jambonz_sdk/_models/_generated/verbs/agent.py new file mode 100644 index 0000000..db1358c --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/agent.py @@ -0,0 +1,202 @@ +# generated by datamodel-codegen: +# filename: verbs/agent +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any, Literal + +from pydantic import ConfigDict, Field, confloat + +from ..components import actionHook, recognizer, synthesizer +from jambonz_sdk._models.base import JambonzModel +from .llm import LLM + + +class TurnDetection(Enum): + """ + Turn detection strategy shorthand. 'stt' uses the STT vendor's native signals (silence-based for most vendors; acoustic+semantic for deepgramflux, assemblyai, speechmatics). 'krisp' uses the Krisp acoustic end-of-turn model with default settings. + """ + + stt = "stt" + krisp = "krisp" + + +class Mode(Enum): + """ + Turn detection mode. Currently only 'krisp' supports object-form tuning. + """ + + krisp = "krisp" + + +class TurnDetection1(JambonzModel): + """ + Turn detection configuration with tunable parameters. + """ + + model_config = ConfigDict( + extra="forbid", + ) + mode: Mode + """ + Turn detection mode. Currently only 'krisp' supports object-form tuning. 
+ """ + threshold: confloat(ge=0.0, le=1.0) | None = None + """ + Krisp end-of-turn confidence threshold (0.0–1.0). Lower values trigger earlier turn transitions. Default: 0.5 + """ + model: str | None = None + """ + Optional Krisp model name override. + """ + + +class BargeIn(JambonzModel): + """ + Controls whether and how the user can interrupt the assistant while it is speaking. + """ + + model_config = ConfigDict( + extra="allow", + ) + enable: bool | None = True + """ + Allow the user to interrupt the assistant while it is speaking. Default: true. + """ + min_speech_duration: confloat(ge=0.0) | None = Field(default=0.5, alias="minSpeechDuration") + """ + Seconds of detected speech required before confirming an interruption. Prevents brief noises from cutting off the assistant. Default: 0.5 + """ + sticky: bool | None = False + """ + If true, once the user interrupts the assistant does not resume speaking. Default: false. + """ + + +class NoiseIsolation(Enum): + """ + Shorthand — enable noise isolation with the specified vendor using default settings. + """ + + krisp = "krisp" + rnnoise = "rnnoise" + + +class Direction(Enum): + """ + Audio direction to apply noise isolation. 'read' filters caller audio, 'write' filters outbound audio. Default: 'read'. + """ + + read = "read" + write = "write" + + +class NoiseIsolation1(JambonzModel): + """ + Detailed noise isolation configuration. + """ + + model_config = ConfigDict( + extra="forbid", + ) + mode: str + """ + Noise isolation vendor/mode (e.g. 'krisp'). + """ + level: confloat(ge=0.0, le=100.0) | None = None + """ + Suppression level 0–100. Default: 100. + """ + direction: Direction | None = None + """ + Audio direction to apply noise isolation. 'read' filters caller audio, 'write' filters outbound audio. Default: 'read'. + """ + model: str | None = None + """ + Optional model name override. + """ + + +class McpServer(JambonzModel): + url: str + """ + The URL of the MCP server. 
+ """ + auth: dict[str, Any] | None = None + """ + Authentication for the MCP server. + """ + roots: list[dict[str, Any]] | None = None + """ + MCP root definitions. + """ + + +class Agent(JambonzModel): + """ + Configures a complete voice AI agent by wiring together STT → LLM → TTS with integrated turn detection. Provides a higher-level abstraction than manually orchestrating the individual components. Optimized for building voice AI agents with proper turn-taking behavior. + """ + + verb: Literal["agent"] = "agent" + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + stt: recognizer.Recognizer | None = None + """ + Speech-to-text configuration for the agent. + """ + tts: synthesizer.Synthesizer | None = None + """ + Text-to-speech configuration for the agent. + """ + turn_detection: TurnDetection | TurnDetection1 | None = Field( + default="stt", alias="turnDetection", validate_default=True + ) + """ + Turn detection strategy. Controls when the agent decides the user has finished speaking. STT vendors with native turn-taking (deepgramflux, assemblyai, speechmatics) always use their built-in detection regardless of this setting. + """ + barge_in: BargeIn | None = Field(default=None, alias="bargeIn") + """ + Controls whether and how the user can interrupt the assistant while it is speaking. + """ + no_response_timeout: confloat(ge=0.0) | None = Field(default=0, alias="noResponseTimeout") + """ + Seconds to wait after the assistant finishes speaking before prompting the user to respond. 0 disables. Default: 0. + """ + llm: LLM + """ + LLM configuration for the agent. See the 'llm' verb schema for details. + """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + """ + A webhook invoked when the agent ends. + """ + event_hook: actionHook.ActionHook | None = Field(default=None, alias="eventHook") + """ + A webhook invoked for agent events. 
Receives event types: 'user_transcript' (user speech recognized), 'llm_response' (assistant reply), 'user_interruption' (barge-in detected), and 'turn_end' (end-of-turn summary with transcript, response, and latency metrics). + """ + tool_hook: actionHook.ActionHook | None = Field(default=None, alias="toolHook") + """ + A webhook invoked when the LLM requests a tool/function call. The payload includes the tool name and arguments; the response provides the tool result. + """ + greeting: bool | None = True + """ + Whether the LLM should generate an initial greeting before the user speaks. Default: true. + """ + early_generation: bool | None = Field(default=False, alias="earlyGeneration") + """ + Enable speculative LLM prompting before end-of-turn is confirmed. When using Krisp turn detection, set this to true to speculatively prompt the LLM before Krisp confirms the turn has ended. If the transcript matches when turn ends, buffered tokens are released immediately — reducing response latency. Note: Deepgram Flux performs early generation automatically via its native EagerEndOfTurn signal regardless of this setting. Default: false. + """ + noise_isolation: NoiseIsolation | NoiseIsolation1 | None = Field( + default=None, alias="noiseIsolation" + ) + """ + Enable server-side noise isolation to reduce background noise on call audio. Defaults to filtering inbound (caller) audio; set direction to 'write' for outbound. Useful for improving STT accuracy in noisy environments. + """ + mcp_servers: list[McpServer] | None = Field(default=None, alias="mcpServers") + """ + External MCP servers that provide tools to the LLM. The agent connects at startup via SSE, discovers available tools, and makes them callable by the LLM. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/alert.py b/src/jambonz_sdk/_models/_generated/verbs/alert.py new file mode 100644 index 0000000..e2cc2eb --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/alert.py @@ -0,0 +1,26 @@ +# generated by datamodel-codegen: +# filename: verbs/alert +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field +from jambonz_sdk._models.base import JambonzModel + + +class Alert(JambonzModel): + """ + Sends a 180 Ringing provisional response with an Alert-Info header. Used to trigger a specific ring tone or alert behavior on the caller's device before the call is answered. + """ + + verb: Literal["alert"] = "alert" + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + message: str = Field(..., examples=["info=alert-internal", "http://example.com/ringtone.wav"]) + """ + The value to include in the Alert-Info header. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/answer.py b/src/jambonz_sdk/_models/_generated/verbs/answer.py new file mode 100644 index 0000000..314a0ec --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/answer.py @@ -0,0 +1,20 @@ +# generated by datamodel-codegen: +# filename: verbs/answer +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal +from jambonz_sdk._models.base import JambonzModel + + +class Answer(JambonzModel): + """ + Answers an incoming call (sends a 200 OK to the SIP INVITE). Most verbs implicitly answer the call, so this verb is only needed when you want to explicitly control when the call is answered — for example, to play early media before answering. + """ + + verb: Literal["answer"] = "answer" + id: str | None = None + """ + An optional unique identifier for this verb instance. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/conference.py b/src/jambonz_sdk/_models/_generated/verbs/conference.py new file mode 100644 index 0000000..2d43d87 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/conference.py @@ -0,0 +1,92 @@ +# generated by datamodel-codegen: +# filename: verbs/conference +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import Field + +from ..components import actionHook +from jambonz_sdk._models.base import JambonzModel + + +class Conference(JambonzModel): + """ + Places the caller into a multi-party conference room. Multiple callers in the same named conference can speak to each other. Supports features like muting, recording, waiting rooms, and participant limits. + """ + + verb: Literal["conference"] = "conference" + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + name: str = Field(..., examples=["team-standup", "customer-call-12345"]) + """ + The name of the conference room. All callers joining the same named conference are connected together. + """ + beep: bool | None = None + """ + If true, play a beep when participants join or leave. + """ + member_tag: str | None = Field(default=None, alias="memberTag") + """ + A tag to identify this participant. Can be used to target specific members for actions like muting or whispering. + """ + speak_only_to: str | None = Field(default=None, alias="speakOnlyTo") + """ + If set, this participant's audio is only heard by the member with the specified memberTag. Creates a private whisper channel. + """ + start_conference_on_enter: bool | None = Field(default=None, alias="startConferenceOnEnter") + """ + If true (default), the conference starts when this participant joins. If false, this participant waits silently until a participant with startConferenceOnEnter=true joins. 
+ """ + end_conference_on_exit: bool | None = Field(default=None, alias="endConferenceOnExit") + """ + If true, the conference ends for all participants when this participant leaves. + """ + end_conference_duration: float | None = Field(default=None, alias="endConferenceDuration") + """ + Maximum duration of the conference in seconds. + """ + max_participants: float | None = Field(default=None, alias="maxParticipants") + """ + Maximum number of participants allowed in the conference. + """ + join_muted: bool | None = Field(default=None, alias="joinMuted") + """ + If true, this participant joins the conference muted. + """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + """ + A webhook invoked when this participant leaves the conference. + """ + wait_hook: actionHook.ActionHook | None = Field(default=None, alias="waitHook") + """ + A webhook invoked while this participant is waiting for the conference to start. Should return verbs to play (e.g. hold music). + """ + status_events: list[str] | None = Field(default=None, alias="statusEvents") + """ + List of conference events to receive via the statusHook. + """ + status_hook: actionHook.ActionHook | None = Field(default=None, alias="statusHook") + """ + A webhook to receive conference status events (joins, leaves, etc.). + """ + enter_hook: actionHook.ActionHook | None = Field(default=None, alias="enterHook") + """ + A webhook invoked when this participant first enters the conference. + """ + record: dict[str, Any] | None = None + """ + Recording configuration for the conference. + """ + listen: dict[str, Any] | None = None + """ + Audio streaming configuration for the conference. + """ + distribute_dtmf: bool | None = Field(default=None, alias="distributeDtmf") + """ + If true, DTMF events from this participant are distributed to all other participants. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/config.py b/src/jambonz_sdk/_models/_generated/verbs/config.py new file mode 100644 index 0000000..326716b --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/config.py @@ -0,0 +1,188 @@ +# generated by datamodel-codegen: +# filename: verbs/config +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any, Literal + +from pydantic import Field + +from ..components import actionHook, actionHookDelayAction +from ..components import amd as amd_1 +from ..components import fillerNoise +from ..components import recognizer as recognizer_1 +from ..components import synthesizer as synthesizer_1 +from ..components import vad as vad_1 +from jambonz_sdk._models.base import JambonzModel + + +class InputEnum(Enum): + speech = "speech" + digits = "digits" + + +class NoiseIsolation(JambonzModel): + """ + Noise isolation configuration to reduce background noise on call audio. Defaults to filtering inbound (caller) audio; can also filter outbound audio via the direction option. + """ + + enable: bool | None = None + vendor: str | None = None + level: float | None = None + model: str | None = None + + +class TurnTaking(JambonzModel): + """ + Turn-taking detection configuration for conversational AI applications. + """ + + enable: bool | None = None + vendor: str | None = None + threshold: float | None = None + model: str | None = None + + +class BargeIn(JambonzModel): + """ + Default barge-in configuration. When enabled, callers can interrupt playing prompts with speech or DTMF. + """ + + enable: bool | None = None + sticky: bool | None = None + """ + If true, barge-in settings persist across verbs rather than resetting after each verb. 
+ """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + input: list[InputEnum] | None = None + min_bargein_word_count: float | None = Field(default=None, alias="minBargeinWordCount") + + +class TtsStream(JambonzModel): + """ + Default TTS streaming configuration for the session. + """ + + enable: bool | None = None + synthesizer: synthesizer_1.Synthesizer | None = None + + +class Config(JambonzModel): + """ + Sets session-level defaults for the call. Configures default TTS, STT, VAD, recording, streaming, and other session-wide settings. These defaults apply to all subsequent verbs unless overridden at the verb level. Typically the first verb in an application. Can be used multiple times during a call to change settings. + """ + + verb: Literal["config"] = "config" + """ + The verb name. + """ + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + synthesizer: synthesizer_1.Synthesizer | None = None + """ + Default TTS configuration for the session. + """ + recognizer: recognizer_1.Recognizer | None = None + """ + Default STT configuration for the session. + """ + barge_in: BargeIn | None = Field(default=None, alias="bargeIn") + """ + Default barge-in configuration. When enabled, callers can interrupt playing prompts with speech or DTMF. + """ + tts_stream: TtsStream | None = Field(default=None, alias="ttsStream") + """ + Default TTS streaming configuration for the session. + """ + record: dict[str, Any] | None = None + """ + Session-level call recording configuration. + """ + listen: dict[str, Any] | None = None + """ + Session-level audio streaming configuration defaults. Properties match the listen verb but no fields are required here. + """ + stream: dict[str, Any] | None = None + """ + Session-level audio streaming configuration defaults. Alias for 'listen'. + """ + transcribe: dict[str, Any] | None = None + """ + Session-level transcription configuration defaults. 
+ """ + amd: amd_1.AnsweringMachineDetection | None = None + """ + Session-level answering machine detection configuration. + """ + filler_noise: fillerNoise.FillerNoise | None = Field(default=None, alias="fillerNoise") + """ + Default filler noise configuration for the session. + """ + vad: vad_1.VAD | None = None + """ + Default voice activity detection configuration for the session. + """ + notify_events: bool | None = Field(default=None, alias="notifyEvents") + """ + If true, send call events (e.g. DTMF, call status changes) to the application via the status webhook. + """ + notify_stt_latency: bool | None = Field(default=None, alias="notifySttLatency") + """ + If true, include STT latency measurements in webhook payloads. + """ + reset: str | list[str] | None = None + """ + Reset specific session-level settings to their defaults. Pass a setting name or array of setting names to reset. + """ + on_hold_music: str | None = Field(default=None, alias="onHoldMusic") + """ + URL of an audio file to play when the call is placed on hold. + """ + action_hook_delay_action: actionHookDelayAction.ActionHookDelayAction | None = Field( + default=None, alias="actionHookDelayAction" + ) + """ + Default configuration for handling slow webhook responses. + """ + sip_request_within_dialog_hook: actionHook.ActionHook | None = Field( + default=None, alias="sipRequestWithinDialogHook" + ) + """ + A webhook to invoke when a SIP request (e.g. INFO, NOTIFY) is received within the dialog. + """ + boost_audio_signal: float | str | None = Field(default=None, alias="boostAudioSignal") + """ + Boost (or attenuate) the audio signal in dB for the session. + """ + refer_hook: actionHook.ActionHook | None = Field(default=None, alias="referHook") + """ + A webhook to invoke when a SIP REFER request is received. + """ + early_media: bool | None = Field(default=None, alias="earlyMedia") + """ + If true, allow early media (audio before call answer) for the session. 
+ """ + auto_stream_tts: bool | None = Field(default=None, alias="autoStreamTts") + """ + If true, automatically use streaming TTS for all 'say' verbs in the session. + """ + disable_tts_cache: bool | None = Field(default=None, alias="disableTtsCache") + """ + If true, disable TTS caching for the session. + """ + track_tts_playout: bool | None = Field(default=None, alias="trackTtsPlayout") + """ + If true, report the actual text spoken via TTS. Requires a TTS vendor that supports alignment data (e.g. ElevenLabs). On each utterance completion or interruption, a tts_spoken event is sent to the '/streaming-event' endpoint with fields: 'text' (string — the text actually spoken) and 'bargein' (boolean — true if the user interrupted before TTS finished). See the tts-streaming-event callback schema for full details. + """ + noise_isolation: NoiseIsolation | None = Field(default=None, alias="noiseIsolation") + """ + Noise isolation configuration to reduce background noise on call audio. Defaults to filtering inbound (caller) audio; can also filter outbound audio via the direction option. + """ + turn_taking: TurnTaking | None = Field(default=None, alias="turnTaking") + """ + Turn-taking detection configuration for conversational AI applications. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/deepgram_s2s.py b/src/jambonz_sdk/_models/_generated/verbs/deepgram_s2s.py new file mode 100644 index 0000000..946a5c5 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/deepgram_s2s.py @@ -0,0 +1,51 @@ +# generated by datamodel-codegen: +# filename: verbs/deepgram_s2s +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import Field + +from ..components.llm_base import LlmBaseProperties + + +class DeepgramS2S(LlmBaseProperties): + """ + Shortcut for 'llm' with vendor automatically set to 'deepgram'. 
# === src/jambonz_sdk/_models/_generated/verbs/deepgram_s2s.py (reconstructed from patch hunk) ===
# generated by datamodel-codegen:
# filename: verbs/deepgram_s2s
# timestamp: 2026-04-22T10:54:53+00:00

from __future__ import annotations

from typing import Any, Literal

from pydantic import Field

from ..components.llm_base import LlmBaseProperties


class DeepgramS2S(LlmBaseProperties):
    """
    Shortcut for 'llm' with vendor automatically set to 'deepgram'. Connects the caller to a Deepgram model for real-time speech-to-speech voice conversation.
    """

    verb: Literal["deepgram_s2s"] = "deepgram_s2s"
    """
    The verb name.
    """
    # Fixed literal: this shortcut pins the vendor; do not override.
    vendor: Literal["deepgram"] = "deepgram"
    """
    The LLM vendor (always 'deepgram' for this shortcut).
    """
    # Required (Ellipsis default). Deliberately left as an open dict: the vendor
    # payload is passed through verbatim to Deepgram.
    llm_options: dict[str, Any] = Field(
        ...,
        alias="llmOptions",
        examples=[
            {
                "Settings": {
                    "agent": {
                        "think": {
                            "provider": {"type": "open_ai", "model": "gpt-4o"},
                            "prompt": "You are a helpful voice assistant.",
                        },
                        "speak": {
                            "provider": {
                                "type": "deepgram",
                                "model": "aura-2-thalia-en",
                            }
                        },
                    }
                }
            }
        ],
    )
    """
    IMPORTANT: Deepgram does NOT use a 'messages' array. The llmOptions must contain a 'Settings' object with 'agent.think' (LLM provider, model, and prompt) and 'agent.speak' (TTS provider and voice model). The system prompt goes in Settings.agent.think.prompt, NOT in messages.
    """


# === src/jambonz_sdk/_models/_generated/verbs/dequeue.py (reconstructed from patch hunk) ===
# generated by datamodel-codegen:
# filename: verbs/dequeue
# timestamp: 2026-04-22T10:54:53+00:00

from __future__ import annotations

from typing import Literal

from pydantic import Field

from ..components import actionHook
from jambonz_sdk._models.base import JambonzModel


class Dequeue(JambonzModel):
    """
    Removes a caller from a named queue and bridges them to the current call. Typically used by an agent or operator call flow to connect with the next waiting caller.
    """

    verb: Literal["dequeue"] = "dequeue"
    id: str | None = None
    """
    An optional unique identifier for this verb instance.
    """
    name: str = Field(..., examples=["support", "sales"])
    """
    The name of the queue to dequeue from.
    """
    action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook")
    """
    A webhook invoked when the dequeued call ends.
    """
    timeout: float | None = None
    """
    Time in seconds to wait for a caller to be available in the queue.
    """
    beep: bool | None = None
    """
    If true, play a beep when the calls are connected.
    """
    call_sid: str | None = Field(default=None, alias="callSid")
    """
    Dequeue a specific call by its call SID, rather than the next caller in line.
    """
+ """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + """ + A webhook invoked when the dialed call ends. Receives call disposition details (duration, who hung up, etc.) and should return the next verbs to execute. + """ + on_hold_hook: actionHook.ActionHook | None = Field(default=None, alias="onHoldHook") + """ + A webhook invoked when the call is placed on hold. Should return verbs to execute (e.g. play hold music) while the caller is holding. + """ + answer_on_bridge: bool | None = Field(default=None, alias="answerOnBridge") + """ + If true, delay answering the inbound call until the outbound leg is answered. This allows the caller to hear ringing until the target picks up, and avoids billing the caller for unanswered outbound attempts. + """ + caller_id: str | None = Field(default=None, alias="callerId", examples=["+15085551212"]) + """ + The caller ID (phone number) to present on the outbound call. Overrides the default caller ID. + """ + caller_name: str | None = Field(default=None, alias="callerName") + """ + The caller display name to present on the outbound call. + """ + confirm_hook: actionHook.ActionHook | None = Field(default=None, alias="confirmHook") + """ + A webhook invoked when a target answers, before the call is bridged. Used for call screening — the webhook can return verbs (e.g. a 'say' prompt and 'gather') to confirm the callee wants to accept the call. + """ + refer_hook: actionHook.ActionHook | None = Field(default=None, alias="referHook") + """ + A webhook invoked when a SIP REFER is received on the bridged call. Allows handling call transfers initiated by the far end. + """ + dial_music: str | None = Field(default=None, alias="dialMusic") + """ + URL of an audio file to play to the caller while the outbound call is ringing. Replaces the default ringback tone. 
+ """ + dtmf_capture: list[str] | dict[str, Any] | None = Field(default=None, alias="dtmfCapture") + """ + Configuration for capturing DTMF digits during the bridged call. Can be a simple array of patterns (applied to both legs) or an object with childCall/parentCall arrays. + """ + dtmf_hook: actionHook.ActionHook | None = Field(default=None, alias="dtmfHook") + """ + A webhook invoked when a captured DTMF pattern is detected during the bridged call. + """ + headers: dict[str, str | float] | None = None + """ + Custom SIP headers to include on the outbound INVITE. + """ + anchor_media: bool | None = Field(default=None, alias="anchorMedia") + """ + If true, keep media anchored through the jambonz media server even if a direct media path is possible. Required for features like recording, listen, and DTMF capture during bridged calls. + """ + exit_media_path: bool | None = Field(default=None, alias="exitMediaPath") + """ + If true, remove jambonz from the media path after the call is bridged. Reduces latency but disables mid-call features like recording and DTMF capture. + """ + boost_audio_signal: float | str | None = Field( + default=None, alias="boostAudioSignal", examples=[6, -3] + ) + """ + Boost (or attenuate) the audio signal in dB. Positive values increase volume, negative values decrease it. + """ + listen: dict[str, Any] | None = None + """ + Nested listen configuration for streaming audio of the bridged call. + """ + stream: dict[str, Any] | None = None + """ + Nested stream configuration for streaming audio of the bridged call. Alias for 'listen'. + """ + transcribe: dict[str, Any] | None = None + """ + Nested transcribe configuration for real-time transcription of the bridged call. + """ + time_limit: float | None = Field(default=None, alias="timeLimit", examples=[3600]) + """ + Maximum duration in seconds for the bridged call. The call is automatically hung up when this limit is reached. 
+ """ + timeout: float | None = Field(default=None, examples=[30, 60]) + """ + Time in seconds to wait for the target to answer before giving up. + """ + proxy: str | None = Field(default=None, examples=["sip:proxy.example.com"]) + """ + A SIP proxy to route the outbound call through. + """ + amd: amd_1.AnsweringMachineDetection | None = None + """ + Answering machine detection configuration. When enabled, jambonz attempts to determine whether the call was answered by a human or a machine. + """ + dub: list[dub_1.Dub] | None = None + """ + Nested dub verbs — audio dubbing configuration for mixing additional audio tracks into the bridged call. + """ + tag: dict[str, Any] | None = None + """ + Arbitrary metadata to attach to this call leg. Included in subsequent webhook invocations and CDRs. + """ + forward_pai: bool | None = Field(default=None, alias="forwardPAI") + """ + If true, forward the P-Asserted-Identity header from the inbound call to the outbound call. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/dialogflow.py b/src/jambonz_sdk/_models/_generated/verbs/dialogflow.py new file mode 100644 index 0000000..9d8ab72 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/dialogflow.py @@ -0,0 +1,140 @@ +# generated by datamodel-codegen: +# filename: verbs/dialogflow +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any, Literal + +from pydantic import ConfigDict, Field + +from ..components import actionHook, synthesizer +from jambonz_sdk._models.base import JambonzModel + + +class Model(Enum): + """ + The Dialogflow model type: 'es' for Dialogflow ES, 'cx' for Dialogflow CX, 'ces' for Dialogflow CES. + """ + + es = "es" + cx = "cx" + ces = "ces" + + +class QueryInput(JambonzModel): + """ + Initial query input to send to Dialogflow. + """ + + model_config = ConfigDict( + extra="forbid", + ) + text: str | None = None + """ + Text input. 
+ """ + intent: str | None = None + """ + Intent to trigger. + """ + event: str | None = None + """ + Event to trigger. + """ + dtmf: str | None = None + """ + DTMF input. + """ + + +class Dialogflow(JambonzModel): + """ + Connects the caller to a Google Dialogflow agent for a voice conversation. Supports Dialogflow ES, CX, and CES models. The caller speaks and Dialogflow handles intent detection and response generation. + """ + + verb: Literal["dialogflow"] = "dialogflow" + """ + The verb name. + """ + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + credentials: dict[str, Any] | str + """ + Google service account credentials as a JSON object or stringified JSON. + """ + project: str + """ + The Google Cloud project ID. + """ + agent: str | None = None + """ + The Dialogflow agent ID. Required for CX agents. + """ + environment: str | None = None + """ + The Dialogflow environment to use. + """ + region: str | None = None + """ + The Google Cloud region for the Dialogflow API endpoint. + """ + model: Model | None = None + """ + The Dialogflow model type: 'es' for Dialogflow ES, 'cx' for Dialogflow CX, 'ces' for Dialogflow CES. + """ + lang: str + """ + The language code for the conversation (e.g. 'en-US'). + """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + """ + A webhook invoked when the Dialogflow session ends. + """ + event_hook: actionHook.ActionHook | None = Field(default=None, alias="eventHook") + """ + A webhook invoked for Dialogflow events during the conversation. + """ + events: list[str] | None = None + """ + List of event types to receive via the eventHook. + """ + welcome_event: str | None = Field(default=None, alias="welcomeEvent") + """ + A Dialogflow event to trigger at the start of the conversation (e.g. 'welcome'). 
+ """ + welcome_event_params: dict[str, Any] | None = Field(default=None, alias="welcomeEventParams") + """ + Parameters to pass with the welcome event. + """ + no_input_timeout: float | None = Field(default=None, alias="noInputTimeout") + """ + Seconds to wait for caller input before triggering the no-input event. + """ + no_input_event: str | None = Field(default=None, alias="noInputEvent") + """ + Dialogflow event to trigger when no input is received within the timeout. + """ + pass_dtmf_as_text_input: bool | None = Field(default=None, alias="passDtmfAsTextInput") + """ + If true, pass DTMF digits to Dialogflow as text input. + """ + thinking_music: str | None = Field(default=None, alias="thinkingMusic") + """ + URL of an audio file to play while waiting for Dialogflow to respond. + """ + tts: synthesizer.Synthesizer | None = None + """ + TTS configuration for Dialogflow responses. + """ + bargein: bool | None = None + """ + If true, allow the caller to interrupt Dialogflow responses with speech. + """ + query_input: QueryInput | None = Field(default=None, alias="queryInput") + """ + Initial query input to send to Dialogflow. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/dtmf.py b/src/jambonz_sdk/_models/_generated/verbs/dtmf.py new file mode 100644 index 0000000..b6e63a4 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/dtmf.py @@ -0,0 +1,30 @@ +# generated by datamodel-codegen: +# filename: verbs/dtmf +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field +from jambonz_sdk._models.base import JambonzModel + + +class DTMF(JambonzModel): + """ + Sends DTMF tones on the call. Used to interact with IVR systems on the far end, or to signal systems that respond to DTMF. + """ + + verb: Literal["dtmf"] = "dtmf" + id: str | None = None + """ + An optional unique identifier for this verb instance. 
+ """ + dtmf: str = Field(..., examples=["1234#", "1w2w3", "5551212"]) + """ + The DTMF digits to send. Valid characters are 0-9, *, #, and A-D. Use 'w' for a 500ms pause between digits. + """ + duration: float | None = Field(default=500, examples=[250, 500]) + """ + Duration in milliseconds for each DTMF tone. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/dub.py b/src/jambonz_sdk/_models/_generated/verbs/dub.py new file mode 100644 index 0000000..ddb1cde --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/dub.py @@ -0,0 +1,59 @@ +# generated by datamodel-codegen: +# filename: verbs/dub +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any, Literal + +from pydantic import Field +from jambonz_sdk._models.base import JambonzModel + + +class Action(Enum): + """ + The dubbing action to perform. + """ + + add_track = "addTrack" + remove_track = "removeTrack" + silence_track = "silenceTrack" + play_on_track = "playOnTrack" + say_on_track = "sayOnTrack" + + +class Dub(JambonzModel): + """ + Manages audio dubbing tracks on a call. Allows adding, removing, and controlling auxiliary audio tracks that are mixed into the call audio. Used for background music, coaching whispers, or injecting audio from external sources. + """ + + verb: Literal["dub"] = "dub" + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + action: Action + """ + The dubbing action to perform. + """ + track: str = Field(..., examples=["background-music", "coach-whisper"]) + """ + The name of the audio track. Used to reference the track in subsequent dub actions. + """ + play: str | None = None + """ + URL of an audio file to play on the track. Used with 'playOnTrack' action. + """ + say: str | dict[str, Any] | None = None + """ + Text to synthesize and play on the track. Used with 'sayOnTrack' action. Can be a string or a say configuration object. 
+ """ + loop: bool | None = None + """ + If true, loop the audio on the track continuously. + """ + gain: float | str | None = Field(default=None, examples=[-10, 0, 6]) + """ + Audio gain for the track in dB. Use negative values to reduce volume. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/elevenlabs_s2s.py b/src/jambonz_sdk/_models/_generated/verbs/elevenlabs_s2s.py new file mode 100644 index 0000000..8f89ec2 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/elevenlabs_s2s.py @@ -0,0 +1,72 @@ +# generated by datamodel-codegen: +# filename: verbs/elevenlabs_s2s +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import ConfigDict, Field + +from ..components.llm_base import LlmBaseProperties +from jambonz_sdk._models.base import JambonzModel + + +class Auth(JambonzModel): + """ + Authentication credentials for ElevenLabs. Requires agent_id; api_key is optional (if not provided, an unsigned URL is used). + """ + + agent_id: str + """ + The ElevenLabs Conversational AI agent ID. Required. + """ + api_key: str | None = None + """ + The ElevenLabs API key. Optional; when provided, a signed URL is used for the WebSocket connection. + """ + + +class LlmOptions(JambonzModel): + """ + Options for the ElevenLabs conversation session. + """ + + model_config = ConfigDict( + extra="allow", + ) + conversation_initiation_client_data: dict[str, Any] | None = None + """ + Optional data sent to the agent when the conversation starts. + """ + input_sample_rate: int | None = 16000 + """ + Audio input sample rate in Hz. + """ + output_sample_rate: int | None = 16000 + """ + Audio output sample rate in Hz. + """ + + +class ElevenlabsS2S(LlmBaseProperties): + """ + Shortcut for 'llm' with vendor automatically set to 'elevenlabs'. Connects the caller to an ElevenLabs Conversational AI agent for real-time speech-to-speech voice conversation. 
# === src/jambonz_sdk/_models/_generated/verbs/elevenlabs_s2s.py (reconstructed from patch hunk) ===
# generated by datamodel-codegen:
# filename: verbs/elevenlabs_s2s
# timestamp: 2026-04-22T10:54:53+00:00

from __future__ import annotations

from typing import Any, Literal

from pydantic import ConfigDict, Field

from ..components.llm_base import LlmBaseProperties
from jambonz_sdk._models.base import JambonzModel


class Auth(JambonzModel):
    """
    Authentication credentials for ElevenLabs. Requires agent_id; api_key is optional (if not provided, an unsigned URL is used).
    """

    agent_id: str
    """
    The ElevenLabs Conversational AI agent ID. Required.
    """
    api_key: str | None = None
    """
    The ElevenLabs API key. Optional; when provided, a signed URL is used for the WebSocket connection.
    """


class LlmOptions(JambonzModel):
    """
    Options for the ElevenLabs conversation session.
    """

    # extra="allow": pass through any additional vendor options unvalidated.
    model_config = ConfigDict(
        extra="allow",
    )
    conversation_initiation_client_data: dict[str, Any] | None = None
    """
    Optional data sent to the agent when the conversation starts.
    """
    input_sample_rate: int | None = 16000
    """
    Audio input sample rate in Hz.
    """
    output_sample_rate: int | None = 16000
    """
    Audio output sample rate in Hz.
    """


class ElevenlabsS2S(LlmBaseProperties):
    """
    Shortcut for 'llm' with vendor automatically set to 'elevenlabs'. Connects the caller to an ElevenLabs Conversational AI agent for real-time speech-to-speech voice conversation. Unlike other s2s vendors, ElevenLabs requires a pre-configured agent_id rather than a model and messages.
    """

    verb: Literal["elevenlabs_s2s"] = "elevenlabs_s2s"
    """
    The verb name.
    """
    vendor: Literal["elevenlabs"] = "elevenlabs"
    """
    The LLM vendor (always 'elevenlabs' for this shortcut).
    """
    auth: Auth
    """
    Authentication credentials for ElevenLabs. Requires agent_id; api_key is optional (if not provided, an unsigned URL is used).
    """
    llm_options: LlmOptions | None = Field(default=None, alias="llmOptions")
    """
    Options for the ElevenLabs conversation session.
    """


# === src/jambonz_sdk/_models/_generated/verbs/enqueue.py (reconstructed from patch hunk) ===
# generated by datamodel-codegen:
# filename: verbs/enqueue
# timestamp: 2026-04-22T10:54:53+00:00

from __future__ import annotations

from typing import Literal

from pydantic import Field

from ..components import actionHook
from jambonz_sdk._models.base import JambonzModel


class Enqueue(JambonzModel):
    """
    Places the caller into a named call queue. While in the queue, the caller hears content returned by the waitHook (typically hold music or position announcements). The caller remains in the queue until dequeued by another call or process.
    """

    verb: Literal["enqueue"] = "enqueue"
    id: str | None = None
    """
    An optional unique identifier for this verb instance.
    """
    name: str = Field(..., examples=["support", "sales"])
    """
    The name of the queue to place the caller in. Queues are created implicitly when first referenced.
    """
    action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook")
    """
    A webhook invoked when the caller leaves the queue (either dequeued or hung up). Should return the next verbs to execute.
    """
    wait_hook: actionHook.ActionHook | None = Field(default=None, alias="waitHook")
    """
    A webhook invoked immediately when the caller enters the queue and periodically while waiting. Should return verbs to play to the caller (e.g. hold music, queue position announcements).
    """
    priority: float | None = Field(default=None, examples=[1, 5, 10])
    """
    The priority of this caller in the queue. Lower numbers are higher priority and are dequeued first.
    """
# === src/jambonz_sdk/_models/_generated/verbs/gather.py (reconstructed from patch hunk) ===
# generated by datamodel-codegen:
# filename: verbs/gather
# timestamp: 2026-04-22T10:54:53+00:00
# NOTE(review): this generated file carries hand-added pieces (Say/Play imports
# and the _check_digit_bounds validator) — they must survive regeneration;
# confirm the regen script (scripts/regen_models.py) re-applies them.

from __future__ import annotations

from enum import Enum
from typing import Any, Literal

from pydantic import Field

from ..components import actionHook, actionHookDelayAction, fillerNoise
from ..components import recognizer as recognizer_1
from jambonz_sdk._models.base import JambonzModel
from .say import Say
from .play import Play
from pydantic import model_validator


class InputEnum(Enum):
    # Input modes a gather can accept.
    speech = "speech"
    digits = "digits"


class Gather(JambonzModel):
    """
    Collects user input via speech (STT) and/or DTMF digits. Optionally plays a prompt (using nested 'say' or 'play') while listening. When input is received, the result is sent to the actionHook which should return the next set of verbs. This is the primary verb for building interactive voice menus and conversational flows.
    """

    verb: Literal["gather"] = "gather"
    """
    The verb name.
    """
    id: str | None = None
    """
    An optional unique identifier for this verb instance.
    """
    action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook")
    """
    Invoked when the gather completes. The payload includes 'reason' ('speechDetected', 'dtmfDetected', or 'timeout'), 'speech' (object with alternatives[].transcript and alternatives[].confidence when reason is speechDetected), and 'digits' (string when reason is dtmfDetected). In webhook mode this is a URL that receives an HTTP POST. In WebSocket mode this is an event name — use session.on('/hookName', (evt) => {...}) and respond with session.reply().
    """
    # Default is digits-only; the list default is safe here because pydantic
    # copies field defaults per instance.
    input: list[InputEnum] | None = Field(
        ["digits"], examples=[["speech", "digits"], ["speech"], ["digits"]]
    )
    """
    The types of input to accept. Can include 'speech' (STT), 'digits' (DTMF), or both.
    """
    finish_on_key: str | None = Field(default=None, alias="finishOnKey", examples=["#", "*"])
    """
    A DTMF key that signals the end of digit input. The key itself is not included in the collected digits.
    """
    num_digits: float | None = Field(default=None, alias="numDigits")
    """
    Exact number of DTMF digits to collect. Gather completes automatically when this many digits are received.
    """
    min_digits: float | None = Field(default=None, alias="minDigits")
    """
    Minimum number of DTMF digits required.
    """
    max_digits: float | None = Field(default=None, alias="maxDigits")
    """
    Maximum number of DTMF digits to collect.
    """
    inter_digit_timeout: float | None = Field(default=None, alias="interDigitTimeout", examples=[5])
    """
    Time in seconds to wait between DTMF digits before considering input complete.
    """
    speech_timeout: float | None = Field(default=None, alias="speechTimeout", examples=[2, 3])
    """
    Time in seconds of silence after speech before considering the utterance complete.
    """
    timeout: float | None = Field(default=None, examples=[10, 30])
    """
    Overall timeout in seconds. If no input is received within this time, the gather completes with no input and the actionHook is invoked.
    """
    partial_result_hook: actionHook.ActionHook | None = Field(
        default=None, alias="partialResultHook"
    )
    """
    A webhook to invoke with interim (partial) speech recognition results. Useful for providing real-time feedback or early processing.
    """
    listen_during_prompt: bool | None = Field(default=True, alias="listenDuringPrompt")
    """
    If true, listen for input while the prompt is playing. If false, only start listening after the prompt finishes.
    """
    dtmf_bargein: bool | None = Field(default=None, alias="dtmfBargein")
    """
    If true, DTMF input interrupts (barges in on) any playing prompt.
    """
    bargein: bool | None = None
    """
    If true, speech input interrupts (barges in on) any playing prompt.
    """
    min_bargein_word_count: float | None = Field(
        default=None, alias="minBargeinWordCount", examples=[1, 2]
    )
    """
    Minimum number of words that must be recognized before barge-in is triggered. Prevents brief noises from interrupting prompts.
    """
    recognizer: recognizer_1.Recognizer | None = None
    """
    Override the session-level STT configuration for this gather.
    """
    say: Say | None = None
    """
    A nested say prompt played to the caller while listening for input. Accepts the same properties as the say verb (text, synthesizer, etc.) but no fields are required.
    """
    play: Play | None = None
    """
    A nested play prompt played to the caller while listening for input. Accepts the same properties as the play verb (url, etc.) but no fields are required.
    """
    filler_noise: fillerNoise.FillerNoise | None = Field(default=None, alias="fillerNoise")
    """
    Filler noise configuration while waiting for the actionHook to respond.
    """
    action_hook_delay_action: actionHookDelayAction.ActionHookDelayAction | None = Field(
        default=None, alias="actionHookDelayAction"
    )
    """
    Configuration for interim actions while the actionHook is processing.
    """

    @model_validator(mode="after")
    def _check_digit_bounds(self) -> "Gather":
        """``numDigits`` is mutually exclusive with ``min/maxDigits``."""
        # Hand-written cross-field validation (not emitted by codegen): reject
        # contradictory digit-count constraints before they reach the platform.
        if self.num_digits is not None and (
            self.min_digits is not None or self.max_digits is not None
        ):
            raise ValueError("numDigits cannot be combined with minDigits or maxDigits")
        if (
            self.min_digits is not None
            and self.max_digits is not None
            and self.min_digits > self.max_digits
        ):
            raise ValueError("minDigits cannot exceed maxDigits")
        return self
+ """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/hangup.py b/src/jambonz_sdk/_models/_generated/verbs/hangup.py new file mode 100644 index 0000000..92b3189 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/hangup.py @@ -0,0 +1,27 @@ +# generated by datamodel-codegen: +# filename: verbs/hangup +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal +from jambonz_sdk._models.base import JambonzModel + + +class Hangup(JambonzModel): + """ + Terminates the call. Optionally includes custom SIP headers on the BYE request. + """ + + verb: Literal["hangup"] = "hangup" + """ + The verb name. + """ + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + headers: dict[str, str | float] | None = None + """ + Custom SIP headers to include on the BYE request. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/leave.py b/src/jambonz_sdk/_models/_generated/verbs/leave.py new file mode 100644 index 0000000..079f1dd --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/leave.py @@ -0,0 +1,20 @@ +# generated by datamodel-codegen: +# filename: verbs/leave +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal +from jambonz_sdk._models.base import JambonzModel + + +class Leave(JambonzModel): + """ + Removes the caller from a conference or queue that they are currently in. Execution continues with the next verb in the application. + """ + + verb: Literal["leave"] = "leave" + id: str | None = None + """ + An optional unique identifier for this verb instance. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/listen.py b/src/jambonz_sdk/_models/_generated/verbs/listen.py new file mode 100644 index 0000000..702b90b --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/listen.py @@ -0,0 +1,109 @@ +# generated by datamodel-codegen: +# filename: verbs/listen +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any, Literal + +from pydantic import Field + +from ..components import actionHook, auth, bidirectionalAudio +from . import transcribe as transcribe_1 +from jambonz_sdk._models.base import JambonzModel + + +class MixType(Enum): + """ + How to mix the audio channels when streaming. 'mono' sends a single mixed channel, 'stereo' sends caller and callee as separate left/right channels, 'mixed' sends both as a single mixed stream. + """ + + mono = "mono" + stereo = "stereo" + mixed = "mixed" + + +class Listen(JambonzModel): + """ + Streams real-time call audio to an external websocket endpoint. The remote endpoint receives raw audio and can optionally send audio back (bidirectional). Used for custom speech processing, real-time analysis, AI agent integration, and recording to external systems. + """ + + verb: Literal["listen"] = "listen" + """ + The verb name. + """ + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + url: str = Field(..., examples=["wss://myapp.example.com/audio-stream"]) + """ + The websocket URL to stream audio to. + """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + """ + A webhook invoked when the listen session ends. Should return the next verbs to execute. + """ + ws_auth: auth.Auth | None = Field(default=None, alias="wsAuth") + """ + Authentication credentials for the websocket connection. + """ + mix_type: MixType | None = Field(default="mono", alias="mixType") + """ + How to mix the audio channels when streaming. 
'mono' sends a single mixed channel, 'stereo' sends caller and callee as separate left/right channels, 'mixed' sends both as a single mixed stream. + """ + metadata: dict[str, Any] | None = None + """ + Arbitrary metadata to send to the websocket endpoint in the initial connection message. + """ + sample_rate: float | None = Field( + default=8000, alias="sampleRate", examples=[8000, 16000, 24000] + ) + """ + The audio sample rate in Hz. + """ + finish_on_key: str | None = Field(default=None, alias="finishOnKey", examples=["#"]) + """ + A DTMF key that ends the listen session when pressed. + """ + max_length: float | None = Field(default=None, alias="maxLength") + """ + Maximum duration in seconds for the listen session. + """ + pass_dtmf: bool | None = Field(default=None, alias="passDtmf") + """ + If true, forward DTMF events to the websocket endpoint. + """ + play_beep: bool | None = Field(default=None, alias="playBeep") + """ + If true, play a beep tone before streaming begins. + """ + disable_bidirectional_audio: bool | None = Field( + default=None, alias="disableBidirectionalAudio" + ) + """ + If true, disable receiving audio from the websocket endpoint. Audio flows only from the call to the websocket, not back. + """ + bidirectional_audio: bidirectionalAudio.BidirectionalAudio | None = Field( + default=None, alias="bidirectionalAudio" + ) + """ + Fine-grained configuration for bidirectional audio. + """ + timeout: float | None = None + """ + Time in seconds to wait for audio activity before ending the listen session. + """ + transcribe: transcribe_1.Transcribe | None = None + """ + Nested transcribe verb — enables simultaneous real-time transcription of the audio being streamed. + """ + early_media: bool | None = Field(default=None, alias="earlyMedia") + """ + If true, begin streaming audio before the call is formally answered. + """ + channel: float | None = None + """ + Specific audio channel to stream. 
Used when streaming a single channel of a multi-channel call. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/llm.py b/src/jambonz_sdk/_models/_generated/verbs/llm.py new file mode 100644 index 0000000..c8ab8d6 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/llm.py @@ -0,0 +1,56 @@ +# generated by datamodel-codegen: +# filename: verbs/llm +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import Field + +from ..components.llm_base import LlmBaseProperties + + +class LLM(LlmBaseProperties): + """ + Connects the caller to a large language model for a real-time voice conversation. Handles the complete STT → LLM → TTS pipeline, including turn detection, interruption handling, and tool/function calling. The caller speaks naturally and the LLM responds via synthesized speech. This is the primary verb for building AI voice agents on jambonz. + """ + + verb: Literal["llm"] = "llm" + """ + The verb name. + """ + vendor: str = Field( + ..., + examples=[ + "openai", + "anthropic", + "google", + "groq", + "deepseek", + "deepgram", + "ultravox", + "custom", + ], + ) + """ + The LLM vendor to use. + """ + llm_options: dict[str, Any] = Field( + ..., + alias="llmOptions", + examples=[ + { + "messages": [ + { + "role": "system", + "content": "You are a helpful customer service agent for Acme Corp.", + } + ], + "temperature": 0.7, + } + ], + ) + """ + Configuration passed to the LLM including the system prompt, temperature, tools/functions, and other model parameters. The structure varies by vendor but typically includes 'messages' (conversation history), 'temperature', 'tools' (function definitions), and 'maxTokens'. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/message.py b/src/jambonz_sdk/_models/_generated/verbs/message.py new file mode 100644 index 0000000..63e092f --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/message.py @@ -0,0 +1,58 @@ +# generated by datamodel-codegen: +# filename: verbs/message +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from ..components import actionHook +from jambonz_sdk._models.base import JambonzModel + + +class Message(JambonzModel): + """ + Sends an SMS or MMS message. Can be used during a voice call to send a text message to the caller or another party, or as a standalone action. + """ + + verb: Literal["message"] = "message" + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + to: str = Field(..., examples=["+15085551212"]) + """ + The destination phone number in E.164 format. + """ + from_: str = Field(..., alias="from", examples=["+15085559876"]) + """ + The sender phone number in E.164 format. Must be a number provisioned on the jambonz platform. + """ + text: str | None = None + """ + The text content of the message. + """ + media: str | list[str] | None = Field( + default=None, examples=["https://example.com/images/receipt.png"] + ) + """ + URL(s) of media to attach to the message (MMS). Can be images, audio, or video. + """ + carrier: str | None = None + """ + The messaging carrier to use. If not specified, the default carrier is used. + """ + account_sid: str | None = None + """ + The account SID to use for sending. Defaults to the current account. + """ + message_sid: str | None = None + """ + An optional message SID for tracking. + """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + """ + A webhook invoked when the message send completes or fails. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/openai_s2s.py b/src/jambonz_sdk/_models/_generated/verbs/openai_s2s.py new file mode 100644 index 0000000..b5c6b95 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/openai_s2s.py @@ -0,0 +1,44 @@ +# generated by datamodel-codegen: +# filename: verbs/openai_s2s +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import Field + +from ..components.llm_base import LlmBaseProperties + + +class OpenaiS2S(LlmBaseProperties): + """ + Shortcut for 'llm' with vendor automatically set to 'openai'. Connects the caller to an OpenAI model for real-time speech-to-speech voice conversation. + """ + + verb: Literal["openai_s2s"] = "openai_s2s" + """ + The verb name. + """ + vendor: Literal["openai"] = "openai" + """ + The LLM vendor (always 'openai' for this shortcut). + """ + llm_options: dict[str, Any] = Field( + ..., + alias="llmOptions", + examples=[ + { + "messages": [ + { + "role": "system", + "content": "You are a helpful customer service agent for Acme Corp.", + } + ], + "temperature": 0.7, + } + ], + ) + """ + Configuration passed to the LLM including the system prompt, temperature, tools/functions, and other model parameters. The structure varies by vendor but typically includes 'messages' (conversation history), 'temperature', 'tools' (function definitions), and 'maxTokens'. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/pause.py b/src/jambonz_sdk/_models/_generated/verbs/pause.py new file mode 100644 index 0000000..bddbaa8 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/pause.py @@ -0,0 +1,29 @@ +# generated by datamodel-codegen: +# filename: verbs/pause +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field +from jambonz_sdk._models.base import JambonzModel + + +class Pause(JambonzModel): + """ + Pauses execution for a specified number of seconds. The caller hears silence during the pause. Useful for adding delays between verbs. + """ + + verb: Literal["pause"] = "pause" + """ + The verb name. + """ + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + length: float = Field(..., examples=[1, 2, 5]) + """ + The duration of the pause in seconds. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/play.py b/src/jambonz_sdk/_models/_generated/verbs/play.py new file mode 100644 index 0000000..33122b9 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/play.py @@ -0,0 +1,60 @@ +# generated by datamodel-codegen: +# filename: verbs/play +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from ..components import actionHook +from jambonz_sdk._models.base import JambonzModel + + +class Play(JambonzModel): + """ + Plays an audio file to the caller. Supports WAV and MP3 formats hosted at a URL. Can play a single file or cycle through a list of files. + """ + + verb: Literal["play"] = "play" + """ + The verb name. + """ + id: str | None = None + """ + An optional unique identifier for this verb instance. 
+ """ + url: str | list[str] = Field( + ..., + examples=[ + "https://example.com/sounds/greeting.wav", + [ + "https://example.com/sounds/part1.wav", + "https://example.com/sounds/part2.wav", + ], + ], + ) + """ + The URL(s) of the audio file(s) to play. Supports WAV and MP3. If an array, files are played in sequence. + """ + loop: float | str | None = Field(default=None, examples=[3, "forever"]) + """ + Number of times to repeat playback. Use 0 or 'forever' to loop indefinitely until interrupted. + """ + early_media: bool | None = Field(default=None, alias="earlyMedia") + """ + If true, play the audio as early media before the call is answered. + """ + seek_offset: float | str | None = Field(default=None, alias="seekOffset") + """ + Start playback at this offset in seconds from the beginning of the file. + """ + timeout_secs: float | str | None = Field(default=None, alias="timeoutSecs") + """ + Maximum time in seconds to play the audio. Playback stops after this duration even if the file has not finished. + """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + """ + A webhook to invoke when playback completes. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/redirect.py b/src/jambonz_sdk/_models/_generated/verbs/redirect.py new file mode 100644 index 0000000..f4215b4 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/redirect.py @@ -0,0 +1,32 @@ +# generated by datamodel-codegen: +# filename: verbs/redirect +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from ..components import actionHook +from jambonz_sdk._models.base import JambonzModel + + +class Redirect(JambonzModel): + """ + Transfers call control to a different webhook URL. The current verb stack is abandoned and the new webhook's response becomes the active application. 
Useful for modular application design where different URLs handle different phases of a call. + """ + + verb: Literal["redirect"] = "redirect" + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + action_hook: actionHook.ActionHook = Field(..., alias="actionHook") + """ + The webhook to transfer control to. Must return a new array of verbs. + """ + status_hook: actionHook.ActionHook | None = Field(default=None, alias="statusHook") + """ + A webhook to receive call status events after the redirect. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/rest_dial.py b/src/jambonz_sdk/_models/_generated/verbs/rest_dial.py new file mode 100644 index 0000000..1df6599 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/rest_dial.py @@ -0,0 +1,87 @@ +# generated by datamodel-codegen: +# filename: verbs/rest:dial +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import Field + +from ..components import amd as amd_1 +from ..components import target +from jambonz_sdk._models.base import JambonzModel + + +class RestDial(JambonzModel): + """ + Internal verb used to originate an outbound call via the REST API. Not typically used directly in application verb arrays. + """ + + verb: Literal["rest:dial"] = "rest:dial" + id: str | None = None + account_sid: str | None = None + application_sid: str | None = None + call_hook: str | dict[str, Any] + """ + Webhook URL or object for call control. + """ + call_status_hook: str | dict[str, Any] | None = None + """ + Webhook URL or object for call status notifications. + """ + from_: str = Field(..., alias="from") + """ + The caller ID for the outbound call. + """ + caller_name: str | None = Field(default=None, alias="callerName") + """ + Display name for the caller. + """ + from_host: str | None = Field(default=None, alias="fromHost") + """ + SIP host to use in the From header. 
+ """ + speech_synthesis_vendor: str | None = None + speech_synthesis_voice: str | None = None + speech_synthesis_language: str | None = None + speech_recognizer_vendor: str | None = None + speech_recognizer_language: str | None = None + tag: dict[str, Any] | None = None + """ + Arbitrary metadata to attach to the call. + """ + to: target.Target + """ + The call destination. + """ + headers: dict[str, str | float] | None = None + """ + Custom SIP headers to include on the outbound INVITE. + """ + timeout: float | None = None + """ + Ring timeout in seconds. + """ + amd: amd_1.AnsweringMachineDetection | None = None + """ + Answering machine detection configuration. + """ + dual_streams: bool | None = None + """ + If true, send separate audio streams for each call leg. + """ + sip_request_within_dialog_hook: str | None = Field( + default=None, alias="sipRequestWithinDialogHook" + ) + """ + Webhook for in-dialog SIP requests. + """ + refer_hook: str | dict[str, Any] | None = Field(default=None, alias="referHook") + """ + Webhook for SIP REFER handling. + """ + time_limit: float | None = Field(default=None, alias="timeLimit") + """ + Maximum call duration in seconds. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/s2s.py b/src/jambonz_sdk/_models/_generated/verbs/s2s.py new file mode 100644 index 0000000..6b7e160 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/s2s.py @@ -0,0 +1,56 @@ +# generated by datamodel-codegen: +# filename: verbs/s2s +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import Field + +from ..components.llm_base import LlmBaseProperties + + +class S2S(LlmBaseProperties): + """ + Synonym for 'llm'. Connects the caller to a large language model for a real-time speech-to-speech voice conversation. Requires 'vendor' to be specified explicitly. + """ + + verb: Literal["s2s"] = "s2s" + """ + The verb name. 
+ """ + vendor: str = Field( + ..., + examples=[ + "openai", + "anthropic", + "google", + "groq", + "deepseek", + "deepgram", + "ultravox", + "custom", + ], + ) + """ + The LLM vendor to use. + """ + llm_options: dict[str, Any] = Field( + ..., + alias="llmOptions", + examples=[ + { + "messages": [ + { + "role": "system", + "content": "You are a helpful customer service agent for Acme Corp.", + } + ], + "temperature": 0.7, + } + ], + ) + """ + Configuration passed to the LLM including the system prompt, temperature, tools/functions, and other model parameters. The structure varies by vendor but typically includes 'messages' (conversation history), 'temperature', 'tools' (function definitions), and 'maxTokens'. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/say.py b/src/jambonz_sdk/_models/_generated/verbs/say.py new file mode 100644 index 0000000..31358dc --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/say.py @@ -0,0 +1,68 @@ +# generated by datamodel-codegen: +# filename: verbs/say +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from ..components import synthesizer as synthesizer_1 +from jambonz_sdk._models.base import JambonzModel + + +class Say(JambonzModel): + """ + Speaks text to the caller using text-to-speech. The text can be plain text or SSML. Optionally streams TTS output incrementally for lower latency. This is one of the most commonly used verbs in jambonz applications. + """ + + verb: Literal["say"] = "say" + """ + The verb name. + """ + id: str | None = None + """ + An optional unique identifier for this verb instance. Can be used to reference it in other contexts. + """ + text: str | list[str] | None = Field( + default=None, + examples=[ + "Hello, welcome to our service.", + "Hello welcome.", + ["Hello!", "Hi there!", "Welcome!"], + ], + ) + """ + The text to speak. Can be plain text or SSML markup. 
If an array is provided, one entry is selected at random (useful for variety in prompts). + """ + instructions: str | None = Field( + default=None, examples=["Speak in a warm, friendly tone", "Sound excited and energetic"] + ) + """ + Natural language instructions to guide TTS expression and delivery. Supported by vendors that offer instruction-based synthesis (e.g. ElevenLabs, some OpenAI models). + """ + stream: bool | None = None + """ + If true, stream TTS audio to the caller incrementally as it is generated, rather than waiting for the complete audio. Reduces time-to-first-byte for long utterances. Requires a vendor that supports streaming synthesis. + """ + loop: float | str | None = Field(default=None, examples=[2, "forever"]) + """ + Number of times to repeat the speech. Use 0 or 'forever' to loop indefinitely until interrupted. + """ + synthesizer: synthesizer_1.Synthesizer | None = None + """ + Override the session-level TTS configuration for this specific utterance. + """ + early_media: bool | None = Field(default=None, alias="earlyMedia") + """ + If true, play the audio as early media (before the call is answered). Used for playing announcements or prompts to the caller before the call is formally connected. + """ + disable_tts_cache: bool | None = Field(default=None, alias="disableTtsCache") + """ + If true, bypass the TTS cache and always generate fresh audio. Useful when the same text should be re-synthesized (e.g. with different SSML or when the voice has been updated). + """ + close_stream_on_empty: bool | None = Field(default=None, alias="closeStreamOnEmpty") + """ + If true, close the TTS stream when an empty text string is received. Only applies when stream is true. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/sip_decline.py b/src/jambonz_sdk/_models/_generated/verbs/sip_decline.py new file mode 100644 index 0000000..ff485ce --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/sip_decline.py @@ -0,0 +1,34 @@ +# generated by datamodel-codegen: +# filename: verbs/sip-decline +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field +from jambonz_sdk._models.base import JambonzModel + + +class SipDecline(JambonzModel): + """ + Rejects an incoming call with a SIP error response. Used to decline calls with a specific status code and reason (e.g. 486 Busy Here, 603 Decline). + """ + + verb: Literal["sip:decline"] = "sip:decline" + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + status: float = Field(..., examples=[486, 603, 404, 480]) + """ + The SIP response status code to send. + """ + reason: str | None = Field(default=None, examples=["Busy Here", "Decline", "Not Found"]) + """ + The SIP reason phrase to include in the response. + """ + headers: dict[str, str | float] | None = None + """ + Custom SIP headers to include in the response. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/sip_refer.py b/src/jambonz_sdk/_models/_generated/verbs/sip_refer.py new file mode 100644 index 0000000..98a05d2 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/sip_refer.py @@ -0,0 +1,48 @@ +# generated by datamodel-codegen: +# filename: verbs/sip-refer +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from ..components import actionHook +from jambonz_sdk._models.base import JambonzModel + + +class SipRefer(JambonzModel): + """ + Sends a SIP REFER request to transfer the call to another party. Initiates an attended or unattended (blind) transfer. 
+ """ + + verb: Literal["sip:refer"] = "sip:refer" + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + refer_to: str = Field(..., alias="referTo", examples=["sip:alice@example.com", "+15085551212"]) + """ + The SIP URI or phone number to transfer the call to. + """ + referred_by: str | None = Field(default=None, alias="referredBy") + """ + The SIP URI to use in the Referred-By header. + """ + referred_by_display_name: str | None = Field(default=None, alias="referredByDisplayName") + """ + The display name to use in the Referred-By header. + """ + headers: dict[str, str | float] | None = None + """ + Custom SIP headers to include in the REFER request. + """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + """ + A webhook invoked when the REFER completes (or fails). + """ + event_hook: actionHook.ActionHook | None = Field(default=None, alias="eventHook") + """ + A webhook invoked for NOTIFY events during the REFER process, providing transfer progress updates. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/sip_request.py b/src/jambonz_sdk/_models/_generated/verbs/sip_request.py new file mode 100644 index 0000000..81ae7c7 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/sip_request.py @@ -0,0 +1,40 @@ +# generated by datamodel-codegen: +# filename: verbs/sip-request +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from ..components import actionHook +from jambonz_sdk._models.base import JambonzModel + + +class SipRequest(JambonzModel): + """ + Sends a SIP request within the current dialog. Used to send INFO, NOTIFY, or other SIP methods to the remote party during an active call. + """ + + verb: Literal["sip:request"] = "sip:request" + id: str | None = None + """ + An optional unique identifier for this verb instance. 
+ """ + method: str = Field(..., examples=["INFO", "NOTIFY", "MESSAGE"]) + """ + The SIP method to send. + """ + body: str | None = None + """ + The body of the SIP request. + """ + headers: dict[str, str | float] | None = None + """ + Custom SIP headers to include in the request. + """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + """ + A webhook invoked when the response to the SIP request is received. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/stream.py b/src/jambonz_sdk/_models/_generated/verbs/stream.py new file mode 100644 index 0000000..40c9ca0 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/stream.py @@ -0,0 +1,104 @@ +# generated by datamodel-codegen: +# filename: verbs/stream +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import Any, Literal + +from pydantic import Field + +from ..components import actionHook, auth, bidirectionalAudio +from . import transcribe as transcribe_1 +from jambonz_sdk._models.base import JambonzModel + + +class MixType(Enum): + """ + How to mix audio channels. + """ + + mono = "mono" + stereo = "stereo" + mixed = "mixed" + + +class Stream(JambonzModel): + """ + Streams real-time call audio to an external websocket endpoint. Functionally equivalent to 'listen' — this is an alias provided for naming clarity when the intent is audio streaming rather than recording. + """ + + verb: Literal["stream"] = "stream" + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + url: str + """ + The websocket URL to stream audio to. + """ + action_hook: actionHook.ActionHook | None = Field(default=None, alias="actionHook") + """ + A webhook invoked when the stream ends. + """ + ws_auth: auth.Auth | None = Field(default=None, alias="wsAuth") + """ + Authentication credentials for the websocket connection. 
+ """ + mix_type: MixType | None = Field(default=None, alias="mixType") + """ + How to mix audio channels. + """ + metadata: dict[str, Any] | None = None + """ + Metadata to send with the initial connection. + """ + sample_rate: float | None = Field(default=None, alias="sampleRate", examples=[8000, 16000]) + """ + Audio sample rate in Hz. + """ + finish_on_key: str | None = Field(default=None, alias="finishOnKey") + """ + DTMF key that ends the stream. + """ + max_length: float | None = Field(default=None, alias="maxLength") + """ + Maximum duration in seconds. + """ + pass_dtmf: bool | None = Field(default=None, alias="passDtmf") + """ + Forward DTMF events to the websocket. + """ + play_beep: bool | None = Field(default=None, alias="playBeep") + """ + Play a beep before streaming begins. + """ + disable_bidirectional_audio: bool | None = Field( + default=None, alias="disableBidirectionalAudio" + ) + """ + Disable receiving audio from the websocket. + """ + bidirectional_audio: bidirectionalAudio.BidirectionalAudio | None = Field( + default=None, alias="bidirectionalAudio" + ) + """ + Bidirectional audio configuration. + """ + timeout: float | None = None + """ + Inactivity timeout in seconds. + """ + transcribe: transcribe_1.Transcribe | None = None + """ + Nested transcribe verb — enables simultaneous real-time transcription of the streamed audio. + """ + early_media: bool | None = Field(default=None, alias="earlyMedia") + """ + Stream audio before the call is answered. + """ + channel: float | None = None + """ + Specific audio channel to stream. Used when streaming a single channel of a multi-channel call. 
+ """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/tag.py b/src/jambonz_sdk/_models/_generated/verbs/tag.py new file mode 100644 index 0000000..81a2955 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/tag.py @@ -0,0 +1,29 @@ +# generated by datamodel-codegen: +# filename: verbs/tag +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import Field +from jambonz_sdk._models.base import JambonzModel + + +class Tag(JambonzModel): + """ + Attaches arbitrary metadata to the current call. Tagged data is included in all subsequent webhook requests and in the call detail record (CDR). Useful for tracking business context, routing decisions, or analytics data through the call lifecycle. + """ + + verb: Literal["tag"] = "tag" + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + data: dict[str, Any] = Field( + ..., + examples=[{"customerId": "12345", "department": "support", "priority": "high"}], + ) + """ + An object containing the metadata to attach to the call. Keys and values are application-defined. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/transcribe.py b/src/jambonz_sdk/_models/_generated/verbs/transcribe.py new file mode 100644 index 0000000..789754a --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/transcribe.py @@ -0,0 +1,48 @@ +# generated by datamodel-codegen: +# filename: verbs/transcribe +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from ..components import recognizer as recognizer_1 +from jambonz_sdk._models.base import JambonzModel + + +class Transcribe(JambonzModel): + """ + Enables real-time transcription of the call audio. Transcription results are sent to the transcriptionHook as they are produced. Runs as a background process — subsequent verbs execute immediately while transcription continues. 
+ """ + + verb: Literal["transcribe"] = "transcribe" + id: str | None = None + """ + An optional unique identifier for this verb instance. + """ + enable: bool | None = None + """ + Enable or disable transcription. Used when transcribe is nested inside a config or dial verb to start or stop background transcription. + """ + transcription_hook: str | None = Field(default=None, alias="transcriptionHook") + """ + The webhook URL to receive transcription results. + """ + translation_hook: str | None = Field(default=None, alias="translationHook") + """ + The webhook URL to receive translated transcription results. + """ + recognizer: recognizer_1.Recognizer | None = None + """ + STT configuration for the transcription. + """ + early_media: bool | None = Field(default=None, alias="earlyMedia") + """ + If true, begin transcribing before the call is answered. + """ + channel: float | None = None + """ + Specific audio channel to transcribe. + """ diff --git a/src/jambonz_sdk/_models/_generated/verbs/ultravox_s2s.py b/src/jambonz_sdk/_models/_generated/verbs/ultravox_s2s.py new file mode 100644 index 0000000..fc7f286 --- /dev/null +++ b/src/jambonz_sdk/_models/_generated/verbs/ultravox_s2s.py @@ -0,0 +1,44 @@ +# generated by datamodel-codegen: +# filename: verbs/ultravox_s2s +# timestamp: 2026-04-22T10:54:53+00:00 + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import Field + +from ..components.llm_base import LlmBaseProperties + + +class UltravoxS2S(LlmBaseProperties): + """ + Shortcut for 'llm' with vendor automatically set to 'ultravox'. Connects the caller to an Ultravox model for real-time speech-to-speech voice conversation. + """ + + verb: Literal["ultravox_s2s"] = "ultravox_s2s" + """ + The verb name. + """ + vendor: Literal["ultravox"] = "ultravox" + """ + The LLM vendor (always 'ultravox' for this shortcut). 
+ """ + llm_options: dict[str, Any] = Field( + ..., + alias="llmOptions", + examples=[ + { + "messages": [ + { + "role": "system", + "content": "You are a helpful customer service agent for Acme Corp.", + } + ], + "temperature": 0.7, + } + ], + ) + """ + Configuration passed to the LLM including the system prompt, temperature, tools/functions, and other model parameters. The structure varies by vendor but typically includes 'messages' (conversation history), 'temperature', 'tools' (function definitions), and 'maxTokens'. + """ diff --git a/src/jambonz_sdk/_models/_patches/__init__.py b/src/jambonz_sdk/_models/_patches/__init__.py new file mode 100644 index 0000000..4bdb0bd --- /dev/null +++ b/src/jambonz_sdk/_models/_patches/__init__.py @@ -0,0 +1,10 @@ +"""Hand-written supplements to the generated models. + +Currently empty — cross-field validators (e.g. ``Gather.numDigits`` vs +``min/maxDigits``) are appended directly to the generated class bodies +by ``scripts/regen_models.py`` via the ``CLASS_VALIDATORS`` table, so +there is no runtime patching to apply here. The package exists so +callers can import it symbolically if future patches need a home. +""" + +__all__: list[str] = [] diff --git a/src/jambonz_sdk/_models/_registry.py b/src/jambonz_sdk/_models/_registry.py new file mode 100644 index 0000000..2cd62aa --- /dev/null +++ b/src/jambonz_sdk/_models/_registry.py @@ -0,0 +1,66 @@ +"""Lazy map from JSON verb name to its generated pydantic model class. + +Scans the generated ``_models._generated.verbs`` package once and indexes +every class that declares a ``verb: Literal[...]`` field. The literal +default becomes the key; the class becomes the value. + +Used by :mod:`jambonz_sdk.verb_builder` to route verb method calls through +their corresponding model for validation and serialization. 
+""" + +from __future__ import annotations + +import importlib +import logging +import pkgutil +from typing import Any + +from pydantic import BaseModel + +logger = logging.getLogger("jambonz_sdk._models._registry") + +_cache: dict[str, type[BaseModel]] | None = None + + +def _build_registry() -> dict[str, type[BaseModel]]: + try: + from jambonz_sdk._models._generated import verbs as verbs_pkg + except ImportError: + logger.warning("generated verbs package not found — regen with scripts/regen_models.py") + return {} + + registry: dict[str, type[BaseModel]] = {} + for _, modname, _ in pkgutil.iter_modules(verbs_pkg.__path__): + try: + module = importlib.import_module(f"{verbs_pkg.__name__}.{modname}") + except Exception as exc: # noqa: BLE001 — gracefully skip broken modules + logger.warning("failed to import %s: %s", modname, exc) + continue + for name, attr in vars(module).items(): + if not isinstance(attr, type) or not issubclass(attr, BaseModel): + continue + if attr.__module__ != module.__name__: + continue # skip re-exported classes from other modules + verb_field: Any = attr.model_fields.get("verb") + if verb_field is None: + continue + default = verb_field.default + if isinstance(default, str): + registry[default] = attr + return registry + + +def verb_model(json_verb: str) -> type[BaseModel] | None: + """Return the generated model class for a JSON verb name, or ``None``.""" + global _cache + if _cache is None: + _cache = _build_registry() + return _cache.get(json_verb) + + +def all_verb_models() -> dict[str, type[BaseModel]]: + """Return a copy of the full registry, keyed by JSON verb name.""" + global _cache + if _cache is None: + _cache = _build_registry() + return dict(_cache) diff --git a/src/jambonz_sdk/_models/base.py b/src/jambonz_sdk/_models/base.py new file mode 100644 index 0000000..9aff238 --- /dev/null +++ b/src/jambonz_sdk/_models/base.py @@ -0,0 +1,43 @@ +"""Shared base class for all jambonz verb/component pydantic models. 
+ +Every generated model inherits from ``JambonzModel``. The regen script +rewrites ``pydantic.BaseModel`` imports to point at this class so the +generated output picks up the shared configuration automatically. + +Configuration: + +- ``populate_by_name=True`` — constructors accept either the Python field + name (``action_hook``) or the camelCase alias (``actionHook``). This is + what lets users pass raw dicts (camelCase or snake_case) and have them + coerce into typed models. +- ``serialize_by_alias=True`` — ``model_dump()`` emits camelCase, matching + the on-the-wire format jambonz expects. +- ``extra="forbid"`` — unknown fields raise at construction time, so typos + fail fast instead of silently hitting the jambonz server. Per-model + overrides (e.g. ``BargeIn``, ``LlmOptions``) use ``extra="allow"`` when + the schema declares ``additionalProperties: true``; the regen script + reads this from the schema and emits the override per class. + +Per-field camelCase aliases are emitted by the code generator rather than +derived from an ``alias_generator`` — this is more robust for fields where +the mapping isn't a clean snake→camel (e.g. ``naicsCode``). +""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict + + +class JambonzModel(BaseModel): + """Base for all jambonz verb and component models.""" + + model_config = ConfigDict( + populate_by_name=True, + serialize_by_alias=True, + extra="forbid", + # Store enum fields as their underlying str/int value so that + # ``model_dump(mode='json')`` emits the value directly without + # emitting "expected enum" serializer warnings when a caller + # passes the raw value (``method='POST'``) via a dict or kwargs. 
+ use_enum_values=True, + ) diff --git a/src/jambonz_sdk/components/__init__.py b/src/jambonz_sdk/components/__init__.py new file mode 100644 index 0000000..6309374 --- /dev/null +++ b/src/jambonz_sdk/components/__init__.py @@ -0,0 +1,102 @@ +"""Public re-exports of generated jambonz components models. + +Auto-generated by ``scripts/regen_models.py`` — do not edit by hand. + +Typical usage:: + + from jambonz_sdk.components import Gather, Say +""" + +from __future__ import annotations + +from jambonz_sdk._models._generated.components.actionHook import ActionHook +from jambonz_sdk._models._generated.components.actionHookDelayAction import ActionHookDelayAction +from jambonz_sdk._models._generated.components.amd import AnsweringMachineDetection +from jambonz_sdk._models._generated.components.recognizer_assemblyAiOptions import ( + AssemblyaiRecognizerOptions, +) +from jambonz_sdk._models._generated.components.auth import Auth +from jambonz_sdk._models._generated.components.recognizer_awsOptions import AwsRecognizerOptions +from jambonz_sdk._models._generated.components.recognizer_azureOptions import AzureRecognizerOptions +from jambonz_sdk._models._generated.verbs.agent import BargeIn +from jambonz_sdk._models._generated.components.bidirectionalAudio import BidirectionalAudio +from jambonz_sdk._models._generated.components.recognizer_cobaltOptions import ( + CobaltRecognizerOptions, +) +from jambonz_sdk._models._generated.components.recognizer_customOptions import ( + CustomRecognizerOptions, +) +from jambonz_sdk._models._generated.components.recognizer_deepgramOptions import ( + DeepgramRecognizerOptions, +) +from jambonz_sdk._models._generated.components.recognizer_elevenlabsOptions import ( + ElevenlabsRecognizerOptions, +) +from jambonz_sdk._models._generated.components.fillerNoise import FillerNoise +from jambonz_sdk._models._generated.components.recognizer_gladiaOptions import ( + GladiaRecognizerOptions, +) +from 
jambonz_sdk._models._generated.components.recognizer_googleOptions import ( + GoogleRecognizerOptions, +) +from jambonz_sdk._models._generated.components.recognizer_houndifyOptions import ( + HoundifyRecognizerOptions, +) +from jambonz_sdk._models._generated.components.recognizer_ibmOptions import IbmRecognizerOptions +from jambonz_sdk._models._generated.components.llm_base import LlmBaseProperties +from jambonz_sdk._models._generated.verbs.agent import McpServer +from jambonz_sdk._models._generated.components.recognizer_nuanceOptions import ( + NuanceRecognizerOptions, +) +from jambonz_sdk._models._generated.components.recognizer_nvidiaOptions import ( + NvidiaRecognizerOptions, +) +from jambonz_sdk._models._generated.components.recognizer_openaiOptions import ( + OpenaiRecognizerOptions, +) +from jambonz_sdk._models._generated.components.recognizer import Recognizer +from jambonz_sdk._models._generated.components.recognizer_sonioxOptions import ( + SonioxRecognizerOptions, +) +from jambonz_sdk._models._generated.components.recognizer_speechmaticsOptions import ( + SpeechmaticsRecognizerOptions, +) +from jambonz_sdk._models._generated.components.synthesizer import Synthesizer +from jambonz_sdk._models._generated.components.target import Target +from jambonz_sdk._models._generated.components.vad import VAD +from jambonz_sdk._models._generated.components.recognizer_verbioOptions import ( + VerbioRecognizerOptions, +) + +__all__ = [ + "ActionHook", + "ActionHookDelayAction", + "AnsweringMachineDetection", + "AssemblyaiRecognizerOptions", + "Auth", + "AwsRecognizerOptions", + "AzureRecognizerOptions", + "BargeIn", + "BidirectionalAudio", + "CobaltRecognizerOptions", + "CustomRecognizerOptions", + "DeepgramRecognizerOptions", + "ElevenlabsRecognizerOptions", + "FillerNoise", + "GladiaRecognizerOptions", + "GoogleRecognizerOptions", + "HoundifyRecognizerOptions", + "IbmRecognizerOptions", + "LlmBaseProperties", + "McpServer", + "NuanceRecognizerOptions", + 
"NvidiaRecognizerOptions", + "OpenaiRecognizerOptions", + "Recognizer", + "SonioxRecognizerOptions", + "SpeechmaticsRecognizerOptions", + "Synthesizer", + "Target", + "VAD", + "VerbioRecognizerOptions", +] diff --git a/src/jambonz_sdk/verb_builder.py b/src/jambonz_sdk/verb_builder.py index aa0ff29..727ff36 100644 --- a/src/jambonz_sdk/verb_builder.py +++ b/src/jambonz_sdk/verb_builder.py @@ -1,11 +1,20 @@ -"""VerbBuilder base class with auto-generated chainable verb methods. +"""VerbBuilder base class with chainable verb methods. -Methods are generated at import time from JSON Schema files + the verb registry. -When the schema changes, the SDK automatically picks up new parameters — -no manual method signatures to maintain. +Each verb method accepts three interchangeable forms: -Each generated method has a real ``inspect.Signature`` with typed parameters -so IDEs (VS Code, PyCharm) show proper autocomplete and type hints. +1. A typed pydantic model: ``session.gather(Gather(input=["speech"], ...))`` +2. A raw dict: ``session.gather({"input": ["speech"], "actionHook": "/x"})`` +3. Keyword arguments: ``session.gather(input=["speech"], actionHook="/x")`` + +All three are validated and normalized through the verb's generated pydantic +model before being appended to the queue. Validation errors (unknown fields, +wrong types, cross-field rule violations) are raised at construction time, +not hours later on the jambonz server. + +Methods are built from the verb registry at import time. Each method also +carries a real ``inspect.Signature`` + ``__annotations__`` so IDEs show +autocomplete hints for the kwargs style. For full typed autocomplete, +users should import and pass the model classes directly. 
""" from __future__ import annotations @@ -22,6 +31,9 @@ else: from typing_extensions import Self +from pydantic import ValidationError + +from jambonz_sdk._models._registry import verb_model from jambonz_sdk.types.verbs import AnyVerb from jambonz_sdk.verb_registry import VERB_DEFS, VerbDef @@ -193,36 +205,90 @@ def _load_schemas() -> dict[str, Any]: # ── Method factory ────────────────────────────────────────────────── def _make_verb_method(verb_def: VerbDef, spec: dict[str, Any]) -> Any: - """Create a verb method with a real typed signature from the spec. - - Each generated method has: - - ``inspect.Signature`` with keyword-only parameters (default ``None``) - - ``__annotations__`` with resolved Python types (not ``Any``) - - Docstring with parameter types and required markers + """Create a verb method that routes through the generated pydantic model. + + The returned method accepts either a positional ``Model`` / ``dict`` + argument or keyword arguments matching the verb's schema. The payload + is validated via the model (raising ``ValidationError`` on typos, + wrong types, or cross-field rule violations) then dumped with + ``mode='json', by_alias=True, exclude_none=True`` to produce the exact + wire format jambonz expects. + + Each method also carries a real ``inspect.Signature`` derived from the + verb schema, so IDEs show kwargs-style hints. For richer hints, users + should pass the model classes directly (``session.gather(Gather(...))``). 
""" properties = spec.get("properties", {}) required = set(spec.get("required", [])) json_verb = verb_def.json_verb - inject = verb_def.inject - - def verb_method(self: VerbBuilder, **kwargs: Any) -> Self: - data: dict[str, Any] = {} - if inject: - data.update(inject) - for key, value in kwargs.items(): - if value is None: - continue - if key == "from_": - data["from"] = value + inject = dict(verb_def.inject) # copy; never mutated but defensive + model_cls = verb_model(json_verb) + + def verb_method( + self: VerbBuilder, + arg: Any = None, + /, + **kwargs: Any, + ) -> Self: + if arg is not None and kwargs: + raise TypeError( + f"{verb_def.method_name}() takes either a model/dict or keyword " + "arguments, not both" + ) + + if model_cls is not None and isinstance(arg, model_cls): + data = arg.model_dump(mode="json", by_alias=True, exclude_none=True) + else: + # Build a payload dict from arg (if a dict) or kwargs, then merge + # the registry's injected fields and coerce ``from_`` → ``from``. + if isinstance(arg, dict): + payload: dict[str, Any] = dict(arg) + elif arg is None: + payload = {} + else: + raise TypeError( + f"{verb_def.method_name}() expected a {model_cls.__name__ if model_cls else 'dict'} " + f"or dict, got {type(arg).__name__}" + ) + + for key, value in kwargs.items(): + if value is None: + continue + payload["from" if key == "from_" else key] = value + + # Inject verb-registry defaults (e.g. vendor for vendor-specific + # shortcuts) if not already set by the caller. + for key, value in inject.items(): + payload.setdefault(key, value) + + if model_cls is not None: + try: + model = model_cls.model_validate(payload) + except ValidationError as exc: + raise exc + data = model.model_dump(mode="json", by_alias=True, exclude_none=True) else: - data[key] = value - verb: dict[str, Any] = {"verb": json_verb, **data} - self._verbs.append(verb) # type: ignore[arg-type] + # Fallback when no generated model exists (e.g. 
fresh checkout + # before scripts/regen_models.py has run). Preserve legacy + # behavior: raw dict assembly with the verb tag. + data = {"verb": json_verb, **payload} + + self._verbs.append(data) # type: ignore[arg-type] return self # ── Build inspect.Signature with typed keyword-only params ────── - params = [inspect.Parameter("self", inspect.Parameter.POSITIONAL_OR_KEYWORD)] - annotations: dict[str, Any] = {} + # The first param is a positional-only model/dict. Subsequent params + # mirror the schema's top-level properties for kwargs-style autocomplete. + params = [ + inspect.Parameter("self", inspect.Parameter.POSITIONAL_ONLY), + inspect.Parameter( + "arg", + inspect.Parameter.POSITIONAL_ONLY, + default=None, + annotation=Union[model_cls, dict, None] if model_cls else Union[dict, None], + ), + ] + annotations: dict[str, Any] = {"arg": Union[model_cls, dict, None] if model_cls else Union[dict, None]} for prop_name, prop_spec in properties.items(): py_name = "from_" if prop_name == "from" else prop_name diff --git a/src/jambonz_sdk/verb_builder.pyi b/src/jambonz_sdk/verb_builder.pyi index 5d0e5cd..f0e01cb 100644 --- a/src/jambonz_sdk/verb_builder.pyi +++ b/src/jambonz_sdk/verb_builder.pyi @@ -1,6 +1,11 @@ """Auto-generated type stubs for VerbBuilder. DO NOT EDIT — regenerate with: python scripts/generate_stubs.py + +Each verb method accepts three interchangeable input forms: + 1. a positional generated model instance + 2. a positional dict payload + 3. keyword arguments matching the verb's JSON Schema """ from typing import Any, Self @@ -14,7 +19,7 @@ class VerbBuilder: def to_list(self) -> list[AnyVerb]: ... def say( - self, + self, arg: Any = ..., /, id: str = ..., text: Any = ..., instructions: str = ..., @@ -29,6 +34,7 @@ class VerbBuilder: """Speak text using TTS. Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str text: Any instructions: str @@ -45,7 +51,7 @@ class VerbBuilder: ... 
def play( - self, + self, arg: Any = ..., /, id: str = ..., url: Any = ..., loop: Any = ..., @@ -60,6 +66,7 @@ class VerbBuilder: Required: url Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str url: Any (required) loop: Any @@ -74,7 +81,7 @@ class VerbBuilder: ... def gather( - self, + self, arg: Any = ..., /, id: str = ..., actionHook: Any = ..., input: list[Any] = ..., @@ -100,6 +107,7 @@ class VerbBuilder: """Collect speech (STT) and/or DTMF input. Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str actionHook: Any input: list[Any] @@ -127,7 +135,7 @@ class VerbBuilder: ... def openai_s2s( - self, + self, arg: Any = ..., /, **kwargs: Any, ) -> Self: """Connect caller to OpenAI for real-time voice conversation. @@ -135,6 +143,7 @@ class VerbBuilder: Required: llmOptions, vendor Args: + arg: a typed model instance or a dict payload (alternative to kwargs). Returns: self for chaining. @@ -142,7 +151,7 @@ class VerbBuilder: ... def google_s2s( - self, + self, arg: Any = ..., /, **kwargs: Any, ) -> Self: """Connect caller to Google for real-time voice conversation. @@ -150,6 +159,7 @@ class VerbBuilder: Required: llmOptions, vendor Args: + arg: a typed model instance or a dict payload (alternative to kwargs). Returns: self for chaining. @@ -157,7 +167,7 @@ class VerbBuilder: ... def deepgram_s2s( - self, + self, arg: Any = ..., /, **kwargs: Any, ) -> Self: """Connect caller to Deepgram for real-time voice conversation. @@ -165,6 +175,7 @@ class VerbBuilder: Required: llmOptions, vendor Args: + arg: a typed model instance or a dict payload (alternative to kwargs). Returns: self for chaining. @@ -172,7 +183,7 @@ class VerbBuilder: ... def elevenlabs_s2s( - self, + self, arg: Any = ..., /, **kwargs: Any, ) -> Self: """Connect caller to ElevenLabs Conversational AI agent. 
@@ -180,6 +191,7 @@ class VerbBuilder: Required: llmOptions, vendor Args: + arg: a typed model instance or a dict payload (alternative to kwargs). Returns: self for chaining. @@ -187,7 +199,7 @@ class VerbBuilder: ... def ultravox_s2s( - self, + self, arg: Any = ..., /, **kwargs: Any, ) -> Self: """Connect caller to Ultravox for real-time voice conversation. @@ -195,6 +207,7 @@ class VerbBuilder: Required: llmOptions, vendor Args: + arg: a typed model instance or a dict payload (alternative to kwargs). Returns: self for chaining. @@ -202,7 +215,7 @@ class VerbBuilder: ... def s2s( - self, + self, arg: Any = ..., /, **kwargs: Any, ) -> Self: """Generic S2S verb (use when vendor is determined at runtime). @@ -210,6 +223,7 @@ class VerbBuilder: Required: llmOptions, vendor Args: + arg: a typed model instance or a dict payload (alternative to kwargs). Returns: self for chaining. @@ -217,7 +231,7 @@ class VerbBuilder: ... def llm( - self, + self, arg: Any = ..., /, **kwargs: Any, ) -> Self: """Legacy LLM verb (prefer s2s or vendor-specific shortcuts). @@ -225,6 +239,7 @@ class VerbBuilder: Required: llmOptions, vendor Args: + arg: a typed model instance or a dict payload (alternative to kwargs). Returns: self for chaining. @@ -232,7 +247,7 @@ class VerbBuilder: ... def dialogflow( - self, + self, arg: Any = ..., /, id: str = ..., credentials: Any = ..., project: str = ..., @@ -260,6 +275,7 @@ class VerbBuilder: Required: credentials, lang, project Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str credentials: Any (required) project: str (required) @@ -287,7 +303,7 @@ class VerbBuilder: ... def agent( - self, + self, arg: Any = ..., /, id: str = ..., stt: Any = ..., tts: Any = ..., @@ -309,6 +325,7 @@ class VerbBuilder: Required: llm Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str stt: Any tts: Any @@ -330,7 +347,7 @@ class VerbBuilder: ... 
def listen( - self, + self, arg: Any = ..., /, id: str = ..., url: str = ..., actionHook: Any = ..., @@ -355,6 +372,7 @@ class VerbBuilder: Required: url Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str url: str (required) actionHook: Any @@ -379,7 +397,7 @@ class VerbBuilder: ... def stream( - self, + self, arg: Any = ..., /, id: str = ..., url: str = ..., actionHook: Any = ..., @@ -404,6 +422,7 @@ class VerbBuilder: Required: url Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str url: str (required) actionHook: Any @@ -428,7 +447,7 @@ class VerbBuilder: ... def transcribe( - self, + self, arg: Any = ..., /, id: str = ..., enable: bool = ..., transcriptionHook: str = ..., @@ -441,6 +460,7 @@ class VerbBuilder: """Enable real-time call transcription. Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str enable: bool transcriptionHook: str @@ -455,7 +475,7 @@ class VerbBuilder: ... def dial( - self, + self, arg: Any = ..., /, id: str = ..., target: list[Any] = ..., actionHook: Any = ..., @@ -489,6 +509,7 @@ class VerbBuilder: Required: target Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str target: list[Any] (required) actionHook: Any @@ -522,7 +543,7 @@ class VerbBuilder: ... def conference( - self, + self, arg: Any = ..., /, id: str = ..., name: str = ..., beep: bool = ..., @@ -548,6 +569,7 @@ class VerbBuilder: Required: name Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str name: str (required) beep: bool @@ -573,7 +595,7 @@ class VerbBuilder: ... def enqueue( - self, + self, arg: Any = ..., /, id: str = ..., name: str = ..., actionHook: Any = ..., @@ -586,6 +608,7 @@ class VerbBuilder: Required: name Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str name: str (required) actionHook: Any @@ -598,7 +621,7 @@ class VerbBuilder: ... 
def dequeue( - self, + self, arg: Any = ..., /, id: str = ..., name: str = ..., actionHook: Any = ..., @@ -612,6 +635,7 @@ class VerbBuilder: Required: name Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str name: str (required) actionHook: Any @@ -625,7 +649,7 @@ class VerbBuilder: ... def hangup( - self, + self, arg: Any = ..., /, id: str = ..., headers: dict[str, Any] = ..., **kwargs: Any, @@ -633,6 +657,7 @@ class VerbBuilder: """Terminate the call. Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str headers: dict[str, Any] @@ -642,7 +667,7 @@ class VerbBuilder: ... def redirect( - self, + self, arg: Any = ..., /, id: str = ..., actionHook: Any = ..., statusHook: Any = ..., @@ -653,6 +678,7 @@ class VerbBuilder: Required: actionHook Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str actionHook: Any (required) statusHook: Any @@ -663,7 +689,7 @@ class VerbBuilder: ... def pause( - self, + self, arg: Any = ..., /, id: str = ..., length: int | float = ..., **kwargs: Any, @@ -673,6 +699,7 @@ class VerbBuilder: Required: length Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str length: int | float (required) @@ -682,7 +709,7 @@ class VerbBuilder: ... def sip_decline( - self, + self, arg: Any = ..., /, id: str = ..., status: int | float = ..., reason: str = ..., @@ -694,6 +721,7 @@ class VerbBuilder: Required: status Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str status: int | float (required) reason: str @@ -705,7 +733,7 @@ class VerbBuilder: ... def sip_request( - self, + self, arg: Any = ..., /, id: str = ..., method: str = ..., body: str = ..., @@ -718,6 +746,7 @@ class VerbBuilder: Required: method Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str method: str (required) body: str @@ -730,7 +759,7 @@ class VerbBuilder: ... 
def sip_refer( - self, + self, arg: Any = ..., /, id: str = ..., referTo: str = ..., referredBy: str = ..., @@ -745,6 +774,7 @@ class VerbBuilder: Required: referTo Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str referTo: str (required) referredBy: str @@ -759,7 +789,7 @@ class VerbBuilder: ... def config( - self, + self, arg: Any = ..., /, id: str = ..., synthesizer: Any = ..., recognizer: Any = ..., @@ -791,6 +821,7 @@ class VerbBuilder: """Set session-level defaults. Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str synthesizer: Any recognizer: Any @@ -824,7 +855,7 @@ class VerbBuilder: ... def tag( - self, + self, arg: Any = ..., /, id: str = ..., data: dict[str, Any] = ..., **kwargs: Any, @@ -834,6 +865,7 @@ class VerbBuilder: Required: data Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str data: dict[str, Any] (required) @@ -843,7 +875,7 @@ class VerbBuilder: ... def dtmf( - self, + self, arg: Any = ..., /, id: str = ..., dtmf: str = ..., duration: int | float = ..., @@ -854,6 +886,7 @@ class VerbBuilder: Required: dtmf Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str dtmf: str (required) duration: int | float @@ -864,7 +897,7 @@ class VerbBuilder: ... def dub( - self, + self, arg: Any = ..., /, id: str = ..., action: str = ..., track: str = ..., @@ -879,6 +912,7 @@ class VerbBuilder: Required: action, track Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str action: str (required) track: str (required) @@ -893,7 +927,7 @@ class VerbBuilder: ... def message( - self, + self, arg: Any = ..., /, id: str = ..., to: str = ..., from_: str = ..., @@ -910,6 +944,7 @@ class VerbBuilder: Required: from, to Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str to: str (required) from_: str (required) @@ -926,7 +961,7 @@ class VerbBuilder: ... 
def alert( - self, + self, arg: Any = ..., /, id: str = ..., message: str = ..., **kwargs: Any, @@ -936,6 +971,7 @@ class VerbBuilder: Required: message Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str message: str (required) @@ -945,13 +981,14 @@ class VerbBuilder: ... def answer( - self, + self, arg: Any = ..., /, id: str = ..., **kwargs: Any, ) -> Self: """Explicitly answer the call. Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str Returns: @@ -960,13 +997,14 @@ class VerbBuilder: ... def leave( - self, + self, arg: Any = ..., /, id: str = ..., **kwargs: Any, ) -> Self: """Leave a conference or queue. Args: + arg: a typed model instance or a dict payload (alternative to kwargs). id: str Returns: diff --git a/src/jambonz_sdk/verbs/__init__.py b/src/jambonz_sdk/verbs/__init__.py new file mode 100644 index 0000000..3be450d --- /dev/null +++ b/src/jambonz_sdk/verbs/__init__.py @@ -0,0 +1,82 @@ +"""Public re-exports of generated jambonz verbs models. + +Auto-generated by ``scripts/regen_models.py`` — do not edit by hand. 
+ +Typical usage:: + + from jambonz_sdk.verbs import Gather, Say +""" + +from __future__ import annotations + +from jambonz_sdk._models._generated.verbs.agent import Agent +from jambonz_sdk._models._generated.verbs.alert import Alert +from jambonz_sdk._models._generated.verbs.answer import Answer +from jambonz_sdk._models._generated.verbs.conference import Conference +from jambonz_sdk._models._generated.verbs.config import Config +from jambonz_sdk._models._generated.verbs.dtmf import DTMF +from jambonz_sdk._models._generated.verbs.deepgram_s2s import DeepgramS2S +from jambonz_sdk._models._generated.verbs.dequeue import Dequeue +from jambonz_sdk._models._generated.verbs.dial import Dial +from jambonz_sdk._models._generated.verbs.dialogflow import Dialogflow +from jambonz_sdk._models._generated.verbs.dub import Dub +from jambonz_sdk._models._generated.verbs.elevenlabs_s2s import ElevenlabsS2S +from jambonz_sdk._models._generated.verbs.enqueue import Enqueue +from jambonz_sdk._models._generated.verbs.gather import Gather +from jambonz_sdk._models._generated.verbs.google_s2s import GoogleS2S +from jambonz_sdk._models._generated.verbs.hangup import Hangup +from jambonz_sdk._models._generated.verbs.llm import LLM +from jambonz_sdk._models._generated.verbs.leave import Leave +from jambonz_sdk._models._generated.verbs.listen import Listen +from jambonz_sdk._models._generated.verbs.message import Message +from jambonz_sdk._models._generated.verbs.openai_s2s import OpenaiS2S +from jambonz_sdk._models._generated.verbs.pause import Pause +from jambonz_sdk._models._generated.verbs.play import Play +from jambonz_sdk._models._generated.verbs.redirect import Redirect +from jambonz_sdk._models._generated.verbs.rest_dial import RestDial +from jambonz_sdk._models._generated.verbs.s2s import S2S +from jambonz_sdk._models._generated.verbs.say import Say +from jambonz_sdk._models._generated.verbs.sip_decline import SipDecline +from jambonz_sdk._models._generated.verbs.sip_refer import 
SipRefer +from jambonz_sdk._models._generated.verbs.sip_request import SipRequest +from jambonz_sdk._models._generated.verbs.stream import Stream +from jambonz_sdk._models._generated.verbs.tag import Tag +from jambonz_sdk._models._generated.verbs.transcribe import Transcribe +from jambonz_sdk._models._generated.verbs.ultravox_s2s import UltravoxS2S + +__all__ = [ + "Agent", + "Alert", + "Answer", + "Conference", + "Config", + "DTMF", + "DeepgramS2S", + "Dequeue", + "Dial", + "Dialogflow", + "Dub", + "ElevenlabsS2S", + "Enqueue", + "Gather", + "GoogleS2S", + "Hangup", + "LLM", + "Leave", + "Listen", + "Message", + "OpenaiS2S", + "Pause", + "Play", + "Redirect", + "RestDial", + "S2S", + "Say", + "SipDecline", + "SipRefer", + "SipRequest", + "Stream", + "Tag", + "Transcribe", + "UltravoxS2S", +] diff --git a/tests/unit/test_verb_builder.py b/tests/unit/test_verb_builder.py index 4f17da1..d12ece5 100644 --- a/tests/unit/test_verb_builder.py +++ b/tests/unit/test_verb_builder.py @@ -1,25 +1,72 @@ """Spec-driven tests for VerbBuilder. -These tests validate that: -1. Every verb in the registry has a corresponding method on VerbBuilder -2. Every method produces JSON output matching the JSON Schema contract -3. Verb synonyms and injected properties work correctly -4. The builder's chaining and reset behavior is correct -5. Property names in output match JSON Schema exactly (camelCase preserved) -6. The 'from' → 'from_' Python mapping works for the message verb - -Tests are driven by JSON Schema — if a new property is added to a verb schema, -these tests verify the SDK can pass it through correctly. 
+Validates the contract VerbBuilder offers to users: + +- every verb in the registry is reachable as a method and produces a dict + with the correct ``verb`` key +- method input styles (pydantic model, dict, kwargs) are interchangeable +- property names on the wire match the JSON Schema exactly (camelCase) +- Python reserved word mapping (``from_`` → ``from``) works +- chaining and reset behavior match the documented API +- strict validation catches typos / missing required fields at construction + time rather than at the jambonz server + +Since verbs have heterogeneous required-field sets, per-verb minimal valid +payloads are hand-maintained in ``MINIMAL_VALID_KWARGS`` below. They are +chosen to be the smallest input that round-trips through the model. """ import json import pytest +from pydantic import ValidationError -from jambonz_sdk.verb_builder import _SPECS, VerbBuilder +from jambonz_sdk._models._registry import verb_model +from jambonz_sdk.verb_builder import VerbBuilder from jambonz_sdk.verb_registry import VERB_DEFS -# ── Spec-driven: every registered verb must exist as a method ─────── +# ── Per-verb minimal payloads that satisfy required-field validation ─ +# Values here are intentionally the smallest valid input for each verb; +# changes to a verb schema's ``required`` list will surface here. 
+ +MINIMAL_VALID_KWARGS: dict[str, dict] = { + "say": {"text": "hi"}, + "play": {"url": "https://example.com/audio.mp3"}, + "gather": {}, + "openai_s2s": {"llmOptions": {}}, + "google_s2s": {"llmOptions": {}}, + "deepgram_s2s": {"llmOptions": {}}, + "elevenlabs_s2s": {"auth": {"agent_id": "agent-123"}}, + "ultravox_s2s": {"llmOptions": {}}, + "s2s": {"vendor": "openai", "llmOptions": {}}, + "llm": {"vendor": "openai", "llmOptions": {}}, + "dialogflow": {"project": "p", "lang": "en-US", "credentials": "{}"}, + "agent": {"llm": {"vendor": "openai", "llmOptions": {}}}, + "listen": {"url": "wss://example.com/a"}, + "stream": {"url": "wss://example.com/a"}, + "transcribe": {}, + "dial": {"target": [{"type": "phone", "number": "+15085551212"}]}, + "conference": {"name": "room"}, + "enqueue": {"name": "q"}, + "dequeue": {"name": "q"}, + "hangup": {}, + "redirect": {"actionHook": "/next"}, + "pause": {"length": 1}, + "sip_decline": {"status": 486}, + "sip_request": {"method": "INFO"}, + "sip_refer": {"referTo": "sip:alice@example.com"}, + "config": {}, + "tag": {"data": {"foo": "bar"}}, + "dtmf": {"dtmf": "1234"}, + "dub": {"action": "addTrack", "track": "1"}, + "message": {"to": "+1", "from_": "+2", "text": "hi"}, + "alert": {"message": "info=alert-internal"}, + "answer": {}, + "leave": {}, +} + + +# ── Method existence and verb-name mapping ────────────────────────── class TestAllVerbsRegistered: """Every VerbDef in the registry must produce a working method.""" @@ -30,10 +77,7 @@ class TestAllVerbsRegistered: ids=[d.method_name for d in VERB_DEFS], ) def test_method_exists(self, verb_def): - assert hasattr(VerbBuilder, verb_def.method_name), ( - f"VerbBuilder missing method '{verb_def.method_name}' " - f"for spec '{verb_def.spec_name}'" - ) + assert hasattr(VerbBuilder, verb_def.method_name) @pytest.mark.parametrize( "verb_def", @@ -41,62 +85,67 @@ def test_method_exists(self, verb_def): ids=[d.method_name for d in VERB_DEFS], ) def test_method_is_callable(self, 
verb_def): - method = getattr(VerbBuilder, verb_def.method_name) - assert callable(method) + assert callable(getattr(VerbBuilder, verb_def.method_name)) @pytest.mark.parametrize( "verb_def", VERB_DEFS, ids=[d.method_name for d in VERB_DEFS], ) - def test_method_produces_correct_verb_name(self, verb_def): - """Calling the method must produce a dict with the correct 'verb' key.""" + def test_minimal_payload_produces_correct_verb_name(self, verb_def): + """A minimal valid payload round-trips and carries the verb name.""" + kwargs = MINIMAL_VALID_KWARGS.get(verb_def.method_name) + assert kwargs is not None, ( + f"no minimal payload defined for {verb_def.method_name}; " + "add an entry to MINIMAL_VALID_KWARGS" + ) builder = VerbBuilder() - method = getattr(builder, verb_def.method_name) - method() # Call with no args — all are optional at Python level + getattr(builder, verb_def.method_name)(**kwargs) verbs = builder.to_list() assert len(verbs) == 1 assert verbs[0]["verb"] == verb_def.json_verb -# ── Spec-driven: output properties must match JSON Schema ───────── +# ── Every schema property round-trips through the builder ─────────── class TestVerbOutputMatchesSpec: - """For each verb, passing a property defined in the JSON Schema must - appear in the output JSON with the exact same key name.""" + """Properties declared on a verb's schema must pass through to the output.""" @pytest.mark.parametrize( "verb_def", VERB_DEFS, ids=[d.method_name for d in VERB_DEFS], ) - def test_all_spec_properties_pass_through(self, verb_def): - """Every property in the spec can be passed and appears in output.""" - spec = _SPECS[verb_def.spec_name] - properties = spec.get("properties", {}) - - # Build kwargs with dummy values matching expected types - kwargs = {} - for prop_name, prop_type in properties.items(): - py_name = "from_" if prop_name == "from" else prop_name - kwargs[py_name] = _dummy_value(prop_type) - + def test_required_properties_pass_through(self, verb_def): + """Every 
required field on the generated model appears in output.""" + model = verb_model(verb_def.json_verb) + if model is None: + pytest.skip(f"no generated model for {verb_def.json_verb}") + + required_aliases: set[str] = set() + for name, info in model.model_fields.items(): + if name == "verb" or not info.is_required(): + continue + required_aliases.add(info.alias or name) + + kwargs = dict(MINIMAL_VALID_KWARGS.get(verb_def.method_name, {})) builder = VerbBuilder() - method = getattr(builder, verb_def.method_name) - method(**kwargs) - verbs = builder.to_list() - output = verbs[0] - - # Verify every spec property appears in output (with exact key name) - for prop_name in properties: - assert prop_name in output, ( - f"Verb '{verb_def.method_name}': spec property '{prop_name}' " - f"missing from output. Got keys: {list(output.keys())}" + getattr(builder, verb_def.method_name)(**kwargs) + output = builder.to_list()[0] + + for alias in required_aliases: + if alias == "from": + continue # covered by TestPythonReservedWordMapping + assert alias in output, ( + f"{verb_def.method_name}: required field '{alias}' " + f"missing from output {list(output.keys())}" ) +# ── Verb synonyms and vendor-shortcut behavior ────────────────────── + class TestVerbSynonyms: - """Synonym verbs must produce the correct json_verb and inject defaults.""" + """Synonym verbs produce the correct json_verb and honor schema defaults.""" def test_stream_produces_stream_verb(self): builder = VerbBuilder() @@ -109,7 +158,6 @@ def test_listen_produces_listen_verb(self): assert builder.to_list()[0]["verb"] == "listen" def test_stream_and_listen_accept_same_properties(self): - """Both synonyms accept the same spec properties.""" b1 = VerbBuilder() b1.stream(url="wss://a.com", sampleRate=16000, mixType="stereo") b2 = VerbBuilder() @@ -136,7 +184,7 @@ def test_deepgram_s2s_injects_vendor(self): def test_elevenlabs_s2s_injects_vendor(self): builder = VerbBuilder() - builder.elevenlabs_s2s(llmOptions={}) + 
builder.elevenlabs_s2s(auth={"agent_id": "agent-123"}) assert builder.to_list()[0]["vendor"] == "elevenlabs" def test_ultravox_s2s_injects_vendor(self): @@ -145,21 +193,21 @@ def test_ultravox_s2s_injects_vendor(self): assert builder.to_list()[0]["vendor"] == "ultravox" def test_s2s_does_not_inject_vendor(self): - """Generic s2s should NOT inject a vendor — user provides it.""" + """Generic s2s should not default a vendor — user provides it.""" builder = VerbBuilder() builder.s2s(vendor="custom", llmOptions={}) assert builder.to_list()[0]["vendor"] == "custom" - def test_user_can_override_injected_vendor(self): - """Explicit vendor kwarg should override the injected default.""" + def test_vendor_shortcut_rejects_mismatched_vendor(self): + """Vendor-specific shortcut enforces its Literal vendor constraint.""" builder = VerbBuilder() - builder.openai_s2s(vendor="custom-openai", llmOptions={}) - assert builder.to_list()[0]["vendor"] == "custom-openai" + with pytest.raises(ValidationError): + builder.openai_s2s(vendor="anthropic", llmOptions={}) -class TestPythonReservedWordMapping: - """'from' is reserved in Python; we accept 'from_' and serialize as 'from'.""" +# ── 'from' Python reserved word mapping ───────────────────────────── +class TestPythonReservedWordMapping: def test_message_from_mapping(self): builder = VerbBuilder() builder.message(to="+1234", from_="+5678", text="Hello") @@ -169,6 +217,8 @@ def test_message_from_mapping(self): assert verbs[0]["from"] == "+5678" +# ── Chaining and builder lifecycle ───────────────────────────────── + class TestBuilderChaining: def test_chaining_returns_self(self): builder = VerbBuilder() @@ -201,6 +251,8 @@ def test_none_values_are_stripped(self): assert "synthesizer" not in verbs[0] +# ── JSON serialization of the full verb queue ───────────────────── + class TestJsonSerialization: def test_output_is_json_serializable(self): builder = VerbBuilder() @@ -226,9 +278,9 @@ def test_nested_objects_preserved(self): assert 
verbs[0]["say"]["synthesizer"]["vendor"] == "elevenlabs" -class TestSipVerbNaming: - """SIP verbs use colon in JSON (sip:decline) but underscore in Python.""" +# ── SIP verb naming (colon in JSON, underscore in Python) ────────── +class TestSipVerbNaming: def test_sip_decline_json_verb(self): builder = VerbBuilder() builder.sip_decline(status=486, reason="Busy Here") @@ -245,7 +297,7 @@ def test_sip_refer_json_verb(self): assert builder.to_list()[0]["verb"] == "sip:refer" -# ── Realistic jambonz application flows ───────────────────────────── +# ── Realistic end-to-end flows ───────────────────────────────────── class TestRealisticFlows: def test_ivr_menu_flow(self): @@ -309,22 +361,130 @@ def test_listen_with_bidirectional_audio(self): assert v["metadata"]["purpose"] == "recording" -# ── Helpers ───────────────────────────────────────────────────────── - -def _dummy_value(spec_type): - """Generate a dummy value matching a JSON Schema type descriptor.""" - if isinstance(spec_type, str): - if spec_type.startswith("#"): - return {} - if "|" in spec_type: - first = spec_type.split("|")[0].strip() - return _dummy_value(first) - return {"string": "test", "number": 1, "boolean": True, "object": {}, "array": []}.get(spec_type, "test") - if isinstance(spec_type, list): - return [{}] - if isinstance(spec_type, dict): - enum = spec_type.get("enum") - if enum: - return enum[0] - return _dummy_value(spec_type.get("type", "string")) - return "test" +# ── New: the three input styles (model, dict, kwargs) are equivalent + +class TestInputStylesEquivalent: + """Passing a model, dict, or kwargs must produce the same wire output.""" + + def test_gather_three_styles_match(self): + Gather = verb_model("gather") + assert Gather is not None + + payload = { + "input": ["speech", "digits"], + "actionHook": "/menu", + "timeout": 15, + "numDigits": 1, + "say": {"text": "Press 1 for sales"}, + "recognizer": {"vendor": "deepgram", "language": "en-US"}, + } + + # 1) model + b1 = VerbBuilder() 
+ b1.gather(Gather.model_validate(payload)) + + # 2) dict + b2 = VerbBuilder() + b2.gather(dict(payload)) + + # 3) kwargs + b3 = VerbBuilder() + b3.gather(**payload) + + assert b1.to_list() == b2.to_list() == b3.to_list() + + def test_say_three_styles_match(self): + Say = verb_model("say") + payload = {"text": "hello", "loop": 2} + b1 = VerbBuilder() + b1.say(Say.model_validate(payload)) + b2 = VerbBuilder() + b2.say(dict(payload)) + b3 = VerbBuilder() + b3.say(**payload) + assert b1.to_list() == b2.to_list() == b3.to_list() + + +# ── New: validation catches typos and wrong types at construction ── + +class TestStrictValidation: + def test_typo_in_nested_field_rejected(self): + """Unknown fields on inner types raise at construction.""" + builder = VerbBuilder() + with pytest.raises(ValidationError): + builder.gather(say={"txet": "typo — extra field"}) + + def test_missing_required_field_rejected(self): + """Play requires url — constructing without it fails fast.""" + builder = VerbBuilder() + with pytest.raises(ValidationError): + builder.play() + + def test_wrong_type_rejected(self): + """Passing a wrong-typed value for a field fails pydantic validation.""" + builder = VerbBuilder() + with pytest.raises(ValidationError): + builder.say(text=12345, synthesizer="not-a-dict") + + def test_model_and_kwargs_both_raises(self): + """Passing both a model/dict and kwargs is a user error.""" + Say = verb_model("say") + builder = VerbBuilder() + with pytest.raises(TypeError): + builder.say(Say(text="hi"), text="also hi") + + +# ── Public re-export modules provide typed model access ──────────── + +class TestPublicImports: + def test_verbs_package_exports_typed_models(self): + from jambonz_sdk.verbs import Agent, Gather, OpenaiS2S, Say, SipDecline + + assert Gather.__name__ == "Gather" + assert Say.__name__ == "Say" + assert Agent.__name__ == "Agent" + assert OpenaiS2S.__name__ == "OpenaiS2S" + assert SipDecline.__name__ == "SipDecline" + + def 
test_components_package_exports_typed_models(self): + from jambonz_sdk.components import ( + ActionHook, + Recognizer, + Synthesizer, + Target, + ) + + assert Recognizer.__name__ == "Recognizer" + assert Synthesizer.__name__ == "Synthesizer" + assert Target.__name__ == "Target" + assert ActionHook is not None + + def test_end_to_end_typed_construction(self): + """The full typed API from the handover's 'goal' example works.""" + from jambonz_sdk._models._generated.components.recognizer_deepgramOptions import ( + DeepgramRecognizerOptions, + ) + from jambonz_sdk.components import Recognizer + from jambonz_sdk.verbs import Gather, Say + + builder = VerbBuilder() + builder.gather(Gather( + input=["speech", "digits"], + action_hook="/menu", + timeout=15, + num_digits=1, + say=Say(text="Press 1 for sales, 2 for support"), + recognizer=Recognizer( + vendor="deepgram", + language="en-US", + hints=["jambonz", "drachtio"], + deepgram_options=DeepgramRecognizerOptions( + model="nova-3", smart_formatting=True + ), + ), + )) + [output] = builder.to_list() + assert output["verb"] == "gather" + assert output["actionHook"] == "/menu" + assert output["numDigits"] == 1 + assert output["recognizer"]["deepgramOptions"]["smartFormatting"] is True